sim/aarch64/simulator.c

   1 /* simulator.c -- Interface for the AArch64 simulator.
   2
   3    Copyright (C) 2015-2016 Free Software Foundation, Inc.
   4
   5    Contributed by Red Hat.
   6
   7    This file is part of GDB.
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 3 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include <stdlib.h>
  24 #include <stdio.h>
  25 #include <string.h>
  26 #include <sys/types.h>
  27 #include <math.h>
  28 #include <time.h>
  29 #include <limits.h>
  30
  31 #include "simulator.h"
  32 #include "cpustate.h"
  33 #include "memory.h"
  34
/* Flag values passed as the third argument of the aarch64_get_reg_* and
   aarch64_set_reg_* calls in this file.  Callers pass SP_OK for operands
   that may name the stack pointer and NO_SP for operands that may not.  */
#define NO_SP 0
#define SP_OK 1

/* CPSR flag tests.  All three expand to a reference to a variable named
   `cpu', so they can only be used where one is in scope.  TST yields the
   raw result of aarch64_test_CPSR_bit; IS_SET and IS_CLEAR normalise
   that result to exactly 1 or 0.  */
#define TST(_flag)   (aarch64_test_CPSR_bit (cpu, _flag))
#define IS_SET(_X)   (TST (( _X )) ? 1 : 0)
#define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)

/* Space saver macro: applies uimm to the current instruction word of
   `cpu' with the bounds HIGH and LOW (presumably yielding instruction
   bits HIGH..LOW as an unsigned value -- confirm against uimm).  */
#define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))
  44
/* Report an unallocated instruction encoding and stop the simulation.
   Emits the disassembly and a trace message naming the simulator source
   line and the current PC, then calls sim_engine_halt with sim_stopped
   and SIM_SIGILL.  Requires a variable named `cpu' in scope.
   NOTE(review): callers appear to assume sim_engine_halt does not
   return -- confirm.  */
#define HALT_UNALLOC                                                    \
  do                                                                    \
    {                                                                   \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));                         \
      TRACE_INSN (cpu,                                                  \
                  "Unallocated instruction detected at sim line %d,"    \
                  " exe addr %" PRIx64,                                 \
                  __LINE__, aarch64_get_PC (cpu));                      \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
                       sim_stopped, SIM_SIGILL);                        \
    }                                                                   \
  while (0)
  57
/* Report an instruction that the simulator does not implement and stop
   the simulation.  Emits the disassembly and a trace message naming the
   simulator source line and the current PC.  When TRACE_ANY_P (cpu) is
   false the error is also printed via sim_io_eprintf, followed by the
   disassembly, so that the failure is still visible.  Finally calls
   sim_engine_halt with sim_stopped and SIM_SIGABRT.  Requires a variable
   named `cpu' in scope.  */
#define HALT_NYI                                                        \
  do                                                                    \
    {                                                                   \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));                         \
      TRACE_INSN (cpu,                                                  \
                  "Unimplemented instruction detected at sim line %d,"  \
                  " exe addr %" PRIx64,                                 \
                  __LINE__, aarch64_get_PC (cpu));                      \
      if (! TRACE_ANY_P (cpu))                                          \
        {                                                               \
          sim_io_eprintf (CPU_STATE (cpu), "SIM Error: Unimplemented instruction: "); \
          trace_disasm (CPU_STATE (cpu), cpu, aarch64_get_PC (cpu));    \
        }                                                               \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
                       sim_stopped, SIM_SIGABRT);                       \
    }                                                                   \
  while (0)
  75
/* Check that the instruction field INSTR (HI, LO) holds EXPECTED; if it
   does not, the encoding is treated as unimplemented and HALT_NYI is
   invoked.  Requires a variable named `cpu' in scope.  */
#define NYI_assert(HI, LO, EXPECTED)                                    \
  do                                                                    \
    {                                                                   \
      if (INSTR ((HI), (LO)) != (EXPECTED))                             \
        HALT_NYI;                                                       \
    }                                                                   \
  while (0)
  83
  84 /* Helper functions used by expandLogicalImmediate.  */
  85
  86 /* for i = 1, ... N result<i-1> = 1 other bits are zero  */
  87 static inline uint64_t
  88 ones (int N)
  89 {
  90   return (N == 64 ? (uint64_t)-1UL : ((1UL << N) - 1));
  91 }
  92
  93 /* result<0> to val<N>  */
  94 static inline uint64_t
  95 pickbit (uint64_t val, int N)
  96 {
  97   return pickbits64 (val, N, N);
  98 }
  99
 100 static uint64_t
 101 expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
 102 {
 103   uint64_t mask;
 104   uint64_t imm;
 105   unsigned simd_size;
 106
 107   /* The immediate value is S+1 bits to 1, left rotated by SIMDsize - R
 108      (in other words, right rotated by R), then replicated. */
 109   if (N != 0)
 110     {
 111       simd_size = 64;
 112       mask = 0xffffffffffffffffull;
 113     }
 114   else
 115     {
 116       switch (S)
 117         {
 118         case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32;           break;
 119         case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
 120         case 0x30 ... 0x37: /* 110xxx */ simd_size =  8; S &= 0x7; break;
 121         case 0x38 ... 0x3b: /* 1110xx */ simd_size =  4; S &= 0x3; break;
 122         case 0x3c ... 0x3d: /* 11110x */ simd_size =  2; S &= 0x1; break;
 123         default: return 0;
 124         }
 125       mask = (1ull << simd_size) - 1;
 126       /* Top bits are IGNORED.  */
 127       R &= simd_size - 1;
 128     }
 129
 130   /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected.  */
 131   if (S == simd_size - 1)
 132     return 0;
 133
 134   /* S+1 consecutive bits to 1.  */
 135   /* NOTE: S can't be 63 due to detection above.  */
 136   imm = (1ull << (S + 1)) - 1;
 137
 138   /* Rotate to the left by simd_size - R.  */
 139   if (R != 0)
 140     imm = ((imm << (simd_size - R)) & mask) | (imm >> R);
 141
 142   /* Replicate the value according to SIMD size.  */
 143   switch (simd_size)
 144     {
 145     case  2: imm = (imm <<  2) | imm;
 146     case  4: imm = (imm <<  4) | imm;
 147     case  8: imm = (imm <<  8) | imm;
 148     case 16: imm = (imm << 16) | imm;
 149     case 32: imm = (imm << 32) | imm;
 150     case 64: break;
 151     default: return 0;
 152     }
 153
 154   return imm;
 155 }
 156
/* Instr[22,10] encodes N, immr and imms.  We want a lookup table with
   one entry for each possible combination, i.e. 2^13 entries, each
   holding the expanded 64 bit immediate.  The table is filled in by
   aarch64_init_LIT_table; entries for encodings that
   expand_logical_immediate rejects hold 0.  */
#define  LI_TABLE_SIZE  (1 << 13)
static uint64_t LITable[LI_TABLE_SIZE];
 161
 162 void
 163 aarch64_init_LIT_table (void)
 164 {
 165   unsigned index;
 166
 167   for (index = 0; index < LI_TABLE_SIZE; index++)
 168     {
 169       uint32_t N    = uimm (index, 12, 12);
 170       uint32_t immr = uimm (index, 11, 6);
 171       uint32_t imms = uimm (index, 5, 0);
 172
 173       LITable [index] = expand_logical_immediate (imms, immr, N);
 174     }
 175 }
 176
 177 static void
 178 dexNotify (sim_cpu *cpu)
 179 {
 180   /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
 181                            2 ==> exit Java, 3 ==> start next bytecode.  */
 182   uint32_t type = INSTR (14, 0);
 183
 184   TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);
 185
 186   switch (type)
 187     {
 188     case 0:
 189       /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
 190          aarch64_get_reg_u64 (cpu, R22, 0));  */
 191       break;
 192     case 1:
 193       /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
 194          aarch64_get_reg_u64 (cpu, R22, 0));  */
 195       break;
 196     case 2:
 197       /* aarch64_notifyMethodExit ();  */
 198       break;
 199     case 3:
 200       /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
 201          aarch64_get_reg_u64 (cpu, R22, 0));  */
 202       break;
 203     }
 204 }
 205
 206 /* secondary decode within top level groups  */
 207
 208 static void
 209 dexPseudo (sim_cpu *cpu)
 210 {
 211   /* assert instr[28,27] = 00
 212
 213      We provide 2 pseudo instructions:
 214
 215      HALT stops execution of the simulator causing an immediate
 216      return to the x86 code which entered it.
 217
 218      CALLOUT initiates recursive entry into x86 code.  A register
 219      argument holds the address of the x86 routine.  Immediate
 220      values in the instruction identify the number of general
 221      purpose and floating point register arguments to be passed
 222      and the type of any value to be returned.  */
 223
 224   uint32_t PSEUDO_HALT      =  0xE0000000U;
 225   uint32_t PSEUDO_CALLOUT   =  0x00018000U;
 226   uint32_t PSEUDO_CALLOUTR  =  0x00018001U;
 227   uint32_t PSEUDO_NOTIFY    =  0x00014000U;
 228   uint32_t dispatch;
 229
 230   if (aarch64_get_instr (cpu) == PSEUDO_HALT)
 231     {
 232       TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
 233       sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
 234                        sim_stopped, SIM_SIGTRAP);
 235     }
 236
 237   dispatch = INSTR (31, 15);
 238
 239   /* We do not handle callouts at the moment.  */
 240   if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
 241     {
 242       TRACE_EVENTS (cpu, " Callout");
 243       sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
 244                        sim_stopped, SIM_SIGABRT);
 245     }
 246
 247   else if (dispatch == PSEUDO_NOTIFY)
 248     dexNotify (cpu);
 249
 250   else
 251     HALT_UNALLOC;
 252 }
 253
 254 /* Load-store single register (unscaled offset)
 255    These instructions employ a base register plus an unscaled signed
 256    9 bit offset.
 257
 258    N.B. the base register (source) can be Xn or SP. all other
 259    registers may not be SP.  */
 260
 261 /* 32 bit load 32 bit unscaled signed 9 bit.  */
 262 static void
 263 ldur32 (sim_cpu *cpu, int32_t offset)
 264 {
 265   unsigned rn = INSTR (9, 5);
 266   unsigned rt = INSTR (4, 0);
 267
 268   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
 269                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 270                         + offset));
 271 }
 272
 273 /* 64 bit load 64 bit unscaled signed 9 bit.  */
 274 static void
 275 ldur64 (sim_cpu *cpu, int32_t offset)
 276 {
 277   unsigned rn = INSTR (9, 5);
 278   unsigned rt = INSTR (4, 0);
 279
 280   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
 281                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 282                         + offset));
 283 }
 284
 285 /* 32 bit load zero-extended byte unscaled signed 9 bit.  */
 286 static void
 287 ldurb32 (sim_cpu *cpu, int32_t offset)
 288 {
 289   unsigned rn = INSTR (9, 5);
 290   unsigned rt = INSTR (4, 0);
 291
 292   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
 293                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 294                         + offset));
 295 }
 296
 297 /* 32 bit load sign-extended byte unscaled signed 9 bit.  */
 298 static void
 299 ldursb32 (sim_cpu *cpu, int32_t offset)
 300 {
 301   unsigned rn = INSTR (9, 5);
 302   unsigned rt = INSTR (4, 0);
 303
 304   aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
 305                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 306                         + offset));
 307 }
 308
 309 /* 64 bit load sign-extended byte unscaled signed 9 bit.  */
 310 static void
 311 ldursb64 (sim_cpu *cpu, int32_t offset)
 312 {
 313   unsigned rn = INSTR (9, 5);
 314   unsigned rt = INSTR (4, 0);
 315
 316   aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
 317                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 318                         + offset));
 319 }
 320
 321 /* 32 bit load zero-extended short unscaled signed 9 bit  */
 322 static void
 323 ldurh32 (sim_cpu *cpu, int32_t offset)
 324 {
 325   unsigned rn = INSTR (9, 5);
 326   unsigned rd = INSTR (4, 0);
 327
 328   aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
 329                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 330                         + offset));
 331 }
 332
 333 /* 32 bit load sign-extended short unscaled signed 9 bit  */
 334 static void
 335 ldursh32 (sim_cpu *cpu, int32_t offset)
 336 {
 337   unsigned rn = INSTR (9, 5);
 338   unsigned rd = INSTR (4, 0);
 339
 340   aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
 341                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 342                         + offset));
 343 }
 344
 345 /* 64 bit load sign-extended short unscaled signed 9 bit  */
 346 static void
 347 ldursh64 (sim_cpu *cpu, int32_t offset)
 348 {
 349   unsigned rn = INSTR (9, 5);
 350   unsigned rt = INSTR (4, 0);
 351
 352   aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
 353                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 354                         + offset));
 355 }
 356
 357 /* 64 bit load sign-extended word unscaled signed 9 bit  */
 358 static void
 359 ldursw (sim_cpu *cpu, int32_t offset)
 360 {
 361   unsigned rn = INSTR (9, 5);
 362   unsigned rd = INSTR (4, 0);
 363
 364   aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s32
 365                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 366                         + offset));
 367 }
 368
 369 /* N.B. with stores the value in source is written to the address
 370    identified by source2 modified by offset.  */
 371
 372 /* 32 bit store 32 bit unscaled signed 9 bit.  */
 373 static void
 374 stur32 (sim_cpu *cpu, int32_t offset)
 375 {
 376   unsigned rn = INSTR (9, 5);
 377   unsigned rd = INSTR (4, 0);
 378
 379   aarch64_set_mem_u32 (cpu,
 380                        aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
 381                        aarch64_get_reg_u32 (cpu, rd, NO_SP));
 382 }
 383
 384 /* 64 bit store 64 bit unscaled signed 9 bit  */
 385 static void
 386 stur64 (sim_cpu *cpu, int32_t offset)
 387 {
 388   unsigned rn = INSTR (9, 5);
 389   unsigned rd = INSTR (4, 0);
 390
 391   aarch64_set_mem_u64 (cpu,
 392                        aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
 393                        aarch64_get_reg_u64 (cpu, rd, NO_SP));
 394 }
 395
 396 /* 32 bit store byte unscaled signed 9 bit  */
 397 static void
 398 sturb (sim_cpu *cpu, int32_t offset)
 399 {
 400   unsigned rn = INSTR (9, 5);
 401   unsigned rd = INSTR (4, 0);
 402
 403   aarch64_set_mem_u8 (cpu,
 404                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
 405                       aarch64_get_reg_u8 (cpu, rd, NO_SP));
 406 }
 407
 408 /* 32 bit store short unscaled signed 9 bit  */
 409 static void
 410 sturh (sim_cpu *cpu, int32_t offset)
 411 {
 412   unsigned rn = INSTR (9, 5);
 413   unsigned rd = INSTR (4, 0);
 414
 415   aarch64_set_mem_u16 (cpu,
 416                        aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
 417                        aarch64_get_reg_u16 (cpu, rd, NO_SP));
 418 }
 419
 420 /* Load single register pc-relative label
 421    Offset is a signed 19 bit immediate count in words
 422    rt may not be SP.  */
 423
 424 /* 32 bit pc-relative load  */
 425 static void
 426 ldr32_pcrel (sim_cpu *cpu, int32_t offset)
 427 {
 428   unsigned rd = INSTR (4, 0);
 429
 430   aarch64_set_reg_u64 (cpu, rd, NO_SP,
 431                        aarch64_get_mem_u32
 432                        (cpu, aarch64_get_PC (cpu) + offset * 4));
 433 }
 434
 435 /* 64 bit pc-relative load  */
 436 static void
 437 ldr_pcrel (sim_cpu *cpu, int32_t offset)
 438 {
 439   unsigned rd = INSTR (4, 0);
 440
 441   aarch64_set_reg_u64 (cpu, rd, NO_SP,
 442                        aarch64_get_mem_u64
 443                        (cpu, aarch64_get_PC (cpu) + offset * 4));
 444 }
 445
 446 /* sign extended 32 bit pc-relative load  */
 447 static void
 448 ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
 449 {
 450   unsigned rd = INSTR (4, 0);
 451
 452   aarch64_set_reg_u64 (cpu, rd, NO_SP,
 453                        aarch64_get_mem_s32
 454                        (cpu, aarch64_get_PC (cpu) + offset * 4));
 455 }
 456
 457 /* float pc-relative load  */
 458 static void
 459 fldrs_pcrel (sim_cpu *cpu, int32_t offset)
 460 {
 461   unsigned int rd = INSTR (4, 0);
 462
 463   aarch64_set_vec_u32 (cpu, rd, 0,
 464                        aarch64_get_mem_u32
 465                        (cpu, aarch64_get_PC (cpu) + offset * 4));
 466 }
 467
 468 /* double pc-relative load  */
 469 static void
 470 fldrd_pcrel (sim_cpu *cpu, int32_t offset)
 471 {
 472   unsigned int st = INSTR (4, 0);
 473
 474   aarch64_set_vec_u64 (cpu, st, 0,
 475                        aarch64_get_mem_u64
 476                        (cpu, aarch64_get_PC (cpu) + offset * 4));
 477 }
 478
 479 /* long double pc-relative load.  */
 480 static void
 481 fldrq_pcrel (sim_cpu *cpu, int32_t offset)
 482 {
 483   unsigned int st = INSTR (4, 0);
 484   uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
 485   FRegister a;
 486
 487   aarch64_get_mem_long_double (cpu, addr, & a);
 488   aarch64_set_FP_long_double (cpu, st, a);
 489 }
 490
/* This can be used to scale an offset by applying the requisite
   shift.  The second argument is the access size in bits; it is
   pasted onto `ScaleShift' to name the shift count, so it must be a
   literal for which such a name is defined.  This file uses 16, 32,
   64 and 128.  */

#define SCALE(_offset, _elementSize) \
    ((_offset) << ScaleShift ## _elementSize)
 497
/* This can be used to optionally scale a register derived offset by
   applying the requisite shift as indicated by the Scaling argument.
   The second argument is the access size in bits, pasted onto
   `ScaleShift' exactly as for SCALE; this file uses 32, 64 and 128.
   The third argument selects whether the shift is applied: when it is
   non-zero the offset is shifted by the ScaleShift value, otherwise it
   is shifted by 0, i.e. left unchanged.  */

#define OPT_SCALE(_offset, _elementType, _Scaling) \
  ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))
 507
 508 /* This can be used to zero or sign extend a 32 bit register derived
 509    value to a 64 bit value.  the first argument must be the value as
 510    a uint32_t and the second must be either UXTW or SXTW. The result
 511    is returned as an int64_t.  */
 512
 513 static inline int64_t
 514 extend (uint32_t value, Extension extension)
 515 {
 516   union
 517   {
 518     uint32_t u;
 519     int32_t   n;
 520   } x;
 521
 522   /* A branchless variant of this ought to be possible.  */
 523   if (extension == UXTW || extension == NoExtension)
 524     return value;
 525
 526   x.u = value;
 527   return x.n;
 528 }
 529
 530 /* Scalar Floating Point
 531
 532    FP load/store single register (4 addressing modes)
 533
 534    N.B. the base register (source) can be the stack pointer.
 535    The secondary source register (source2) can only be an Xn register.  */
 536
 537 /* Load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
 538 static void
 539 fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
 540 {
 541   unsigned rn = INSTR (9, 5);
 542   unsigned st = INSTR (4, 0);
 543   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 544
 545   if (wb != Post)
 546     address += offset;
 547
 548   aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
 549   if (wb == Post)
 550     address += offset;
 551
 552   if (wb != NoWriteBack)
 553     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
 554 }
 555
 556 /* Load 8 bit with unsigned 12 bit offset.  */
 557 static void
 558 fldrb_abs (sim_cpu *cpu, uint32_t offset)
 559 {
 560   unsigned rd = INSTR (4, 0);
 561   unsigned rn = INSTR (9, 5);
 562   uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
 563
 564   aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
 565 }
 566
 567 /* Load 16 bit scaled unsigned 12 bit.  */
 568 static void
 569 fldrh_abs (sim_cpu *cpu, uint32_t offset)
 570 {
 571   unsigned rd = INSTR (4, 0);
 572   unsigned rn = INSTR (9, 5);
 573   uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);
 574
 575   aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
 576 }
 577
 578 /* Load 32 bit scaled unsigned 12 bit.  */
 579 static void
 580 fldrs_abs (sim_cpu *cpu, uint32_t offset)
 581 {
 582   unsigned rd = INSTR (4, 0);
 583   unsigned rn = INSTR (9, 5);
 584   uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);
 585
 586   aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
 587 }
 588
 589 /* Load 64 bit scaled unsigned 12 bit.  */
 590 static void
 591 fldrd_abs (sim_cpu *cpu, uint32_t offset)
 592 {
 593   unsigned rd = INSTR (4, 0);
 594   unsigned rn = INSTR (9, 5);
 595   uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);
 596
 597   aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
 598 }
 599
 600 /* Load 128 bit scaled unsigned 12 bit.  */
 601 static void
 602 fldrq_abs (sim_cpu *cpu, uint32_t offset)
 603 {
 604   unsigned rd = INSTR (4, 0);
 605   unsigned rn = INSTR (9, 5);
 606   uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
 607
 608   aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
 609   aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
 610 }
 611
 612 /* Load 32 bit scaled or unscaled zero- or sign-extended
 613    32-bit register offset.  */
 614 static void
 615 fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
 616 {
 617   unsigned rm = INSTR (20, 16);
 618   unsigned rn = INSTR (9, 5);
 619   unsigned st = INSTR (4, 0);
 620   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 621   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
 622   uint64_t displacement = OPT_SCALE (extended, 32, scaling);
 623
 624   aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
 625                        (cpu, address + displacement));
 626 }
 627
 628 /* Load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
 629 static void
 630 fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
 631 {
 632   unsigned rn = INSTR (9, 5);
 633   unsigned st = INSTR (4, 0);
 634   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 635
 636   if (wb != Post)
 637     address += offset;
 638
 639   aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));
 640
 641   if (wb == Post)
 642     address += offset;
 643
 644   if (wb != NoWriteBack)
 645     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
 646 }
 647
 648 /* Load 64 bit scaled or unscaled zero- or sign-extended 32-bit register offset.  */
 649 static void
 650 fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
 651 {
 652   unsigned rm = INSTR (20, 16);
 653   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
 654   uint64_t displacement = OPT_SCALE (extended, 64, scaling);
 655
 656   fldrd_wb (cpu, displacement, NoWriteBack);
 657 }
 658
 659 /* Load 128 bit unscaled signed 9 bit with pre- or post-writeback.  */
 660 static void
 661 fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
 662 {
 663   FRegister a;
 664   unsigned rn = INSTR (9, 5);
 665   unsigned st = INSTR (4, 0);
 666   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 667
 668   if (wb != Post)
 669     address += offset;
 670
 671   aarch64_get_mem_long_double (cpu, address, & a);
 672   aarch64_set_FP_long_double (cpu, st, a);
 673
 674   if (wb == Post)
 675     address += offset;
 676
 677   if (wb != NoWriteBack)
 678     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
 679 }
 680
 681 /* Load 128 bit scaled or unscaled zero- or sign-extended 32-bit register offset  */
 682 static void
 683 fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
 684 {
 685   unsigned rm = INSTR (20, 16);
 686   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
 687   uint64_t displacement = OPT_SCALE (extended, 128, scaling);
 688
 689   fldrq_wb (cpu, displacement, NoWriteBack);
 690 }
 691
/* Memory Access

   Load-store single register.
   There are four addressing modes available here, all of which employ
   a 64 bit source (base) register.

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.

   Scaled, 12-bit, unsigned immediate offset, without pre- and
   post-index options.
   Unscaled, 9-bit, signed immediate offset with pre- or post-index
   writeback.
   Scaled or unscaled 64-bit register offset.
   Scaled or unscaled 32-bit extended register offset.

   All offsets are assumed to be raw from the decode, i.e. the
   simulator is expected to adjust scaled offsets based on the
   accessed data size.  The same applies to the register and extended
   register offset versions, except that in the latter case the
   operation may also require a sign extend.

   A separate method is provided for each possible addressing mode.  */
 715
 716 /* 32 bit load 32 bit scaled unsigned 12 bit  */
 717 static void
 718 ldr32_abs (sim_cpu *cpu, uint32_t offset)
 719 {
 720   unsigned rn = INSTR (9, 5);
 721   unsigned rt = INSTR (4, 0);
 722
 723   /* The target register may not be SP but the source may be.  */
 724   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
 725                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 726                         + SCALE (offset, 32)));
 727 }
 728
 729 /* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
 730 static void
 731 ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
 732 {
 733   unsigned rn = INSTR (9, 5);
 734   unsigned rt = INSTR (4, 0);
 735   uint64_t address;
 736
 737   if (rn == rt && wb != NoWriteBack)
 738     HALT_UNALLOC;
 739
 740   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 741
 742   if (wb != Post)
 743     address += offset;
 744
 745   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
 746
 747   if (wb == Post)
 748     address += offset;
 749
 750   if (wb != NoWriteBack)
 751     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
 752 }
 753
 754 /* 32 bit load 32 bit scaled or unscaled
 755    zero- or sign-extended 32-bit register offset  */
 756 static void
 757 ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
 758 {
 759   unsigned rm = INSTR (20, 16);
 760   unsigned rn = INSTR (9, 5);
 761   unsigned rt = INSTR (4, 0);
 762   /* rn may reference SP, rm and rt must reference ZR  */
 763
 764   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 765   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
 766   uint64_t displacement =  OPT_SCALE (extended, 32, scaling);
 767
 768   aarch64_set_reg_u64 (cpu, rt, NO_SP,
 769                        aarch64_get_mem_u32 (cpu, address + displacement));
 770 }
 771
 772 /* 64 bit load 64 bit scaled unsigned 12 bit  */
 773 static void
 774 ldr_abs (sim_cpu *cpu, uint32_t offset)
 775 {
 776   unsigned rn = INSTR (9, 5);
 777   unsigned rt = INSTR (4, 0);
 778
 779   /* The target register may not be SP but the source may be.  */
 780   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
 781                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 782                         + SCALE (offset, 64)));
 783 }
 784
 785 /* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
 786 static void
 787 ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
 788 {
 789   unsigned rn = INSTR (9, 5);
 790   unsigned rt = INSTR (4, 0);
 791   uint64_t address;
 792
 793   if (rn == rt && wb != NoWriteBack)
 794     HALT_UNALLOC;
 795
 796   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 797
 798   if (wb != Post)
 799     address += offset;
 800
 801   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
 802
 803   if (wb == Post)
 804     address += offset;
 805
 806   if (wb != NoWriteBack)
 807     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
 808 }
 809
 810 /* 64 bit load 64 bit scaled or unscaled zero-
 811    or sign-extended 32-bit register offset.  */
 812 static void
 813 ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
 814 {
 815   unsigned rm = INSTR (20, 16);
 816   unsigned rn = INSTR (9, 5);
 817   unsigned rt = INSTR (4, 0);
 818   /* rn may reference SP, rm and rt must reference ZR  */
 819
 820   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 821   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
 822   uint64_t displacement =  OPT_SCALE (extended, 64, scaling);
 823
 824   aarch64_set_reg_u64 (cpu, rt, NO_SP,
 825                        aarch64_get_mem_u64 (cpu, address + displacement));
 826 }
 827
 828 /* 32 bit load zero-extended byte scaled unsigned 12 bit.  */
 829 static void
 830 ldrb32_abs (sim_cpu *cpu, uint32_t offset)
 831 {
 832   unsigned rn = INSTR (9, 5);
 833   unsigned rt = INSTR (4, 0);
 834
 835   /* The target register may not be SP but the source may be
 836      there is no scaling required for a byte load.  */
 837   aarch64_set_reg_u64 (cpu, rt, NO_SP,
 838                        aarch64_get_mem_u8
 839                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
 840 }
 841
 842 /* 32 bit load zero-extended byte unscaled signed 9 bit with pre- or post-writeback.  */
 843 static void
 844 ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
 845 {
 846   unsigned rn = INSTR (9, 5);
 847   unsigned rt = INSTR (4, 0);
 848   uint64_t address;
 849
 850   if (rn == rt && wb != NoWriteBack)
 851     HALT_UNALLOC;
 852
 853   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 854
 855   if (wb != Post)
 856     address += offset;
 857
 858   aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
 859
 860   if (wb == Post)
 861     address += offset;
 862
 863   if (wb != NoWriteBack)
 864     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
 865 }
 866
 867 /* 32 bit load zero-extended byte scaled or unscaled zero-
 868    or sign-extended 32-bit register offset.  */
 869 static void
 870 ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
 871 {
 872   unsigned rm = INSTR (20, 16);
 873   unsigned rn = INSTR (9, 5);
 874   unsigned rt = INSTR (4, 0);
 875   /* rn may reference SP, rm and rt must reference ZR  */
 876
 877   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 878   int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
 879                                  extension);
 880
 881   /* There is no scaling required for a byte load.  */
 882   aarch64_set_reg_u64 (cpu, rt, NO_SP,
 883                        aarch64_get_mem_u8 (cpu, address + displacement));
 884 }
 885
 886 /* 64 bit load sign-extended byte unscaled signed 9 bit
 887    with pre- or post-writeback.  */
 888 static void
 889 ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
 890 {
 891   unsigned rn = INSTR (9, 5);
 892   unsigned rt = INSTR (4, 0);
 893   uint64_t address;
 894   int64_t val;
 895
 896   if (rn == rt && wb != NoWriteBack)
 897     HALT_UNALLOC;
 898
 899   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 900
 901   if (wb != Post)
 902     address += offset;
 903
 904   val = aarch64_get_mem_s8 (cpu, address);
 905   aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
 906
 907   if (wb == Post)
 908     address += offset;
 909
 910   if (wb != NoWriteBack)
 911     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
 912 }
 913
 914 /* 64 bit load sign-extended byte scaled unsigned 12 bit.  */
 915 static void
 916 ldrsb_abs (sim_cpu *cpu, uint32_t offset)
 917 {
 918   ldrsb_wb (cpu, offset, NoWriteBack);
 919 }
 920
 921 /* 64 bit load sign-extended byte scaled or unscaled zero-
 922    or sign-extended 32-bit register offset.  */
 923 static void
 924 ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
 925 {
 926   unsigned rm = INSTR (20, 16);
 927   unsigned rn = INSTR (9, 5);
 928   unsigned rt = INSTR (4, 0);
 929   /* rn may reference SP, rm and rt must reference ZR  */
 930
 931   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 932   int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
 933                                  extension);
 934   /* There is no scaling required for a byte load.  */
 935   aarch64_set_reg_s64 (cpu, rt, NO_SP,
 936                        aarch64_get_mem_s8 (cpu, address + displacement));
 937 }
 938
 939 /* 32 bit load zero-extended short scaled unsigned 12 bit.  */
 940 static void
 941 ldrh32_abs (sim_cpu *cpu, uint32_t offset)
 942 {
 943   unsigned rn = INSTR (9, 5);
 944   unsigned rt = INSTR (4, 0);
 945   uint32_t val;
 946
 947   /* The target register may not be SP but the source may be.  */
 948   val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
 949                              + SCALE (offset, 16));
 950   aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
 951 }
 952
 953 /* 32 bit load zero-extended short unscaled signed 9 bit
 954    with pre- or post-writeback.  */
 955 static void
 956 ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
 957 {
 958   unsigned rn = INSTR (9, 5);
 959   unsigned rt = INSTR (4, 0);
 960   uint64_t address;
 961
 962   if (rn == rt && wb != NoWriteBack)
 963     HALT_UNALLOC;
 964
 965   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 966
 967   if (wb != Post)
 968     address += offset;
 969
 970   aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
 971
 972   if (wb == Post)
 973     address += offset;
 974
 975   if (wb != NoWriteBack)
 976     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
 977 }
 978
 979 /* 32 bit load zero-extended short scaled or unscaled zero-
 980    or sign-extended 32-bit register offset.  */
 981 static void
 982 ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
 983 {
 984   unsigned rm = INSTR (20, 16);
 985   unsigned rn = INSTR (9, 5);
 986   unsigned rt = INSTR (4, 0);
 987   /* rn may reference SP, rm and rt must reference ZR  */
 988
 989   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
 990   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
 991   uint64_t displacement =  OPT_SCALE (extended, 16, scaling);
 992
 993   aarch64_set_reg_u32 (cpu, rt, NO_SP,
 994                        aarch64_get_mem_u16 (cpu, address + displacement));
 995 }
 996
 997 /* 32 bit load sign-extended short scaled unsigned 12 bit.  */
 998 static void
 999 ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
1000 {
1001   unsigned rn = INSTR (9, 5);
1002   unsigned rt = INSTR (4, 0);
1003   int32_t val;
1004
1005   /* The target register may not be SP but the source may be.  */
1006   val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1007                              + SCALE (offset, 16));
1008   aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
1009 }
1010
1011 /* 32 bit load sign-extended short unscaled signed 9 bit
1012    with pre- or post-writeback.  */
1013 static void
1014 ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1015 {
1016   unsigned rn = INSTR (9, 5);
1017   unsigned rt = INSTR (4, 0);
1018   uint64_t address;
1019
1020   if (rn == rt && wb != NoWriteBack)
1021     HALT_UNALLOC;
1022
1023   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1024
1025   if (wb != Post)
1026     address += offset;
1027
1028   aarch64_set_reg_s32 (cpu, rt, NO_SP,
1029                        (int32_t) aarch64_get_mem_s16 (cpu, address));
1030
1031   if (wb == Post)
1032     address += offset;
1033
1034   if (wb != NoWriteBack)
1035     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1036 }
1037
1038 /* 32 bit load sign-extended short scaled or unscaled zero-
1039    or sign-extended 32-bit register offset.  */
1040 static void
1041 ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1042 {
1043   unsigned rm = INSTR (20, 16);
1044   unsigned rn = INSTR (9, 5);
1045   unsigned rt = INSTR (4, 0);
1046   /* rn may reference SP, rm and rt must reference ZR  */
1047
1048   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1049   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1050   uint64_t displacement =  OPT_SCALE (extended, 16, scaling);
1051
1052   aarch64_set_reg_s32 (cpu, rt, NO_SP,
1053                        (int32_t) aarch64_get_mem_s16
1054                        (cpu, address + displacement));
1055 }
1056
1057 /* 64 bit load sign-extended short scaled unsigned 12 bit.  */
1058 static void
1059 ldrsh_abs (sim_cpu *cpu, uint32_t offset)
1060 {
1061   unsigned rn = INSTR (9, 5);
1062   unsigned rt = INSTR (4, 0);
1063   int64_t val;
1064
1065   /* The target register may not be SP but the source may be.  */
1066   val = aarch64_get_mem_s16  (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1067                               + SCALE (offset, 16));
1068   aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1069 }
1070
1071 /* 64 bit load sign-extended short unscaled signed 9 bit
1072    with pre- or post-writeback.  */
1073 static void
1074 ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1075 {
1076   unsigned rn = INSTR (9, 5);
1077   unsigned rt = INSTR (4, 0);
1078   uint64_t address;
1079   int64_t val;
1080
1081   if (rn == rt && wb != NoWriteBack)
1082     HALT_UNALLOC;
1083
1084   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1085
1086   if (wb != Post)
1087     address += offset;
1088
1089   val = aarch64_get_mem_s16 (cpu, address);
1090   aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1091
1092   if (wb == Post)
1093     address += offset;
1094
1095   if (wb != NoWriteBack)
1096     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1097 }
1098
1099 /* 64 bit load sign-extended short scaled or unscaled zero-
1100    or sign-extended 32-bit register offset.  */
1101 static void
1102 ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1103 {
1104   unsigned rm = INSTR (20, 16);
1105   unsigned rn = INSTR (9, 5);
1106   unsigned rt = INSTR (4, 0);
1107
1108   /* rn may reference SP, rm and rt must reference ZR  */
1109
1110   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1111   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1112   uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1113   int64_t val;
1114
1115   val = aarch64_get_mem_s16 (cpu, address + displacement);
1116   aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1117 }
1118
1119 /* 64 bit load sign-extended 32 bit scaled unsigned 12 bit.  */
1120 static void
1121 ldrsw_abs (sim_cpu *cpu, uint32_t offset)
1122 {
1123   unsigned rn = INSTR (9, 5);
1124   unsigned rt = INSTR (4, 0);
1125   int64_t val;
1126
1127   val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1128                              + SCALE (offset, 32));
1129   /* The target register may not be SP but the source may be.  */
1130   return aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1131 }
1132
1133 /* 64 bit load sign-extended 32 bit unscaled signed 9 bit
1134    with pre- or post-writeback.  */
1135 static void
1136 ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1137 {
1138   unsigned rn = INSTR (9, 5);
1139   unsigned rt = INSTR (4, 0);
1140   uint64_t address;
1141
1142   if (rn == rt && wb != NoWriteBack)
1143     HALT_UNALLOC;
1144
1145   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1146
1147   if (wb != Post)
1148     address += offset;
1149
1150   aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));
1151
1152   if (wb == Post)
1153     address += offset;
1154
1155   if (wb != NoWriteBack)
1156     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1157 }
1158
1159 /* 64 bit load sign-extended 32 bit scaled or unscaled zero-
1160    or sign-extended 32-bit register offset.  */
1161 static void
1162 ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1163 {
1164   unsigned rm = INSTR (20, 16);
1165   unsigned rn = INSTR (9, 5);
1166   unsigned rt = INSTR (4, 0);
1167   /* rn may reference SP, rm and rt must reference ZR  */
1168
1169   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1170   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1171   uint64_t displacement =  OPT_SCALE (extended, 32, scaling);
1172
1173   aarch64_set_reg_s64 (cpu, rt, NO_SP,
1174                        aarch64_get_mem_s32 (cpu, address + displacement));
1175 }
1176
1177 /* N.B. with stores the value in source is written to the
1178    address identified by source2 modified by source3/offset.  */
1179
1180 /* 32 bit store scaled unsigned 12 bit.  */
1181 static void
1182 str32_abs (sim_cpu *cpu, uint32_t offset)
1183 {
1184   unsigned rn = INSTR (9, 5);
1185   unsigned rt = INSTR (4, 0);
1186
1187   /* The target register may not be SP but the source may be.  */
1188   aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
1189                              + SCALE (offset, 32)),
1190                        aarch64_get_reg_u32 (cpu, rt, NO_SP));
1191 }
1192
1193 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback.  */
1194 static void
1195 str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1196 {
1197   unsigned rn = INSTR (9, 5);
1198   unsigned rt = INSTR (4, 0);
1199   uint64_t address;
1200
1201   if (rn == rt && wb != NoWriteBack)
1202     HALT_UNALLOC;
1203
1204   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1205   if (wb != Post)
1206     address += offset;
1207
1208   aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));
1209
1210   if (wb == Post)
1211     address += offset;
1212
1213   if (wb != NoWriteBack)
1214     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1215 }
1216
1217 /* 32 bit store scaled or unscaled zero- or
1218    sign-extended 32-bit register offset.  */
1219 static void
1220 str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1221 {
1222   unsigned rm = INSTR (20, 16);
1223   unsigned rn = INSTR (9, 5);
1224   unsigned rt = INSTR (4, 0);
1225
1226   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1227   int64_t  extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1228   uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1229
1230   aarch64_set_mem_u32 (cpu, address + displacement,
1231                        aarch64_get_reg_u64 (cpu, rt, NO_SP));
1232 }
1233
1234 /* 64 bit store scaled unsigned 12 bit.  */
1235 static void
1236 str_abs (sim_cpu *cpu, uint32_t offset)
1237 {
1238   unsigned rn = INSTR (9, 5);
1239   unsigned rt = INSTR (4, 0);
1240
1241   aarch64_set_mem_u64 (cpu,
1242                        aarch64_get_reg_u64 (cpu, rn, SP_OK)
1243                        + SCALE (offset, 64),
1244                        aarch64_get_reg_u64 (cpu, rt, NO_SP));
1245 }
1246
1247 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback.  */
1248 static void
1249 str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1250 {
1251   unsigned rn = INSTR (9, 5);
1252   unsigned rt = INSTR (4, 0);
1253   uint64_t address;
1254
1255   if (rn == rt && wb != NoWriteBack)
1256     HALT_UNALLOC;
1257
1258   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1259
1260   if (wb != Post)
1261     address += offset;
1262
1263   aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));
1264
1265   if (wb == Post)
1266     address += offset;
1267
1268   if (wb != NoWriteBack)
1269     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1270 }
1271
1272 /* 64 bit store scaled or unscaled zero-
1273    or sign-extended 32-bit register offset.  */
1274 static void
1275 str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1276 {
1277   unsigned rm = INSTR (20, 16);
1278   unsigned rn = INSTR (9, 5);
1279   unsigned rt = INSTR (4, 0);
1280   /* rn may reference SP, rm and rt must reference ZR  */
1281
1282   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1283   int64_t   extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1284                                extension);
1285   uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1286
1287   aarch64_set_mem_u64 (cpu, address + displacement,
1288                        aarch64_get_reg_u64 (cpu, rt, NO_SP));
1289 }
1290
1291 /* 32 bit store byte scaled unsigned 12 bit.  */
1292 static void
1293 strb_abs (sim_cpu *cpu, uint32_t offset)
1294 {
1295   unsigned rn = INSTR (9, 5);
1296   unsigned rt = INSTR (4, 0);
1297
1298   /* The target register may not be SP but the source may be.
1299      There is no scaling required for a byte load.  */
1300   aarch64_set_mem_u8 (cpu,
1301                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
1302                       aarch64_get_reg_u8 (cpu, rt, NO_SP));
1303 }
1304
1305 /* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback.  */
1306 static void
1307 strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1308 {
1309   unsigned rn = INSTR (9, 5);
1310   unsigned rt = INSTR (4, 0);
1311   uint64_t address;
1312
1313   if (rn == rt && wb != NoWriteBack)
1314     HALT_UNALLOC;
1315
1316   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1317
1318   if (wb != Post)
1319     address += offset;
1320
1321   aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));
1322
1323   if (wb == Post)
1324     address += offset;
1325
1326   if (wb != NoWriteBack)
1327     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1328 }
1329
1330 /* 32 bit store byte scaled or unscaled zero-
1331    or sign-extended 32-bit register offset.  */
1332 static void
1333 strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1334 {
1335   unsigned rm = INSTR (20, 16);
1336   unsigned rn = INSTR (9, 5);
1337   unsigned rt = INSTR (4, 0);
1338   /* rn may reference SP, rm and rt must reference ZR  */
1339
1340   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1341   int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1342                                  extension);
1343
1344   /* There is no scaling required for a byte load.  */
1345   aarch64_set_mem_u8 (cpu, address + displacement,
1346                       aarch64_get_reg_u8 (cpu, rt, NO_SP));
1347 }
1348
1349 /* 32 bit store short scaled unsigned 12 bit.  */
1350 static void
1351 strh_abs (sim_cpu *cpu, uint32_t offset)
1352 {
1353   unsigned rn = INSTR (9, 5);
1354   unsigned rt = INSTR (4, 0);
1355
1356   /* The target register may not be SP but the source may be.  */
1357   aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1358                        + SCALE (offset, 16),
1359                        aarch64_get_reg_u16 (cpu, rt, NO_SP));
1360 }
1361
1362 /* 32 bit store short unscaled signed 9 bit with pre- or post-writeback.  */
1363 static void
1364 strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1365 {
1366   unsigned rn = INSTR (9, 5);
1367   unsigned rt = INSTR (4, 0);
1368   uint64_t address;
1369
1370   if (rn == rt && wb != NoWriteBack)
1371     HALT_UNALLOC;
1372
1373   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1374
1375   if (wb != Post)
1376     address += offset;
1377
1378   aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));
1379
1380   if (wb == Post)
1381     address += offset;
1382
1383   if (wb != NoWriteBack)
1384     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1385 }
1386
1387 /* 32 bit store short scaled or unscaled zero-
1388    or sign-extended 32-bit register offset.  */
1389 static void
1390 strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1391 {
1392   unsigned rm = INSTR (20, 16);
1393   unsigned rn = INSTR (9, 5);
1394   unsigned rt = INSTR (4, 0);
1395   /* rn may reference SP, rm and rt must reference ZR  */
1396
1397   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1398   int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1399   uint64_t displacement =  OPT_SCALE (extended, 16, scaling);
1400
1401   aarch64_set_mem_u16 (cpu, address + displacement,
1402                        aarch64_get_reg_u16 (cpu, rt, NO_SP));
1403 }
1404
/* Prefetch unsigned 12 bit.
   This is currently a placeholder with an empty body: neither CPU
   nor OFFSET is used.  The comment inside sketches the decode that
   an implementation would perform.  */
static void
prfm_abs (sim_cpu *cpu, uint32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
     + SCALE (offset, 64).  */

  /* TODO : implement prefetch of address.  */
}
1422
/* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset.
   This is currently a placeholder with an empty body: none of the
   arguments is used.  The comment inside sketches the decode that an
   implementation would perform.  */
static void
prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     rn may reference SP, rm may only reference ZR
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
     int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                extension);
     uint64_t displacement =  OPT_SCALE (extended, 64, scaling);
     uint64_t address = base + displacement.  */

  /* TODO : implement prefetch of address  */
}
1444
/* 64 bit pc-relative prefetch.
   This is currently a placeholder with an empty body: neither CPU
   nor OFFSET is used.  The comment inside sketches the decode that
   an implementation would perform.  */
static void
prfm_pcrel (sim_cpu *cpu, int32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_PC (cpu) + offset.  */

  /* TODO : implement this  */
}
1461
1462 /* Load-store exclusive.  */
1463
1464 static void
1465 ldxr (sim_cpu *cpu)
1466 {
1467   unsigned rn = INSTR (9, 5);
1468   unsigned rt = INSTR (4, 0);
1469   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1470   int size = INSTR (31, 30);
1471   /* int ordered = INSTR (15, 15);  */
1472   /* int exclusive = ! INSTR (23, 23);  */
1473
1474   switch (size)
1475     {
1476     case 0:
1477       aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
1478       break;
1479     case 1:
1480       aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
1481       break;
1482     case 2:
1483       aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
1484       break;
1485     case 3:
1486       aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
1487       break;
1488     }
1489 }
1490
1491 static void
1492 stxr (sim_cpu *cpu)
1493 {
1494   unsigned rn = INSTR (9, 5);
1495   unsigned rt = INSTR (4, 0);
1496   unsigned rs = INSTR (20, 16);
1497   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1498   int      size = INSTR (31, 30);
1499   uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);
1500
1501   switch (size)
1502     {
1503     case 0: aarch64_set_mem_u8 (cpu, address, data); break;
1504     case 1: aarch64_set_mem_u16 (cpu, address, data); break;
1505     case 2: aarch64_set_mem_u32 (cpu, address, data); break;
1506     case 3: aarch64_set_mem_u64 (cpu, address, data); break;
1507     }
1508
1509   aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Always exclusive...  */
1510 }
1511
1512 static void
1513 dexLoadLiteral (sim_cpu *cpu)
1514 {
1515   /* instr[29,27] == 011
1516      instr[25,24] == 00
1517      instr[31,30:26] = opc: 000 ==> LDRW,  001 ==> FLDRS
1518                             010 ==> LDRX,  011 ==> FLDRD
1519                             100 ==> LDRSW, 101 ==> FLDRQ
1520                             110 ==> PRFM, 111 ==> UNALLOC
1521      instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
1522      instr[23, 5] == simm19  */
1523
1524   /* unsigned rt = INSTR (4, 0);  */
1525   uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
1526   int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);
1527
1528   switch (dispatch)
1529     {
1530     case 0: ldr32_pcrel (cpu, imm); break;
1531     case 1: fldrs_pcrel (cpu, imm); break;
1532     case 2: ldr_pcrel   (cpu, imm); break;
1533     case 3: fldrd_pcrel (cpu, imm); break;
1534     case 4: ldrsw_pcrel (cpu, imm); break;
1535     case 5: fldrq_pcrel (cpu, imm); break;
1536     case 6: prfm_pcrel  (cpu, imm); break;
1537     case 7:
1538     default:
1539       HALT_UNALLOC;
1540     }
1541 }
1542
1543 /* Immediate arithmetic
1544    The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
1545    value left shifted by 12 bits (done at decode).
1546
   N.B. the register args (dest, source) can normally be Xn or SP.
   The exception is the flag-setting instructions, which may
   only use Xn for the output (dest).  */
1550
1551 /* 32 bit add immediate.  */
1552 static void
1553 add32 (sim_cpu *cpu, uint32_t aimm)
1554 {
1555   unsigned rn = INSTR (9, 5);
1556   unsigned rd = INSTR (4, 0);
1557
1558   aarch64_set_reg_u64 (cpu, rd, SP_OK,
1559                        aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
1560 }
1561
1562 /* 64 bit add immediate.  */
1563 static void
1564 add64 (sim_cpu *cpu, uint32_t aimm)
1565 {
1566   unsigned rn = INSTR (9, 5);
1567   unsigned rd = INSTR (4, 0);
1568
1569   aarch64_set_reg_u64 (cpu, rd, SP_OK,
1570                        aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
1571 }
1572
1573 static void
1574 set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
1575 {
1576   int32_t   result = value1 + value2;
1577   int64_t   sresult = (int64_t) value1 + (int64_t) value2;
1578   uint64_t  uresult = (uint64_t)(uint32_t) value1
1579     + (uint64_t)(uint32_t) value2;
1580   uint32_t  flags = 0;
1581
1582   if (result == 0)
1583     flags |= Z;
1584
1585   if (result & (1 << 31))
1586     flags |= N;
1587
1588   if (uresult != result)
1589     flags |= C;
1590
1591   if (sresult != result)
1592     flags |= V;
1593
1594   aarch64_set_CPSR (cpu, flags);
1595 }
1596
1597 static void
1598 set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1599 {
1600   int64_t   sval1 = value1;
1601   int64_t   sval2 = value2;
1602   uint64_t  result = value1 + value2;
1603   int64_t   sresult = sval1 + sval2;
1604   uint32_t  flags = 0;
1605
1606   if (result == 0)
1607     flags |= Z;
1608
1609   if (result & (1ULL << 63))
1610     flags |= N;
1611
1612   if (sval1 < 0)
1613     {
1614       if (sval2 < 0)
1615         {
1616           /* Negative plus a negative.  Overflow happens if
1617              the result is greater than either of the operands.  */
1618           if (sresult > sval1 || sresult > sval2)
1619             flags |= V;
1620         }
1621       /* else Negative plus a positive.  Overflow cannot happen.  */
1622     }
1623   else /* value1 is +ve.  */
1624     {
1625       if (sval2 < 0)
1626         {
1627           /* Overflow can only occur if we computed "0 - MININT".  */
1628           if (sval1 == 0 && sval2 == (1LL << 63))
1629             flags |= V;
1630         }
1631       else
1632         {
1633           /* Postive plus positive - overflow has happened if the
1634              result is smaller than either of the operands.  */
1635           if (result < value1 || result < value2)
1636             flags |= V | C;
1637         }
1638     }
1639
1640   aarch64_set_CPSR (cpu, flags);
1641 }
1642
/* Sign tests on a raw bit pattern.  Both macros expand to an
   expression that refers to an identifier named `signbit', which is
   not a macro parameter: it must be in scope at the point of use
   (the subtraction flag helpers below declare it as a local holding
   the top-bit mask for their operand width).  */
#define NEG(a) (((a) & signbit) == signbit)
#define POS(a) (((a) & signbit) == 0)
1645
1646 static void
1647 set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
1648 {
1649   uint32_t result = value1 - value2;
1650   uint32_t flags = 0;
1651   uint32_t signbit = 1U << 31;
1652
1653   if (result == 0)
1654     flags |= Z;
1655
1656   if (NEG (result))
1657     flags |= N;
1658
1659   if (   (NEG (value1) && POS (value2))
1660       || (NEG (value1) && POS (result))
1661       || (POS (value2) && POS (result)))
1662     flags |= C;
1663
1664   if (   (NEG (value1) && POS (value2) && POS (result))
1665       || (POS (value1) && NEG (value2) && NEG (result)))
1666     flags |= V;
1667
1668   aarch64_set_CPSR (cpu, flags);
1669 }
1670
1671 static void
1672 set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1673 {
1674   uint64_t result = value1 - value2;
1675   uint32_t flags = 0;
1676   uint64_t signbit = 1ULL << 63;
1677
1678   if (result == 0)
1679     flags |= Z;
1680
1681   if (NEG (result))
1682     flags |= N;
1683
1684   if (   (NEG (value1) && POS (value2))
1685       || (NEG (value1) && POS (result))
1686       || (POS (value2) && POS (result)))
1687     flags |= C;
1688
1689   if (   (NEG (value1) && POS (value2) && POS (result))
1690       || (POS (value1) && NEG (value2) && NEG (result)))
1691     flags |= V;
1692
1693   aarch64_set_CPSR (cpu, flags);
1694 }
1695
1696 static void
1697 set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
1698 {
1699   uint32_t flags = 0;
1700
1701   if (result == 0)
1702     flags |= Z;
1703   else
1704     flags &= ~ Z;
1705
1706   if (result & (1 << 31))
1707     flags |= N;
1708   else
1709     flags &= ~ N;
1710
1711   aarch64_set_CPSR (cpu, flags);
1712 }
1713
1714 static void
1715 set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
1716 {
1717   uint32_t flags = 0;
1718
1719   if (result == 0)
1720     flags |= Z;
1721   else
1722     flags &= ~ Z;
1723
1724   if (result & (1ULL << 63))
1725     flags |= N;
1726   else
1727     flags &= ~ N;
1728
1729   aarch64_set_CPSR (cpu, flags);
1730 }
1731
1732 /* 32 bit add immediate set flags.  */
1733 static void
1734 adds32 (sim_cpu *cpu, uint32_t aimm)
1735 {
1736   unsigned rn = INSTR (9, 5);
1737   unsigned rd = INSTR (4, 0);
1738   /* TODO : do we need to worry about signs here?  */
1739   int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);
1740
1741   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
1742   set_flags_for_add32 (cpu, value1, aimm);
1743 }
1744
1745 /* 64 bit add immediate set flags.  */
1746 static void
1747 adds64 (sim_cpu *cpu, uint32_t aimm)
1748 {
1749   unsigned rn = INSTR (9, 5);
1750   unsigned rd = INSTR (4, 0);
1751   uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1752   uint64_t value2 = aimm;
1753
1754   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1755   set_flags_for_add64 (cpu, value1, value2);
1756 }
1757
1758 /* 32 bit sub immediate.  */
1759 static void
1760 sub32 (sim_cpu *cpu, uint32_t aimm)
1761 {
1762   unsigned rn = INSTR (9, 5);
1763   unsigned rd = INSTR (4, 0);
1764
1765   aarch64_set_reg_u64 (cpu, rd, SP_OK,
1766                        aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
1767 }
1768
1769 /* 64 bit sub immediate.  */
1770 static void
1771 sub64 (sim_cpu *cpu, uint32_t aimm)
1772 {
1773   unsigned rn = INSTR (9, 5);
1774   unsigned rd = INSTR (4, 0);
1775
1776   aarch64_set_reg_u64 (cpu, rd, SP_OK,
1777                        aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
1778 }
1779
1780 /* 32 bit sub immediate set flags.  */
1781 static void
1782 subs32 (sim_cpu *cpu, uint32_t aimm)
1783 {
1784   unsigned rn = INSTR (9, 5);
1785   unsigned rd = INSTR (4, 0);
1786   uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1787   uint32_t value2 = aimm;
1788
1789   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1790   set_flags_for_sub32 (cpu, value1, value2);
1791 }
1792
1793 /* 64 bit sub immediate set flags.  */
1794 static void
1795 subs64 (sim_cpu *cpu, uint32_t aimm)
1796 {
1797   unsigned rn = INSTR (9, 5);
1798   unsigned rd = INSTR (4, 0);
1799   uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1800   uint32_t value2 = aimm;
1801
1802   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1803   set_flags_for_sub64 (cpu, value1, value2);
1804 }
1805
1806 /* Data Processing Register.  */
1807
1808 /* First two helpers to perform the shift operations.  */
1809
1810 static inline uint32_t
1811 shifted32 (uint32_t value, Shift shift, uint32_t count)
1812 {
1813   switch (shift)
1814     {
1815     default:
1816     case LSL:
1817       return (value << count);
1818     case LSR:
1819       return (value >> count);
1820     case ASR:
1821       {
1822         int32_t svalue = value;
1823         return (svalue >> count);
1824       }
1825     case ROR:
1826       {
1827         uint32_t top = value >> count;
1828         uint32_t bottom = value << (32 - count);
1829         return (bottom | top);
1830       }
1831     }
1832 }
1833
1834 static inline uint64_t
1835 shifted64 (uint64_t value, Shift shift, uint32_t count)
1836 {
1837   switch (shift)
1838     {
1839     default:
1840     case LSL:
1841       return (value << count);
1842     case LSR:
1843       return (value >> count);
1844     case ASR:
1845       {
1846         int64_t svalue = value;
1847         return (svalue >> count);
1848       }
1849     case ROR:
1850       {
1851         uint64_t top = value >> count;
1852         uint64_t bottom = value << (64 - count);
1853         return (bottom | top);
1854       }
1855     }
1856 }
1857
1858 /* Arithmetic shifted register.
1859    These allow an optional LSL, ASR or LSR to the second source
1860    register with a count up to the register bit count.
1861
1862    N.B register args may not be SP.  */
1863
1864 /* 32 bit ADD shifted register.  */
1865 static void
1866 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1867 {
1868   unsigned rm = INSTR (20, 16);
1869   unsigned rn = INSTR (9, 5);
1870   unsigned rd = INSTR (4, 0);
1871
1872   aarch64_set_reg_u64 (cpu, rd, NO_SP,
1873                        aarch64_get_reg_u32 (cpu, rn, NO_SP)
1874                        + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1875                                     shift, count));
1876 }
1877
1878 /* 64 bit ADD shifted register.  */
1879 static void
1880 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1881 {
1882   unsigned rm = INSTR (20, 16);
1883   unsigned rn = INSTR (9, 5);
1884   unsigned rd = INSTR (4, 0);
1885
1886   aarch64_set_reg_u64 (cpu, rd, NO_SP,
1887                        aarch64_get_reg_u64 (cpu, rn, NO_SP)
1888                        + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1889                                     shift, count));
1890 }
1891
1892 /* 32 bit ADD shifted register setting flags.  */
1893 static void
1894 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1895 {
1896   unsigned rm = INSTR (20, 16);
1897   unsigned rn = INSTR (9, 5);
1898   unsigned rd = INSTR (4, 0);
1899
1900   uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1901   uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1902                                shift, count);
1903
1904   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1905   set_flags_for_add32 (cpu, value1, value2);
1906 }
1907
1908 /* 64 bit ADD shifted register setting flags.  */
1909 static void
1910 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1911 {
1912   unsigned rm = INSTR (20, 16);
1913   unsigned rn = INSTR (9, 5);
1914   unsigned rd = INSTR (4, 0);
1915
1916   uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1917   uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1918                                shift, count);
1919
1920   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1921   set_flags_for_add64 (cpu, value1, value2);
1922 }
1923
1924 /* 32 bit SUB shifted register.  */
1925 static void
1926 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1927 {
1928   unsigned rm = INSTR (20, 16);
1929   unsigned rn = INSTR (9, 5);
1930   unsigned rd = INSTR (4, 0);
1931
1932   aarch64_set_reg_u64 (cpu, rd, NO_SP,
1933                        aarch64_get_reg_u32 (cpu, rn, NO_SP)
1934                        - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1935                                     shift, count));
1936 }
1937
1938 /* 64 bit SUB shifted register.  */
1939 static void
1940 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1941 {
1942   unsigned rm = INSTR (20, 16);
1943   unsigned rn = INSTR (9, 5);
1944   unsigned rd = INSTR (4, 0);
1945
1946   aarch64_set_reg_u64 (cpu, rd, NO_SP,
1947                        aarch64_get_reg_u64 (cpu, rn, NO_SP)
1948                        - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1949                                     shift, count));
1950 }
1951
1952 /* 32 bit SUB shifted register setting flags.  */
1953 static void
1954 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1955 {
1956   unsigned rm = INSTR (20, 16);
1957   unsigned rn = INSTR (9, 5);
1958   unsigned rd = INSTR (4, 0);
1959
1960   uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1961   uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1962                               shift, count);
1963
1964   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1965   set_flags_for_sub32 (cpu, value1, value2);
1966 }
1967
1968 /* 64 bit SUB shifted register setting flags.  */
1969 static void
1970 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1971 {
1972   unsigned rm = INSTR (20, 16);
1973   unsigned rn = INSTR (9, 5);
1974   unsigned rd = INSTR (4, 0);
1975
1976   uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1977   uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1978                                shift, count);
1979
1980   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1981   set_flags_for_sub64 (cpu, value1, value2);
1982 }
1983
1984 /* First a couple more helpers to fetch the
1985    relevant source register element either
1986    sign or zero extended as required by the
1987    extension value.  */
1988
1989 static uint32_t
1990 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
1991 {
1992   switch (extension)
1993     {
1994     case UXTB: return aarch64_get_reg_u8  (cpu, lo, NO_SP);
1995     case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
1996     case UXTW: /* Fall through.  */
1997     case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
1998     case SXTB: return aarch64_get_reg_s8  (cpu, lo, NO_SP);
1999     case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2000     case SXTW: /* Fall through.  */
2001     case SXTX: /* Fall through.  */
2002     default:   return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2003   }
2004 }
2005
2006 static uint64_t
2007 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2008 {
2009   switch (extension)
2010     {
2011     case UXTB: return aarch64_get_reg_u8  (cpu, lo, NO_SP);
2012     case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2013     case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2014     case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2015     case SXTB: return aarch64_get_reg_s8  (cpu, lo, NO_SP);
2016     case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2017     case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2018     case SXTX:
2019     default:   return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2020     }
2021 }
2022
/* Arithmetic extending register
   These allow an optional sign or zero extension of some portion of
   the second source register followed by an optional left shift of
   0 to 4 bits.

   N.B. output (dest) and first input arg (source) may normally be Xn
   or SP.  However, for flag setting operations dest can only be
   Xn.  Second input registers are always Xn.  */
2031
2032 /* 32 bit ADD extending register.  */
2033 static void
2034 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2035 {
2036   unsigned rm = INSTR (20, 16);
2037   unsigned rn = INSTR (9, 5);
2038   unsigned rd = INSTR (4, 0);
2039
2040   aarch64_set_reg_u64 (cpu, rd, SP_OK,
2041                        aarch64_get_reg_u32 (cpu, rn, SP_OK)
2042                        + (extreg32 (cpu, rm, extension) << shift));
2043 }
2044
2045 /* 64 bit ADD extending register.
2046    N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0.  */
2047 static void
2048 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2049 {
2050   unsigned rm = INSTR (20, 16);
2051   unsigned rn = INSTR (9, 5);
2052   unsigned rd = INSTR (4, 0);
2053
2054   aarch64_set_reg_u64 (cpu, rd, SP_OK,
2055                        aarch64_get_reg_u64 (cpu, rn, SP_OK)
2056                        + (extreg64 (cpu, rm, extension) << shift));
2057 }
2058
2059 /* 32 bit ADD extending register setting flags.  */
2060 static void
2061 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2062 {
2063   unsigned rm = INSTR (20, 16);
2064   unsigned rn = INSTR (9, 5);
2065   unsigned rd = INSTR (4, 0);
2066
2067   uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2068   uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2069
2070   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2071   set_flags_for_add32 (cpu, value1, value2);
2072 }
2073
2074 /* 64 bit ADD extending register setting flags  */
2075 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0  */
2076 static void
2077 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2078 {
2079   unsigned rm = INSTR (20, 16);
2080   unsigned rn = INSTR (9, 5);
2081   unsigned rd = INSTR (4, 0);
2082
2083   uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2084   uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2085
2086   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2087   set_flags_for_add64 (cpu, value1, value2);
2088 }
2089
2090 /* 32 bit SUB extending register.  */
2091 static void
2092 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2093 {
2094   unsigned rm = INSTR (20, 16);
2095   unsigned rn = INSTR (9, 5);
2096   unsigned rd = INSTR (4, 0);
2097
2098   aarch64_set_reg_u64 (cpu, rd, SP_OK,
2099                        aarch64_get_reg_u32 (cpu, rn, SP_OK)
2100                        - (extreg32 (cpu, rm, extension) << shift));
2101 }
2102
2103 /* 64 bit SUB extending register.  */
2104 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0.  */
2105 static void
2106 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2107 {
2108   unsigned rm = INSTR (20, 16);
2109   unsigned rn = INSTR (9, 5);
2110   unsigned rd = INSTR (4, 0);
2111
2112   aarch64_set_reg_u64 (cpu, rd, SP_OK,
2113                        aarch64_get_reg_u64 (cpu, rn, SP_OK)
2114                        - (extreg64 (cpu, rm, extension) << shift));
2115 }
2116
2117 /* 32 bit SUB extending register setting flags.  */
2118 static void
2119 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2120 {
2121   unsigned rm = INSTR (20, 16);
2122   unsigned rn = INSTR (9, 5);
2123   unsigned rd = INSTR (4, 0);
2124
2125   uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2126   uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2127
2128   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2129   set_flags_for_sub32 (cpu, value1, value2);
2130 }
2131
2132 /* 64 bit SUB extending register setting flags  */
2133 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0  */
2134 static void
2135 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2136 {
2137   unsigned rm = INSTR (20, 16);
2138   unsigned rn = INSTR (9, 5);
2139   unsigned rd = INSTR (4, 0);
2140
2141   uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2142   uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2143
2144   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2145   set_flags_for_sub64 (cpu, value1, value2);
2146 }
2147
2148 static void
2149 dexAddSubtractImmediate (sim_cpu *cpu)
2150 {
2151   /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
2152      instr[30]    = op : 0 ==> ADD, 1 ==> SUB
2153      instr[29]    = set : 0 ==> no flags, 1 ==> set flags
2154      instr[28,24] = 10001
2155      instr[23,22] = shift : 00 == LSL#0, 01 = LSL#12 1x = UNALLOC
2156      instr[21,10] = uimm12
2157      instr[9,5]   = Rn
2158      instr[4,0]   = Rd  */
2159
2160   /* N.B. the shift is applied at decode before calling the add/sub routine.  */
2161   uint32_t shift = INSTR (23, 22);
2162   uint32_t imm = INSTR (21, 10);
2163   uint32_t dispatch = INSTR (31, 29);
2164
2165   NYI_assert (28, 24, 0x11);
2166
2167   if (shift > 1)
2168     HALT_UNALLOC;
2169
2170   if (shift)
2171     imm <<= 12;
2172
2173   switch (dispatch)
2174     {
2175     case 0: add32 (cpu, imm); break;
2176     case 1: adds32 (cpu, imm); break;
2177     case 2: sub32 (cpu, imm); break;
2178     case 3: subs32 (cpu, imm); break;
2179     case 4: add64 (cpu, imm); break;
2180     case 5: adds64 (cpu, imm); break;
2181     case 6: sub64 (cpu, imm); break;
2182     case 7: subs64 (cpu, imm); break;
2183     }
2184 }
2185
2186 static void
2187 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2188 {
2189   /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
2190      instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2191      instr[28,24] = 01011
2192      instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2193      instr[21]    = 0
2194      instr[20,16] = Rm
2195      instr[15,10] = count : must be 0xxxxx for 32 bit
2196      instr[9,5]   = Rn
2197      instr[4,0]   = Rd  */
2198
2199   uint32_t size = INSTR (31, 31);
2200   uint32_t count = INSTR (15, 10);
2201   Shift shiftType = INSTR (23, 22);
2202
2203   NYI_assert (28, 24, 0x0B);
2204   NYI_assert (21, 21, 0);
2205
2206   /* Shift encoded as ROR is unallocated.  */
2207   if (shiftType == ROR)
2208     HALT_UNALLOC;
2209
2210   /* 32 bit operations must have count[5] = 0
2211      or else we have an UNALLOC.  */
2212   if (size == 0 && uimm (count, 5, 5))
2213     HALT_UNALLOC;
2214
2215   /* Dispatch on size:op i.e instr [31,29].  */
2216   switch (INSTR (31, 29))
2217     {
2218     case 0: add32_shift  (cpu, shiftType, count); break;
2219     case 1: adds32_shift (cpu, shiftType, count); break;
2220     case 2: sub32_shift  (cpu, shiftType, count); break;
2221     case 3: subs32_shift (cpu, shiftType, count); break;
2222     case 4: add64_shift  (cpu, shiftType, count); break;
2223     case 5: adds64_shift (cpu, shiftType, count); break;
2224     case 6: sub64_shift  (cpu, shiftType, count); break;
2225     case 7: subs64_shift (cpu, shiftType, count); break;
2226     }
2227 }
2228
2229 static void
2230 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2231 {
2232   /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
2233      instr[30]    = op : 0 ==> ADD, 1 ==> SUB
2234      instr[29]    = set? : 0 ==> no flags, 1 ==> set flags
2235      instr[28,24] = 01011
2236      instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2237      instr[21]    = 1
2238      instr[20,16] = Rm
2239      instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2240                              000 ==> LSL|UXTW, 001 ==> UXTZ,
2241                              000 ==> SXTB, 001 ==> SXTH,
2242                              000 ==> SXTW, 001 ==> SXTX,
2243      instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2244      instr[9,5]   = Rn
2245      instr[4,0]   = Rd  */
2246
2247   Extension extensionType = INSTR (15, 13);
2248   uint32_t shift = INSTR (12, 10);
2249
2250   NYI_assert (28, 24, 0x0B);
2251   NYI_assert (21, 21, 1);
2252
2253   /* Shift may not exceed 4.  */
2254   if (shift > 4)
2255     HALT_UNALLOC;
2256
2257   /* Dispatch on size:op:set?.  */
2258   switch (INSTR (31, 29))
2259     {
2260     case 0: add32_ext  (cpu, extensionType, shift); break;
2261     case 1: adds32_ext (cpu, extensionType, shift); break;
2262     case 2: sub32_ext  (cpu, extensionType, shift); break;
2263     case 3: subs32_ext (cpu, extensionType, shift); break;
2264     case 4: add64_ext  (cpu, extensionType, shift); break;
2265     case 5: adds64_ext (cpu, extensionType, shift); break;
2266     case 6: sub64_ext  (cpu, extensionType, shift); break;
2267     case 7: subs64_ext (cpu, extensionType, shift); break;
2268     }
2269 }
2270
2271 /* Conditional data processing
2272    Condition register is implicit 3rd source.  */
2273
2274 /* 32 bit add with carry.  */
2275 /* N.B register args may not be SP.  */
2276
2277 static void
2278 adc32 (sim_cpu *cpu)
2279 {
2280   unsigned rm = INSTR (20, 16);
2281   unsigned rn = INSTR (9, 5);
2282   unsigned rd = INSTR (4, 0);
2283
2284   aarch64_set_reg_u64 (cpu, rd, NO_SP,
2285                        aarch64_get_reg_u32 (cpu, rn, NO_SP)
2286                        + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2287                        + IS_SET (C));
2288 }
2289
2290 /* 64 bit add with carry  */
2291 static void
2292 adc64 (sim_cpu *cpu)
2293 {
2294   unsigned rm = INSTR (20, 16);
2295   unsigned rn = INSTR (9, 5);
2296   unsigned rd = INSTR (4, 0);
2297
2298   aarch64_set_reg_u64 (cpu, rd, NO_SP,
2299                        aarch64_get_reg_u64 (cpu, rn, NO_SP)
2300                        + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2301                        + IS_SET (C));
2302 }
2303
2304 /* 32 bit add with carry setting flags.  */
2305 static void
2306 adcs32 (sim_cpu *cpu)
2307 {
2308   unsigned rm = INSTR (20, 16);
2309   unsigned rn = INSTR (9, 5);
2310   unsigned rd = INSTR (4, 0);
2311
2312   uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2313   uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2314   uint32_t carry = IS_SET (C);
2315
2316   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2317   set_flags_for_add32 (cpu, value1, value2 + carry);
2318 }
2319
2320 /* 64 bit add with carry setting flags.  */
2321 static void
2322 adcs64 (sim_cpu *cpu)
2323 {
2324   unsigned rm = INSTR (20, 16);
2325   unsigned rn = INSTR (9, 5);
2326   unsigned rd = INSTR (4, 0);
2327
2328   uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2329   uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2330   uint64_t carry = IS_SET (C);
2331
2332   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2333   set_flags_for_add64 (cpu, value1, value2 + carry);
2334 }
2335
2336 /* 32 bit sub with carry.  */
2337 static void
2338 sbc32 (sim_cpu *cpu)
2339 {
2340   unsigned rm = INSTR (20, 16);
2341   unsigned rn = INSTR (9, 5); /* ngc iff rn == 31.  */
2342   unsigned rd = INSTR (4, 0);
2343
2344   aarch64_set_reg_u64 (cpu, rd, NO_SP,
2345                        aarch64_get_reg_u32 (cpu, rn, NO_SP)
2346                        - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2347                        - 1 + IS_SET (C));
2348 }
2349
2350 /* 64 bit sub with carry  */
2351 static void
2352 sbc64 (sim_cpu *cpu)
2353 {
2354   unsigned rm = INSTR (20, 16);
2355   unsigned rn = INSTR (9, 5);
2356   unsigned rd = INSTR (4, 0);
2357
2358   aarch64_set_reg_u64 (cpu, rd, NO_SP,
2359                        aarch64_get_reg_u64 (cpu, rn, NO_SP)
2360                        - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2361                        - 1 + IS_SET (C));
2362 }
2363
2364 /* 32 bit sub with carry setting flags  */
2365 static void
2366 sbcs32 (sim_cpu *cpu)
2367 {
2368   unsigned rm = INSTR (20, 16);
2369   unsigned rn = INSTR (9, 5);
2370   unsigned rd = INSTR (4, 0);
2371
2372   uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2373   uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2374   uint32_t carry  = IS_SET (C);
2375   uint32_t result = value1 - value2 + 1 - carry;
2376
2377   aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2378   set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2379 }
2380
2381 /* 64 bit sub with carry setting flags  */
2382 static void
2383 sbcs64 (sim_cpu *cpu)
2384 {
2385   unsigned rm = INSTR (20, 16);
2386   unsigned rn = INSTR (9, 5);
2387   unsigned rd = INSTR (4, 0);
2388
2389   uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2390   uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2391   uint64_t carry  = IS_SET (C);
2392   uint64_t result = value1 - value2 + 1 - carry;
2393
2394   aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2395   set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
2396 }
2397
2398 static void
2399 dexAddSubtractWithCarry (sim_cpu *cpu)
2400 {
2401   /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
2402      instr[30]    = op : 0 ==> ADC, 1 ==> SBC
2403      instr[29]    = set? : 0 ==> no flags, 1 ==> set flags
2404      instr[28,21] = 1 1010 000
2405      instr[20,16] = Rm
2406      instr[15,10] = op2 : 00000 ==> ok, ow ==> UNALLOC
2407      instr[9,5]   = Rn
2408      instr[4,0]   = Rd  */
2409
2410   uint32_t op2 = INSTR (15, 10);
2411
2412   NYI_assert (28, 21, 0xD0);
2413
2414   if (op2 != 0)
2415     HALT_UNALLOC;
2416
2417   /* Dispatch on size:op:set?.  */
2418   switch (INSTR (31, 29))
2419     {
2420     case 0: adc32 (cpu); break;
2421     case 1: adcs32 (cpu); break;
2422     case 2: sbc32 (cpu); break;
2423     case 3: sbcs32 (cpu); break;
2424     case 4: adc64 (cpu); break;
2425     case 5: adcs64 (cpu); break;
2426     case 6: sbc64 (cpu); break;
2427     case 7: sbcs64 (cpu); break;
2428     }
2429 }
2430
2431 static uint32_t
2432 testConditionCode (sim_cpu *cpu, CondCode cc)
2433 {
2434   /* This should be reduceable to branchless logic
2435      by some careful testing of bits in CC followed
2436      by the requisite masking and combining of bits
2437      from the flag register.
2438
2439      For now we do it with a switch.  */
2440   int res;
2441
2442   switch (cc)
2443     {
2444     case EQ:  res = IS_SET (Z);    break;
2445     case NE:  res = IS_CLEAR (Z);  break;
2446     case CS:  res = IS_SET (C);    break;
2447     case CC:  res = IS_CLEAR (C);  break;
2448     case MI:  res = IS_SET (N);    break;
2449     case PL:  res = IS_CLEAR (N);  break;
2450     case VS:  res = IS_SET (V);    break;
2451     case VC:  res = IS_CLEAR (V);  break;
2452     case HI:  res = IS_SET (C) && IS_CLEAR (Z);  break;
2453     case LS:  res = IS_CLEAR (C) || IS_SET (Z);  break;
2454     case GE:  res = IS_SET (N) == IS_SET (V);    break;
2455     case LT:  res = IS_SET (N) != IS_SET (V);    break;
2456     case GT:  res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V));  break;
2457     case LE:  res = IS_SET (Z) || (IS_SET (N) != IS_SET (V));    break;
2458     case AL:
2459     case NV:
2460     default:
2461       res = 1;
2462       break;
2463     }
2464   return res;
2465 }
2466
2467 static void
2468 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn  */
2469 {
2470   /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
2471      instr[30]    = compare with positive (1) or negative value (0)
2472      instr[29,21] = 1 1101 0010
2473      instr[20,16] = Rm or const
2474      instr[15,12] = cond
2475      instr[11]    = compare reg (0) or const (1)
2476      instr[10]    = 0
2477      instr[9,5]   = Rn
2478      instr[4]     = 0
2479      instr[3,0]   = value for CPSR bits if the comparison does not take place.  */
2480   signed int negate;
2481   unsigned rm;
2482   unsigned rn;
2483
2484   NYI_assert (29, 21, 0x1d2);
2485   NYI_assert (10, 10, 0);
2486   NYI_assert (4, 4, 0);
2487
2488   if (! testConditionCode (cpu, INSTR (15, 12)))
2489     {
2490       aarch64_set_CPSR (cpu, INSTR (3, 0));
2491       return;
2492     }
2493
2494   negate = INSTR (30, 30) ? 1 : -1;
2495   rm = INSTR (20, 16);
2496   rn = INSTR ( 9,  5);
2497
2498   if (INSTR (31, 31))
2499     {
2500       if (INSTR (11, 11))
2501         set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2502                              negate * (uint64_t) rm);
2503       else
2504         set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2505                              negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2506     }
2507   else
2508     {
2509       if (INSTR (11, 11))
2510         set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2511                              negate * rm);
2512       else
2513         set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2514                              negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
2515     }
2516 }
2517
2518 static void
2519 do_vec_MOV_whole_vector (sim_cpu *cpu)
2520 {
2521   /* MOV Vd.T, Vs.T  (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2522
2523      instr[31]    = 0
2524      instr[30]    = half(0)/full(1)
2525      instr[29,21] = 001110101
2526      instr[20,16] = Vs
2527      instr[15,10] = 000111
2528      instr[9,5]   = Vs
2529      instr[4,0]   = Vd  */
2530
2531   unsigned vs = INSTR (9, 5);
2532   unsigned vd = INSTR (4, 0);
2533
2534   NYI_assert (29, 21, 0x075);
2535   NYI_assert (15, 10, 0x07);
2536
2537   if (INSTR (20, 16) != vs)
2538     HALT_NYI;
2539
2540   if (INSTR (30, 30))
2541     aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2542
2543   aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2544 }
2545
2546 static void
2547 do_vec_MOV_into_scalar (sim_cpu *cpu)
2548 {
2549   /* instr[31]    = 0
2550      instr[30]    = word(0)/long(1)
2551      instr[29,21] = 00 1110 000
2552      instr[20,18] = element size and index
2553      instr[17,10] = 00 0011 11
2554      instr[9,5]   = V source
2555      instr[4,0]   = R dest  */
2556
2557   unsigned vs = INSTR (9, 5);
2558   unsigned rd = INSTR (4, 0);
2559
2560   NYI_assert (29, 21, 0x070);
2561   NYI_assert (17, 10, 0x0F);
2562
2563   switch (INSTR (20, 18))
2564     {
2565     case 0x2:
2566       aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 0));
2567       break;
2568
2569     case 0x6:
2570       aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 1));
2571       break;
2572
2573     case 0x1:
2574     case 0x3:
2575     case 0x5:
2576     case 0x7:
2577       aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u32
2578                            (cpu, vs, INSTR (20, 19)));
2579       break;
2580
2581     default:
2582       HALT_NYI;
2583     }
2584 }
2585
2586 static void
2587 do_vec_INS (sim_cpu *cpu)
2588 {
2589   /* instr[31,21] = 01001110000
2590      instr[20,16] = element size and index
2591      instr[15,10] = 000111
2592      instr[9,5]   = W source
2593      instr[4,0]   = V dest  */
2594
2595   int index;
2596   unsigned rs = INSTR (9, 5);
2597   unsigned vd = INSTR (4, 0);
2598
2599   NYI_assert (31, 21, 0x270);
2600   NYI_assert (15, 10, 0x07);
2601
2602   if (INSTR (16, 16))
2603     {
2604       index = INSTR (20, 17);
2605       aarch64_set_vec_u8 (cpu, vd, index,
2606                           aarch64_get_reg_u8 (cpu, rs, NO_SP));
2607     }
2608   else if (INSTR (17, 17))
2609     {
2610       index = INSTR (20, 18);
2611       aarch64_set_vec_u16 (cpu, vd, index,
2612                            aarch64_get_reg_u16 (cpu, rs, NO_SP));
2613     }
2614   else if (INSTR (18, 18))
2615     {
2616       index = INSTR (20, 19);
2617       aarch64_set_vec_u32 (cpu, vd, index,
2618                            aarch64_get_reg_u32 (cpu, rs, NO_SP));
2619     }
2620   else if (INSTR (19, 19))
2621     {
2622       index = INSTR (20, 20);
2623       aarch64_set_vec_u64 (cpu, vd, index,
2624                            aarch64_get_reg_u64 (cpu, rs, NO_SP));
2625     }
2626   else
2627     HALT_NYI;
2628 }
2629
2630 static void
2631 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2632 {
2633   /* instr[31]    = 0
2634      instr[30]    = half(0)/full(1)
2635      instr[29,21] = 00 1110 000
2636      instr[20,16] = element size and index
2637      instr[15,10] = 0000 01
2638      instr[9,5]   = V source
2639      instr[4,0]   = V dest.  */
2640
2641   unsigned full = INSTR (30, 30);
2642   unsigned vs = INSTR (9, 5);
2643   unsigned vd = INSTR (4, 0);
2644   int i, index;
2645
2646   NYI_assert (29, 21, 0x070);
2647   NYI_assert (15, 10, 0x01);
2648
2649   if (INSTR (16, 16))
2650     {
2651       index = INSTR (20, 17);
2652
2653       for (i = 0; i < (full ? 16 : 8); i++)
2654         aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2655     }
2656   else if (INSTR (17, 17))
2657     {
2658       index = INSTR (20, 18);
2659
2660       for (i = 0; i < (full ? 8 : 4); i++)
2661         aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2662     }
2663   else if (INSTR (18, 18))
2664     {
2665       index = INSTR (20, 19);
2666
2667       for (i = 0; i < (full ? 4 : 2); i++)
2668         aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2669     }
2670   else
2671     {
2672       if (INSTR (19, 19) == 0)
2673         HALT_UNALLOC;
2674
2675       if (! full)
2676         HALT_UNALLOC;
2677
2678       index = INSTR (20, 20);
2679
2680       for (i = 0; i < 2; i++)
2681         aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2682     }
2683 }
2684
2685 static void
2686 do_vec_TBL (sim_cpu *cpu)
2687 {
2688   /* instr[31]    = 0
2689      instr[30]    = half(0)/full(1)
2690      instr[29,21] = 00 1110 000
2691      instr[20,16] = Vm
2692      instr[15]    = 0
2693      instr[14,13] = vec length
2694      instr[12,10] = 000
2695      instr[9,5]   = V start
2696      instr[4,0]   = V dest  */
2697
2698   int full    = INSTR (30, 30);
2699   int len     = INSTR (14, 13) + 1;
2700   unsigned vm = INSTR (20, 16);
2701   unsigned vn = INSTR (9, 5);
2702   unsigned vd = INSTR (4, 0);
2703   unsigned i;
2704
2705   NYI_assert (29, 21, 0x070);
2706   NYI_assert (12, 10, 0);
2707
2708   for (i = 0; i < (full ? 16 : 8); i++)
2709     {
2710       unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2711       uint8_t val;
2712
2713       if (selector < 16)
2714         val = aarch64_get_vec_u8 (cpu, vn, selector);
2715       else if (selector < 32)
2716         val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2717       else if (selector < 48)
2718         val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2719       else if (selector < 64)
2720         val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
2721       else
2722         val = 0;
2723
2724       aarch64_set_vec_u8 (cpu, vd, i, val);
2725     }
2726 }
2727
2728 static void
2729 do_vec_TRN (sim_cpu *cpu)
2730 {
2731   /* instr[31]    = 0
2732      instr[30]    = half(0)/full(1)
2733      instr[29,24] = 00 1110
2734      instr[23,22] = size
2735      instr[21]    = 0
2736      instr[20,16] = Vm
2737      instr[15]    = 0
2738      instr[14]    = TRN1 (0) / TRN2 (1)
2739      instr[13,10] = 1010
2740      instr[9,5]   = V source
2741      instr[4,0]   = V dest.  */
2742
2743   int full    = INSTR (30, 30);
2744   int second  = INSTR (14, 14);
2745   unsigned vm = INSTR (20, 16);
2746   unsigned vn = INSTR (9, 5);
2747   unsigned vd = INSTR (4, 0);
2748   unsigned i;
2749
2750   NYI_assert (29, 24, 0x0E);
2751   NYI_assert (13, 10, 0xA);
2752
2753   switch (INSTR (23, 22))
2754     {
2755     case 0:
2756       for (i = 0; i < (full ? 8 : 4); i++)
2757         {
2758           aarch64_set_vec_u8
2759             (cpu, vd, i * 2,
2760              aarch64_get_vec_u8 (cpu, second ? vm : vn, i * 2));
2761           aarch64_set_vec_u8
2762             (cpu, vd, 1 * 2 + 1,
2763              aarch64_get_vec_u8 (cpu, second ? vn : vm, i * 2 + 1));
2764         }
2765       break;
2766
2767     case 1:
2768       for (i = 0; i < (full ? 4 : 2); i++)
2769         {
2770           aarch64_set_vec_u16
2771             (cpu, vd, i * 2,
2772              aarch64_get_vec_u16 (cpu, second ? vm : vn, i * 2));
2773           aarch64_set_vec_u16
2774             (cpu, vd, 1 * 2 + 1,
2775              aarch64_get_vec_u16 (cpu, second ? vn : vm, i * 2 + 1));
2776         }
2777       break;
2778
2779     case 2:
2780       aarch64_set_vec_u32
2781         (cpu, vd, 0, aarch64_get_vec_u32 (cpu, second ? vm : vn, 0));
2782       aarch64_set_vec_u32
2783         (cpu, vd, 1, aarch64_get_vec_u32 (cpu, second ? vn : vm, 1));
2784       aarch64_set_vec_u32
2785         (cpu, vd, 2, aarch64_get_vec_u32 (cpu, second ? vm : vn, 2));
2786       aarch64_set_vec_u32
2787         (cpu, vd, 3, aarch64_get_vec_u32 (cpu, second ? vn : vm, 3));
2788       break;
2789
2790     case 3:
2791       if (! full)
2792         HALT_UNALLOC;
2793
2794       aarch64_set_vec_u64 (cpu, vd, 0,
2795                            aarch64_get_vec_u64 (cpu, second ? vm : vn, 0));
2796       aarch64_set_vec_u64 (cpu, vd, 1,
2797                            aarch64_get_vec_u64 (cpu, second ? vn : vm, 1));
2798       break;
2799     }
2800 }
2801
2802 static void
2803 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2804 {
2805   /* instr[31]    = 0
2806      instr[30]    = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2807                     [must be 1 for 64-bit xfer]
2808      instr[29,20] = 00 1110 0000
2809      instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2810                                   0100=> 32-bits. 1000=>64-bits
2811      instr[15,10] = 0000 11
2812      instr[9,5]   = W source
2813      instr[4,0]   = V dest.  */
2814
2815   unsigned i;
2816   unsigned Vd = INSTR (4, 0);
2817   unsigned Rs = INSTR (9, 5);
2818   int both    = INSTR (30, 30);
2819
2820   NYI_assert (29, 20, 0x0E0);
2821   NYI_assert (15, 10, 0x03);
2822
2823   switch (INSTR (19, 16))
2824     {
2825     case 1:
2826       for (i = 0; i < (both ? 16 : 8); i++)
2827         aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
2828       break;
2829
2830     case 2:
2831       for (i = 0; i < (both ? 8 : 4); i++)
2832         aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
2833       break;
2834
2835     case 4:
2836       for (i = 0; i < (both ? 4 : 2); i++)
2837         aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
2838       break;
2839
2840     case 8:
2841       if (!both)
2842         HALT_NYI;
2843       aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2844       aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2845       break;
2846
2847     default:
2848       HALT_NYI;
2849     }
2850 }
2851
2852 static void
2853 do_vec_UZP (sim_cpu *cpu)
2854 {
2855   /* instr[31]    = 0
2856      instr[30]    = half(0)/full(1)
2857      instr[29,24] = 00 1110
2858      instr[23,22] = size: byte(00), half(01), word (10), long (11)
2859      instr[21]    = 0
2860      instr[20,16] = Vm
2861      instr[15]    = 0
2862      instr[14]    = lower (0) / upper (1)
2863      instr[13,10] = 0110
2864      instr[9,5]   = Vn
2865      instr[4,0]   = Vd.  */
2866
2867   int full = INSTR (30, 30);
2868   int upper = INSTR (14, 14);
2869
2870   unsigned vm = INSTR (20, 16);
2871   unsigned vn = INSTR (9, 5);
2872   unsigned vd = INSTR (4, 0);
2873
2874   uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
2875   uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
2876   uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
2877   uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
2878
2879   uint64_t val1 = 0;
2880   uint64_t val2 = 0;
2881
2882   uint64_t input1 = upper ? val_n1 : val_m1;
2883   uint64_t input2 = upper ? val_n2 : val_m2;
2884   unsigned i;
2885
2886   NYI_assert (29, 24, 0x0E);
2887   NYI_assert (21, 21, 0);
2888   NYI_assert (15, 15, 0);
2889   NYI_assert (13, 10, 6);
2890
2891   switch (INSTR (23, 23))
2892     {
2893     case 0:
2894       for (i = 0; i < 8; i++)
2895         {
2896           val1 |= (input1 >> (i * 8)) & (0xFFULL << (i * 8));
2897           val2 |= (input2 >> (i * 8)) & (0xFFULL << (i * 8));
2898         }
2899       break;
2900
2901     case 1:
2902       for (i = 0; i < 4; i++)
2903         {
2904           val1 |= (input1 >> (i * 16)) & (0xFFFFULL << (i * 16));
2905           val2 |= (input2 >> (i * 16)) & (0xFFFFULL << (i * 16));
2906         }
2907       break;
2908
2909     case 2:
2910       val1 = ((input1 & 0xFFFFFFFF) | ((input1 >> 32) & 0xFFFFFFFF00000000ULL));
2911       val2 = ((input2 & 0xFFFFFFFF) | ((input2 >> 32) & 0xFFFFFFFF00000000ULL));
2912
2913     case 3:
2914       val1 = input1;
2915       val2 = input2;
2916            break;
2917     }
2918
2919   aarch64_set_vec_u64 (cpu, vd, 0, val1);
2920   if (full)
2921     aarch64_set_vec_u64 (cpu, vd, 1, val2);
2922 }
2923
2924 static void
2925 do_vec_ZIP (sim_cpu *cpu)
2926 {
2927   /* instr[31]    = 0
2928      instr[30]    = half(0)/full(1)
2929      instr[29,24] = 00 1110
2930      instr[23,22] = size: byte(00), hald(01), word (10), long (11)
2931      instr[21]    = 0
2932      instr[20,16] = Vm
2933      instr[15]    = 0
2934      instr[14]    = lower (0) / upper (1)
2935      instr[13,10] = 1110
2936      instr[9,5]   = Vn
2937      instr[4,0]   = Vd.  */
2938
2939   int full = INSTR (30, 30);
2940   int upper = INSTR (14, 14);
2941
2942   unsigned vm = INSTR (20, 16);
2943   unsigned vn = INSTR (9, 5);
2944   unsigned vd = INSTR (4, 0);
2945
2946   uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
2947   uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
2948   uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
2949   uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
2950
2951   uint64_t val1 = 0;
2952   uint64_t val2 = 0;
2953
2954   uint64_t input1 = upper ? val_n1 : val_m1;
2955   uint64_t input2 = upper ? val_n2 : val_m2;
2956
2957   NYI_assert (29, 24, 0x0E);
2958   NYI_assert (21, 21, 0);
2959   NYI_assert (15, 15, 0);
2960   NYI_assert (13, 10, 0xE);
2961
2962   switch (INSTR (23, 23))
2963     {
2964     case 0:
2965       val1 =
2966           ((input1 <<  0) & (0xFF    <<  0))
2967         | ((input2 <<  8) & (0xFF    <<  8))
2968         | ((input1 <<  8) & (0xFF    << 16))
2969         | ((input2 << 16) & (0xFF    << 24))
2970         | ((input1 << 16) & (0xFFULL << 32))
2971         | ((input2 << 24) & (0xFFULL << 40))
2972         | ((input1 << 24) & (0xFFULL << 48))
2973         | ((input2 << 32) & (0xFFULL << 56));
2974
2975       val2 =
2976           ((input1 >> 32) & (0xFF    <<  0))
2977         | ((input2 >> 24) & (0xFF    <<  8))
2978         | ((input1 >> 24) & (0xFF    << 16))
2979         | ((input2 >> 16) & (0xFF    << 24))
2980         | ((input1 >> 16) & (0xFFULL << 32))
2981         | ((input2 >>  8) & (0xFFULL << 40))
2982         | ((input1 >>  8) & (0xFFULL << 48))
2983         | ((input2 >>  0) & (0xFFULL << 56));
2984       break;
2985
2986     case 1:
2987       val1 =
2988           ((input1 <<  0) & (0xFFFF    <<  0))
2989         | ((input2 << 16) & (0xFFFF    << 16))
2990         | ((input1 << 16) & (0xFFFFULL << 32))
2991         | ((input2 << 32) & (0xFFFFULL << 48));
2992
2993       val2 =
2994           ((input1 >> 32) & (0xFFFF    <<  0))
2995         | ((input2 >> 16) & (0xFFFF    << 16))
2996         | ((input1 >> 16) & (0xFFFFULL << 32))
2997         | ((input2 >>  0) & (0xFFFFULL << 48));
2998       break;
2999
3000     case 2:
3001       val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3002       val2 = (input2 & 0xFFFFFFFFULL) | (input1 << 32);
3003       break;
3004
3005     case 3:
3006       val1 = input1;
3007       val2 = input2;
3008       break;
3009     }
3010
3011   aarch64_set_vec_u64 (cpu, vd, 0, val1);
3012   if (full)
3013     aarch64_set_vec_u64 (cpu, vd, 1, val2);
3014 }
3015
3016 /* Floating point immediates are encoded in 8 bits.
3017    fpimm[7] = sign bit.
3018    fpimm[6:4] = signed exponent.
3019    fpimm[3:0] = fraction (assuming leading 1).
3020    i.e. F = s * 1.f * 2^(e - b).  */
3021
static float
fp_immediate_for_encoding_32 (uint32_t imm8)
{
  /* Expand an 8-bit floating point immediate into a float.
     imm8[7] is the sign, imm8[6,4] the exponent field and imm8[3,0]
     the fraction.  */
  const uint32_t negative = (imm8 >> 7) & 0x1;
  const uint32_t exponent = (imm8 >> 4) & 0x7;
  const uint32_t fraction = imm8 & 0xf;
  uint32_t steps;

  /* The mantissa is 1.f, i.e. (16 + f) / 16.  */
  float value = (16.0 + fraction) / 16.0;

  /* N.B. the exponent field is signed: fields 0..3 scale the mantissa
     up by 2^(e + 1), fields 4..7 scale it down by 2^(7 - e).  */
  if (exponent < 4)
    {
      for (steps = exponent + 1; steps > 0; steps--)
        value *= 2.0;
    }
  else
    {
      for (steps = 7 - exponent; steps > 0; steps--)
        value /= 2.0;
    }

  return negative ? - value : value;
}
3056
static double
fp_immediate_for_encoding_64 (uint32_t imm8)
{
  /* Expand an 8-bit floating point immediate into a double.
     imm8[7] is the sign, imm8[6,4] the exponent field and imm8[3,0]
     the fraction.  */
  const uint32_t negative = (imm8 >> 7) & 0x1;
  const uint32_t exponent = (imm8 >> 4) & 0x7;
  const uint32_t fraction = imm8 & 0xf;
  uint32_t steps;

  /* The mantissa is 1.f, i.e. (16 + f) / 16.  */
  double value = (16.0 + fraction) / 16.0;

  /* N.B. the exponent field is signed: fields 0..3 scale the mantissa
     up by 2^(e + 1), fields 4..7 scale it down by 2^(7 - e).  */
  if (exponent < 4)
    {
      for (steps = exponent + 1; steps > 0; steps--)
        value *= 2.0;
    }
  else
    {
      for (steps = 7 - exponent; steps > 0; steps--)
        value /= 2.0;
    }

  return negative ? - value : value;
}
3091
3092 static void
3093 do_vec_MOV_immediate (sim_cpu *cpu)
3094 {
3095   /* instr[31]    = 0
3096      instr[30]    = full/half selector
3097      instr[29,19] = 00111100000
3098      instr[18,16] = high 3 bits of uimm8
3099      instr[15,12] = size & shift:
3100                                   0000 => 32-bit
3101                                   0010 => 32-bit + LSL#8
3102                                   0100 => 32-bit + LSL#16
3103                                   0110 => 32-bit + LSL#24
3104                                   1010 => 16-bit + LSL#8
3105                                   1000 => 16-bit
3106                                   1101 => 32-bit + MSL#16
3107                                   1100 => 32-bit + MSL#8
3108                                   1110 => 8-bit
3109                                   1111 => double
3110      instr[11,10] = 01
3111      instr[9,5]   = low 5-bits of uimm8
3112      instr[4,0]   = Vd.  */
3113
3114   int full     = INSTR (30, 30);
3115   unsigned vd  = INSTR (4, 0);
3116   unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3117   unsigned i;
3118
3119   NYI_assert (29, 19, 0x1E0);
3120   NYI_assert (11, 10, 1);
3121
3122   switch (INSTR (15, 12))
3123     {
3124     case 0x0: /* 32-bit, no shift.  */
3125     case 0x2: /* 32-bit, shift by 8.  */
3126     case 0x4: /* 32-bit, shift by 16.  */
3127     case 0x6: /* 32-bit, shift by 24.  */
3128       val <<= (8 * INSTR (14, 13));
3129       for (i = 0; i < (full ? 4 : 2); i++)
3130         aarch64_set_vec_u32 (cpu, vd, i, val);
3131       break;
3132
3133     case 0xa: /* 16-bit, shift by 8.  */
3134       val <<= 8;
3135       /* Fall through.  */
3136     case 0x8: /* 16-bit, no shift.  */
3137       for (i = 0; i < (full ? 8 : 4); i++)
3138         aarch64_set_vec_u16 (cpu, vd, i, val);
3139       /* Fall through.  */
3140     case 0xd: /* 32-bit, mask shift by 16.  */
3141       val <<= 8;
3142       val |= 0xFF;
3143       /* Fall through.  */
3144     case 0xc: /* 32-bit, mask shift by 8. */
3145       val <<= 8;
3146       val |= 0xFF;
3147       for (i = 0; i < (full ? 4 : 2); i++)
3148         aarch64_set_vec_u32 (cpu, vd, i, val);
3149       break;
3150
3151     case 0xe: /* 8-bit, no shift.  */
3152       for (i = 0; i < (full ? 16 : 8); i++)
3153         aarch64_set_vec_u8 (cpu, vd, i, val);
3154       break;
3155
3156     case 0xf: /* FMOV Vs.{2|4}S, #fpimm.  */
3157       {
3158         float u = fp_immediate_for_encoding_32 (val);
3159         for (i = 0; i < (full ? 4 : 2); i++)
3160           aarch64_set_vec_float (cpu, vd, i, u);
3161         break;
3162       }
3163
3164     default:
3165       HALT_NYI;
3166     }
3167 }
3168
3169 static void
3170 do_vec_MVNI (sim_cpu *cpu)
3171 {
3172   /* instr[31]    = 0
3173      instr[30]    = full/half selector
3174      instr[29,19] = 10111100000
3175      instr[18,16] = high 3 bits of uimm8
3176      instr[15,12] = selector
3177      instr[11,10] = 01
3178      instr[9,5]   = low 5-bits of uimm8
3179      instr[4,0]   = Vd.  */
3180
3181   int full     = INSTR (30, 30);
3182   unsigned vd  = INSTR (4, 0);
3183   unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3184   unsigned i;
3185
3186   NYI_assert (29, 19, 0x5E0);
3187   NYI_assert (11, 10, 1);
3188
3189   switch (INSTR (15, 12))
3190     {
3191     case 0x0: /* 32-bit, no shift.  */
3192     case 0x2: /* 32-bit, shift by 8.  */
3193     case 0x4: /* 32-bit, shift by 16.  */
3194     case 0x6: /* 32-bit, shift by 24.  */
3195       val <<= (8 * INSTR (14, 13));
3196       val = ~ val;
3197       for (i = 0; i < (full ? 4 : 2); i++)
3198         aarch64_set_vec_u32 (cpu, vd, i, val);
3199       return;
3200
3201     case 0xa: /* 16-bit, 8 bit shift. */
3202       val <<= 8;
3203     case 0x8: /* 16-bit, no shift. */
3204       val = ~ val;
3205       for (i = 0; i < (full ? 8 : 4); i++)
3206         aarch64_set_vec_u16 (cpu, vd, i, val);
3207       return;
3208
3209     case 0xd: /* 32-bit, mask shift by 16.  */
3210       val <<= 8;
3211       val |= 0xFF;
3212     case 0xc: /* 32-bit, mask shift by 8. */
3213       val <<= 8;
3214       val |= 0xFF;
3215       val = ~ val;
3216       for (i = 0; i < (full ? 4 : 2); i++)
3217         aarch64_set_vec_u32 (cpu, vd, i, val);
3218       return;
3219
3220     case 0xE: /* MOVI Dn, #mask64 */
3221       {
3222         uint64_t mask = 0;
3223
3224         for (i = 0; i < 8; i++)
3225           if (val & (1 << i))
3226             mask |= (0xFFUL << (i * 8));
3227         aarch64_set_vec_u64 (cpu, vd, 0, mask);
3228         aarch64_set_vec_u64 (cpu, vd, 1, mask);
3229         return;
3230       }
3231
3232     case 0xf: /* FMOV Vd.2D, #fpimm.  */
3233       {
3234         double u = fp_immediate_for_encoding_64 (val);
3235
3236         if (! full)
3237           HALT_UNALLOC;
3238
3239         aarch64_set_vec_double (cpu, vd, 0, u);
3240         aarch64_set_vec_double (cpu, vd, 1, u);
3241         return;
3242       }
3243
3244     default:
3245       HALT_NYI;
3246     }
3247 }
3248
3249 #define ABS(A) ((A) < 0 ? - (A) : (A))
3250
3251 static void
3252 do_vec_ABS (sim_cpu *cpu)
3253 {
3254   /* instr[31]    = 0
3255      instr[30]    = half(0)/full(1)
3256      instr[29,24] = 00 1110
3257      instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3258      instr[21,10] = 10 0000 1011 10
3259      instr[9,5]   = Vn
3260      instr[4.0]   = Vd.  */
3261
3262   unsigned vn = INSTR (9, 5);
3263   unsigned vd = INSTR (4, 0);
3264   unsigned full = INSTR (30, 30);
3265   unsigned i;
3266
3267   NYI_assert (29, 24, 0x0E);
3268   NYI_assert (21, 10, 0x82E);
3269
3270   switch (INSTR (23, 22))
3271     {
3272     case 0:
3273       for (i = 0; i < (full ? 16 : 8); i++)
3274         aarch64_set_vec_s8 (cpu, vd, i,
3275                             ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3276       break;
3277
3278     case 1:
3279       for (i = 0; i < (full ? 8 : 4); i++)
3280         aarch64_set_vec_s16 (cpu, vd, i,
3281                              ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3282       break;
3283
3284     case 2:
3285       for (i = 0; i < (full ? 4 : 2); i++)
3286         aarch64_set_vec_s32 (cpu, vd, i,
3287                              ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3288       break;
3289
3290     case 3:
3291       if (! full)
3292         HALT_NYI;
3293       for (i = 0; i < 2; i++)
3294         aarch64_set_vec_s64 (cpu, vd, i,
3295                              ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3296       break;
3297     }
3298 }
3299
3300 static void
3301 do_vec_ADDV (sim_cpu *cpu)
3302 {
3303   /* instr[31]    = 0
3304      instr[30]    = full/half selector
3305      instr[29,24] = 00 1110
3306      instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3307      instr[21,10] = 11 0001 1011 10
3308      instr[9,5]   = Vm
3309      instr[4.0]   = Rd.  */
3310
3311   unsigned vm = INSTR (9, 5);
3312   unsigned rd = INSTR (4, 0);
3313   unsigned i;
3314   uint64_t val = 0;
3315   int      full = INSTR (30, 30);
3316
3317   NYI_assert (29, 24, 0x0E);
3318   NYI_assert (21, 10, 0xC6E);
3319
3320   switch (INSTR (23, 22))
3321     {
3322     case 0:
3323       for (i = 0; i < (full ? 16 : 8); i++)
3324         val += aarch64_get_vec_u8 (cpu, vm, i);
3325       aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3326       return;
3327
3328     case 1:
3329       for (i = 0; i < (full ? 8 : 4); i++)
3330         val += aarch64_get_vec_u16 (cpu, vm, i);
3331       aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3332       return;
3333
3334     case 2:
3335       for (i = 0; i < (full ? 4 : 2); i++)
3336         val += aarch64_get_vec_u32 (cpu, vm, i);
3337       aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3338       return;
3339
3340     case 3:
3341       if (! full)
3342         HALT_UNALLOC;
3343       val = aarch64_get_vec_u64 (cpu, vm, 0);
3344       val += aarch64_get_vec_u64 (cpu, vm, 1);
3345       aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3346       return;
3347     }
3348 }
3349
3350 static void
3351 do_vec_ins_2 (sim_cpu *cpu)
3352 {
3353   /* instr[31,21] = 01001110000
3354      instr[20,18] = size & element selector
3355      instr[17,14] = 0000
3356      instr[13]    = direction: to vec(0), from vec (1)
3357      instr[12,10] = 111
3358      instr[9,5]   = Vm
3359      instr[4,0]   = Vd.  */
3360
3361   unsigned elem;
3362   unsigned vm = INSTR (9, 5);
3363   unsigned vd = INSTR (4, 0);
3364
3365   NYI_assert (31, 21, 0x270);
3366   NYI_assert (17, 14, 0);
3367   NYI_assert (12, 10, 7);
3368
3369   if (INSTR (13, 13) == 1)
3370     {
3371       if (INSTR (18, 18) == 1)
3372         {
3373           /* 32-bit moves.  */
3374           elem = INSTR (20, 19);
3375           aarch64_set_reg_u64 (cpu, vd, NO_SP,
3376                                aarch64_get_vec_u32 (cpu, vm, elem));
3377         }
3378       else
3379         {
3380           /* 64-bit moves.  */
3381           if (INSTR (19, 19) != 1)
3382             HALT_NYI;
3383
3384           elem = INSTR (20, 20);
3385           aarch64_set_reg_u64 (cpu, vd, NO_SP,
3386                                aarch64_get_vec_u64 (cpu, vm, elem));
3387         }
3388     }
3389   else
3390     {
3391       if (INSTR (18, 18) == 1)
3392         {
3393           /* 32-bit moves.  */
3394           elem = INSTR (20, 19);
3395           aarch64_set_vec_u32 (cpu, vd, elem,
3396                                aarch64_get_reg_u32 (cpu, vm, NO_SP));
3397         }
3398       else
3399         {
3400           /* 64-bit moves.  */
3401           if (INSTR (19, 19) != 1)
3402             HALT_NYI;
3403
3404           elem = INSTR (20, 20);
3405           aarch64_set_vec_u64 (cpu, vd, elem,
3406                                aarch64_get_reg_u64 (cpu, vm, NO_SP));
3407         }
3408     }
3409 }
3410
3411 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE)   \
3412   do                                                              \
3413     {                                                             \
3414       DST_TYPE a[N], b[N];                                        \
3415                                                                   \
3416       for (i = 0; i < (N); i++)                                   \
3417         {                                                         \
3418           a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
3419           b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
3420         }                                                         \
3421       for (i = 0; i < (N); i++)                                   \
3422         aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]);   \
3423     }                                                             \
3424   while (0)
3425
3426 static void
3427 do_vec_mull (sim_cpu *cpu)
3428 {
3429   /* instr[31]    = 0
3430      instr[30]    = lower(0)/upper(1) selector
3431      instr[29]    = signed(0)/unsigned(1)
3432      instr[28,24] = 0 1110
3433      instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3434      instr[21]    = 1
3435      instr[20,16] = Vm
3436      instr[15,10] = 11 0000
3437      instr[9,5]   = Vn
3438      instr[4.0]   = Vd.  */
3439
3440   int    unsign = INSTR (29, 29);
3441   int    bias = INSTR (30, 30);
3442   unsigned vm = INSTR (20, 16);
3443   unsigned vn = INSTR ( 9,  5);
3444   unsigned vd = INSTR ( 4,  0);
3445   unsigned i;
3446
3447   NYI_assert (28, 24, 0x0E);
3448   NYI_assert (15, 10, 0x30);
3449
3450   /* NB: Read source values before writing results, in case
3451      the source and destination vectors are the same.  */
3452   switch (INSTR (23, 22))
3453     {
3454     case 0:
3455       if (bias)
3456         bias = 8;
3457       if (unsign)
3458         DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
3459       else
3460         DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);
3461       return;
3462
3463     case 1:
3464       if (bias)
3465         bias = 4;
3466       if (unsign)
3467         DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
3468       else
3469         DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);
3470       return;
3471
3472     case 2:
3473       if (bias)
3474         bias = 2;
3475       if (unsign)
3476         DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
3477       else
3478         DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
3479       return;
3480
3481     case 3:
3482       HALT_NYI;
3483     }
3484 }
3485
3486 static void
3487 do_vec_fadd (sim_cpu *cpu)
3488 {
3489   /* instr[31]    = 0
3490      instr[30]    = half(0)/full(1)
3491      instr[29,24] = 001110
3492      instr[23]    = FADD(0)/FSUB(1)
3493      instr[22]    = float (0)/double(1)
3494      instr[21]    = 1
3495      instr[20,16] = Vm
3496      instr[15,10] = 110101
3497      instr[9,5]   = Vn
3498      instr[4.0]   = Vd.  */
3499
3500   unsigned vm = INSTR (20, 16);
3501   unsigned vn = INSTR (9, 5);
3502   unsigned vd = INSTR (4, 0);
3503   unsigned i;
3504   int      full = INSTR (30, 30);
3505
3506   NYI_assert (29, 24, 0x0E);
3507   NYI_assert (21, 21, 1);
3508   NYI_assert (15, 10, 0x35);
3509
3510   if (INSTR (23, 23))
3511     {
3512       if (INSTR (22, 22))
3513         {
3514           if (! full)
3515             HALT_NYI;
3516
3517           for (i = 0; i < 2; i++)
3518             aarch64_set_vec_double (cpu, vd, i,
3519                                     aarch64_get_vec_double (cpu, vn, i)
3520                                     - aarch64_get_vec_double (cpu, vm, i));
3521         }
3522       else
3523         {
3524           for (i = 0; i < (full ? 4 : 2); i++)
3525             aarch64_set_vec_float (cpu, vd, i,
3526                                    aarch64_get_vec_float (cpu, vn, i)
3527                                    - aarch64_get_vec_float (cpu, vm, i));
3528         }
3529     }
3530   else
3531     {
3532       if (INSTR (22, 22))
3533         {
3534           if (! full)
3535             HALT_NYI;
3536
3537           for (i = 0; i < 2; i++)
3538             aarch64_set_vec_double (cpu, vd, i,
3539                                     aarch64_get_vec_double (cpu, vm, i)
3540                                     + aarch64_get_vec_double (cpu, vn, i));
3541         }
3542       else
3543         {
3544           for (i = 0; i < (full ? 4 : 2); i++)
3545             aarch64_set_vec_float (cpu, vd, i,
3546                                    aarch64_get_vec_float (cpu, vm, i)
3547                                    + aarch64_get_vec_float (cpu, vn, i));
3548         }
3549     }
3550 }
3551
3552 static void
3553 do_vec_add (sim_cpu *cpu)
3554 {
3555   /* instr[31]    = 0
3556      instr[30]    = full/half selector
3557      instr[29,24] = 001110
3558      instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3559      instr[21]    = 1
3560      instr[20,16] = Vn
3561      instr[15,10] = 100001
3562      instr[9,5]   = Vm
3563      instr[4.0]   = Vd.  */
3564
3565   unsigned vm = INSTR (20, 16);
3566   unsigned vn = INSTR (9, 5);
3567   unsigned vd = INSTR (4, 0);
3568   unsigned i;
3569   int      full = INSTR (30, 30);
3570
3571   NYI_assert (29, 24, 0x0E);
3572   NYI_assert (21, 21, 1);
3573   NYI_assert (15, 10, 0x21);
3574
3575   switch (INSTR (23, 22))
3576     {
3577     case 0:
3578       for (i = 0; i < (full ? 16 : 8); i++)
3579         aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3580                             + aarch64_get_vec_u8 (cpu, vm, i));
3581       return;
3582
3583     case 1:
3584       for (i = 0; i < (full ? 8 : 4); i++)
3585         aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3586                              + aarch64_get_vec_u16 (cpu, vm, i));
3587       return;
3588
3589     case 2:
3590       for (i = 0; i < (full ? 4 : 2); i++)
3591         aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3592                              + aarch64_get_vec_u32 (cpu, vm, i));
3593       return;
3594
3595     case 3:
3596       if (! full)
3597         HALT_UNALLOC;
3598       aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3599                            + aarch64_get_vec_u64 (cpu, vm, 0));
3600       aarch64_set_vec_u64 (cpu, vd, 1,
3601                            aarch64_get_vec_u64 (cpu, vn, 1)
3602                            + aarch64_get_vec_u64 (cpu, vm, 1));
3603       return;
3604     }
3605 }
3606
3607 static void
3608 do_vec_mul (sim_cpu *cpu)
3609 {
3610   /* instr[31]    = 0
3611      instr[30]    = full/half selector
3612      instr[29,24] = 00 1110
3613      instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3614      instr[21]    = 1
3615      instr[20,16] = Vn
3616      instr[15,10] = 10 0111
3617      instr[9,5]   = Vm
3618      instr[4.0]   = Vd.  */
3619
3620   unsigned vm = INSTR (20, 16);
3621   unsigned vn = INSTR (9, 5);
3622   unsigned vd = INSTR (4, 0);
3623   unsigned i;
3624   int      full = INSTR (30, 30);
3625   int      bias = 0;
3626
3627   NYI_assert (29, 24, 0x0E);
3628   NYI_assert (21, 21, 1);
3629   NYI_assert (15, 10, 0x27);
3630
3631   switch (INSTR (23, 22))
3632     {
3633     case 0:
3634       DO_VEC_WIDENING_MUL (full ? 16 : 8, uint16_t, u8, u16);
3635       return;
3636
3637     case 1:
3638       DO_VEC_WIDENING_MUL (full ? 8 : 4, uint32_t, u16, u32);
3639       return;
3640
3641     case 2:
3642       DO_VEC_WIDENING_MUL (full ? 4 : 2, uint64_t, u32, u64);
3643       return;
3644
3645     case 3:
3646       HALT_UNALLOC;
3647     }
3648 }
3649
3650 static void
3651 do_vec_MLA (sim_cpu *cpu)
3652 {
3653   /* instr[31]    = 0
3654      instr[30]    = full/half selector
3655      instr[29,24] = 00 1110
3656      instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3657      instr[21]    = 1
3658      instr[20,16] = Vn
3659      instr[15,10] = 1001 01
3660      instr[9,5]   = Vm
3661      instr[4.0]   = Vd.  */
3662
3663   unsigned vm = INSTR (20, 16);
3664   unsigned vn = INSTR (9, 5);
3665   unsigned vd = INSTR (4, 0);
3666   unsigned i;
3667   int      full = INSTR (30, 30);
3668
3669   NYI_assert (29, 24, 0x0E);
3670   NYI_assert (21, 21, 1);
3671   NYI_assert (15, 10, 0x25);
3672
3673   switch (INSTR (23, 22))
3674     {
3675     case 0:
3676       {
3677         uint16_t a[16], b[16];
3678
3679         for (i = 0; i < (full ? 16 : 8); i++)
3680           {
3681             a[i] = aarch64_get_vec_u8 (cpu, vn, i);
3682             b[i] = aarch64_get_vec_u8 (cpu, vm, i);
3683           }
3684
3685         for (i = 0; i < (full ? 16 : 8); i++)
3686           {
3687             uint16_t v = aarch64_get_vec_u8 (cpu, vd, i);
3688
3689             aarch64_set_vec_u16 (cpu, vd, i, v + (a[i] * b[i]));
3690           }
3691       }
3692       return;
3693
3694     case 1:
3695       {
3696         uint32_t a[8], b[8];
3697
3698         for (i = 0; i < (full ? 8 : 4); i++)
3699           {
3700             a[i] = aarch64_get_vec_u16 (cpu, vn, i);
3701             b[i] = aarch64_get_vec_u16 (cpu, vm, i);
3702           }
3703
3704         for (i = 0; i < (full ? 8 : 4); i++)
3705           {
3706             uint32_t v = aarch64_get_vec_u16 (cpu, vd, i);
3707
3708             aarch64_set_vec_u32 (cpu, vd, i, v + (a[i] * b[i]));
3709           }
3710       }
3711       return;
3712
3713     case 2:
3714       {
3715         uint64_t a[4], b[4];
3716
3717         for (i = 0; i < (full ? 4 : 2); i++)
3718           {
3719             a[i] = aarch64_get_vec_u32 (cpu, vn, i);
3720             b[i] = aarch64_get_vec_u32 (cpu, vm, i);
3721           }
3722
3723         for (i = 0; i < (full ? 4 : 2); i++)
3724           {
3725             uint64_t v = aarch64_get_vec_u32 (cpu, vd, i);
3726
3727             aarch64_set_vec_u64 (cpu, vd, i, v + (a[i] * b[i]));
3728           }
3729       }
3730       return;
3731
3732     case 3:
3733       HALT_UNALLOC;
3734     }
3735 }
3736
/* Return the larger of A and B.  If exactly one operand is a NaN the
   other operand is returned; if both are NaNs, A is returned.  Zeros,
   subnormals and infinities are ordinary numbers here.  */

static float
fmaxnm (float a, float b)
{
  if (isnan (a))
    return isnan (b) ? a : b;

  if (isnan (b))
    return a;

  return a > b ? a : b;
}
3750
/* Return the smaller of A and B.  If exactly one operand is a NaN the
   other operand is returned; if both are NaNs, A is returned.  Zeros,
   subnormals and infinities are ordinary numbers here.  */

static float
fminnm (float a, float b)
{
  if (isnan (a))
    return isnan (b) ? a : b;

  if (isnan (b))
    return a;

  return a < b ? a : b;
}
3764
/* Return the larger of A and B.  If exactly one operand is a NaN the
   other operand is returned; if both are NaNs, A is returned.  Zeros,
   subnormals and infinities are ordinary numbers here.  */

static double
dmaxnm (double a, double b)
{
  if (isnan (a))
    return isnan (b) ? a : b;

  if (isnan (b))
    return a;

  return a > b ? a : b;
}
3778
/* Return the smaller of A and B.  If exactly one operand is a NaN the
   other operand is returned; if both are NaNs, A is returned.  Zeros,
   subnormals and infinities are ordinary numbers here.  */

static double
dminnm (double a, double b)
{
  if (isnan (a))
    return isnan (b) ? a : b;

  if (isnan (b))
    return a;

  return a < b ? a : b;
}
3792
3793 static void
3794 do_vec_FminmaxNMP (sim_cpu *cpu)
3795 {
3796   /* instr [31]    = 0
3797      instr [30]    = half (0)/full (1)
3798      instr [29,24] = 10 1110
3799      instr [23]    = max(0)/min(1)
3800      instr [22]    = float (0)/double (1)
3801      instr [21]    = 1
3802      instr [20,16] = Vn
3803      instr [15,10] = 1100 01
3804      instr [9,5]   = Vm
3805      instr [4.0]   = Vd.  */
3806
3807   unsigned vm = INSTR (20, 16);
3808   unsigned vn = INSTR (9, 5);
3809   unsigned vd = INSTR (4, 0);
3810   int      full = INSTR (30, 30);
3811
3812   NYI_assert (29, 24, 0x2E);
3813   NYI_assert (21, 21, 1);
3814   NYI_assert (15, 10, 0x31);
3815
3816   if (INSTR (22, 22))
3817     {
3818       double (* fn)(double, double) = INSTR (23, 23)
3819         ? dminnm : dmaxnm;
3820
3821       if (! full)
3822         HALT_NYI;
3823       aarch64_set_vec_double (cpu, vd, 0,
3824                               fn (aarch64_get_vec_double (cpu, vn, 0),
3825                                   aarch64_get_vec_double (cpu, vn, 1)));
3826       aarch64_set_vec_double (cpu, vd, 0,
3827                               fn (aarch64_get_vec_double (cpu, vm, 0),
3828                                   aarch64_get_vec_double (cpu, vm, 1)));
3829     }
3830   else
3831     {
3832       float (* fn)(float, float) = INSTR (23, 23)
3833         ? fminnm : fmaxnm;
3834
3835       aarch64_set_vec_float (cpu, vd, 0,
3836                              fn (aarch64_get_vec_float (cpu, vn, 0),
3837                                  aarch64_get_vec_float (cpu, vn, 1)));
3838       if (full)
3839         aarch64_set_vec_float (cpu, vd, 1,
3840                                fn (aarch64_get_vec_float (cpu, vn, 2),
3841                                    aarch64_get_vec_float (cpu, vn, 3)));
3842
3843       aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
3844                              fn (aarch64_get_vec_float (cpu, vm, 0),
3845                                  aarch64_get_vec_float (cpu, vm, 1)));
3846       if (full)
3847         aarch64_set_vec_float (cpu, vd, 3,
3848                                fn (aarch64_get_vec_float (cpu, vm, 2),
3849                                    aarch64_get_vec_float (cpu, vm, 3)));
3850     }
3851 }
3852
3853 static void
3854 do_vec_AND (sim_cpu *cpu)
3855 {
3856   /* instr[31]    = 0
3857      instr[30]    = half (0)/full (1)
3858      instr[29,21] = 001110001
3859      instr[20,16] = Vm
3860      instr[15,10] = 000111
3861      instr[9,5]   = Vn
3862      instr[4.0]   = Vd.  */
3863
3864   unsigned vm = INSTR (20, 16);
3865   unsigned vn = INSTR (9, 5);
3866   unsigned vd = INSTR (4, 0);
3867   unsigned i;
3868   int      full = INSTR (30, 30);
3869
3870   NYI_assert (29, 21, 0x071);
3871   NYI_assert (15, 10, 0x07);
3872
3873   for (i = 0; i < (full ? 4 : 2); i++)
3874     aarch64_set_vec_u32 (cpu, vd, i,
3875                          aarch64_get_vec_u32 (cpu, vn, i)
3876                          & aarch64_get_vec_u32 (cpu, vm, i));
3877 }
3878
3879 static void
3880 do_vec_BSL (sim_cpu *cpu)
3881 {
3882   /* instr[31]    = 0
3883      instr[30]    = half (0)/full (1)
3884      instr[29,21] = 101110011
3885      instr[20,16] = Vm
3886      instr[15,10] = 000111
3887      instr[9,5]   = Vn
3888      instr[4.0]   = Vd.  */
3889
3890   unsigned vm = INSTR (20, 16);
3891   unsigned vn = INSTR (9, 5);
3892   unsigned vd = INSTR (4, 0);
3893   unsigned i;
3894   int      full = INSTR (30, 30);
3895
3896   NYI_assert (29, 21, 0x173);
3897   NYI_assert (15, 10, 0x07);
3898
3899   for (i = 0; i < (full ? 16 : 8); i++)
3900     aarch64_set_vec_u8 (cpu, vd, i,
3901                         (    aarch64_get_vec_u8 (cpu, vd, i)
3902                            & aarch64_get_vec_u8 (cpu, vn, i))
3903                         | ((~ aarch64_get_vec_u8 (cpu, vd, i))
3904                            & aarch64_get_vec_u8 (cpu, vm, i)));
3905 }
3906
3907 static void
3908 do_vec_EOR (sim_cpu *cpu)
3909 {
3910   /* instr[31]    = 0
3911      instr[30]    = half (0)/full (1)
3912      instr[29,21] = 10 1110 001
3913      instr[20,16] = Vm
3914      instr[15,10] = 000111
3915      instr[9,5]   = Vn
3916      instr[4.0]   = Vd.  */
3917
3918   unsigned vm = INSTR (20, 16);
3919   unsigned vn = INSTR (9, 5);
3920   unsigned vd = INSTR (4, 0);
3921   unsigned i;
3922   int      full = INSTR (30, 30);
3923
3924   NYI_assert (29, 21, 0x171);
3925   NYI_assert (15, 10, 0x07);
3926
3927   for (i = 0; i < (full ? 4 : 2); i++)
3928     aarch64_set_vec_u32 (cpu, vd, i,
3929                          aarch64_get_vec_u32 (cpu, vn, i)
3930                          ^ aarch64_get_vec_u32 (cpu, vm, i));
3931 }
3932
3933 static void
3934 do_vec_bit (sim_cpu *cpu)
3935 {
3936   /* instr[31]    = 0
3937      instr[30]    = half (0)/full (1)
3938      instr[29,23] = 10 1110 1
3939      instr[22]    = BIT (0) / BIF (1)
3940      instr[21]    = 1
3941      instr[20,16] = Vm
3942      instr[15,10] = 0001 11
3943      instr[9,5]   = Vn
3944      instr[4.0]   = Vd.  */
3945
3946   unsigned vm = INSTR (20, 16);
3947   unsigned vn = INSTR (9, 5);
3948   unsigned vd = INSTR (4, 0);
3949   unsigned full = INSTR (30, 30);
3950   unsigned test_false = INSTR (22, 22);
3951   unsigned i;
3952
3953   NYI_assert (29, 23, 0x5D);
3954   NYI_assert (21, 21, 1);
3955   NYI_assert (15, 10, 0x07);
3956
3957   if (test_false)
3958     {
3959       for (i = 0; i < (full ? 16 : 8); i++)
3960         if (aarch64_get_vec_u32 (cpu, vn, i) == 0)
3961           aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vm, i));
3962     }
3963   else
3964     {
3965       for (i = 0; i < (full ? 16 : 8); i++)
3966         if (aarch64_get_vec_u32 (cpu, vn, i) != 0)
3967           aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vm, i));
3968     }
3969 }
3970
3971 static void
3972 do_vec_ORN (sim_cpu *cpu)
3973 {
3974   /* instr[31]    = 0
3975      instr[30]    = half (0)/full (1)
3976      instr[29,21] = 00 1110 111
3977      instr[20,16] = Vm
3978      instr[15,10] = 00 0111
3979      instr[9,5]   = Vn
3980      instr[4.0]   = Vd.  */
3981
3982   unsigned vm = INSTR (20, 16);
3983   unsigned vn = INSTR (9, 5);
3984   unsigned vd = INSTR (4, 0);
3985   unsigned i;
3986   int      full = INSTR (30, 30);
3987
3988   NYI_assert (29, 21, 0x077);
3989   NYI_assert (15, 10, 0x07);
3990
3991   for (i = 0; i < (full ? 16 : 8); i++)
3992     aarch64_set_vec_u8 (cpu, vd, i,
3993                         aarch64_get_vec_u8 (cpu, vn, i)
3994                         | ~ aarch64_get_vec_u8 (cpu, vm, i));
3995 }
3996
3997 static void
3998 do_vec_ORR (sim_cpu *cpu)
3999 {
4000   /* instr[31]    = 0
4001      instr[30]    = half (0)/full (1)
4002      instr[29,21] = 00 1110 101
4003      instr[20,16] = Vm
4004      instr[15,10] = 0001 11
4005      instr[9,5]   = Vn
4006      instr[4.0]   = Vd.  */
4007
4008   unsigned vm = INSTR (20, 16);
4009   unsigned vn = INSTR (9, 5);
4010   unsigned vd = INSTR (4, 0);
4011   unsigned i;
4012   int      full = INSTR (30, 30);
4013
4014   NYI_assert (29, 21, 0x075);
4015   NYI_assert (15, 10, 0x07);
4016
4017   for (i = 0; i < (full ? 16 : 8); i++)
4018     aarch64_set_vec_u8 (cpu, vd, i,
4019                         aarch64_get_vec_u8 (cpu, vn, i)
4020                         | aarch64_get_vec_u8 (cpu, vm, i));
4021 }
4022
4023 static void
4024 do_vec_BIC (sim_cpu *cpu)
4025 {
4026   /* instr[31]    = 0
4027      instr[30]    = half (0)/full (1)
4028      instr[29,21] = 00 1110 011
4029      instr[20,16] = Vm
4030      instr[15,10] = 00 0111
4031      instr[9,5]   = Vn
4032      instr[4.0]   = Vd.  */
4033
4034   unsigned vm = INSTR (20, 16);
4035   unsigned vn = INSTR (9, 5);
4036   unsigned vd = INSTR (4, 0);
4037   unsigned i;
4038   int      full = INSTR (30, 30);
4039
4040   NYI_assert (29, 21, 0x073);
4041   NYI_assert (15, 10, 0x07);
4042
4043   for (i = 0; i < (full ? 16 : 8); i++)
4044     aarch64_set_vec_u8 (cpu, vd, i,
4045                         aarch64_get_vec_u8 (cpu, vn, i)
4046                         & ~ aarch64_get_vec_u8 (cpu, vm, i));
4047 }
4048
4049 static void
4050 do_vec_XTN (sim_cpu *cpu)
4051 {
4052   /* instr[31]    = 0
4053      instr[30]    = first part (0)/ second part (1)
4054      instr[29,24] = 00 1110
4055      instr[23,22] = size: byte(00), half(01), word (10)
4056      instr[21,10] = 1000 0100 1010
4057      instr[9,5]   = Vs
4058      instr[4,0]   = Vd.  */
4059
4060   unsigned vs = INSTR (9, 5);
4061   unsigned vd = INSTR (4, 0);
4062   unsigned bias = INSTR (30, 30);
4063   unsigned i;
4064
4065   NYI_assert (29, 24, 0x0E);
4066   NYI_assert (21, 10, 0x84A);
4067
4068   switch (INSTR (23, 22))
4069     {
4070     case 0:
4071       if (bias)
4072         for (i = 0; i < 8; i++)
4073           aarch64_set_vec_u8 (cpu, vd, i + 8,
4074                               aarch64_get_vec_u16 (cpu, vs, i) >> 8);
4075       else
4076         for (i = 0; i < 8; i++)
4077           aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, i));
4078       return;
4079
4080     case 1:
4081       if (bias)
4082         for (i = 0; i < 4; i++)
4083           aarch64_set_vec_u16 (cpu, vd, i + 4,
4084                                aarch64_get_vec_u32 (cpu, vs, i) >> 16);
4085       else
4086         for (i = 0; i < 4; i++)
4087           aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, i));
4088       return;
4089
4090     case 2:
4091       if (bias)
4092         for (i = 0; i < 2; i++)
4093           aarch64_set_vec_u32 (cpu, vd, i + 4,
4094                                aarch64_get_vec_u64 (cpu, vs, i) >> 32);
4095       else
4096         for (i = 0; i < 2; i++)
4097           aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, i));
4098       return;
4099     }
4100 }
4101
4102 static void
4103 do_vec_maxv (sim_cpu *cpu)
4104 {
4105   /* instr[31]    = 0
4106      instr[30]    = half(0)/full(1)
4107      instr[29]    = signed (0)/unsigned(1)
4108      instr[28,24] = 0 1110
4109      instr[23,22] = size: byte(00), half(01), word (10)
4110      instr[21]    = 1
4111      instr[20,17] = 1 000
4112      instr[16]    = max(0)/min(1)
4113      instr[15,10] = 1010 10
4114      instr[9,5]   = V source
4115      instr[4.0]   = R dest.  */
4116
4117   unsigned vs = INSTR (9, 5);
4118   unsigned rd = INSTR (4, 0);
4119   unsigned full = INSTR (30, 30);
4120   unsigned i;
4121
4122   NYI_assert (28, 24, 0x0E);
4123   NYI_assert (21, 21, 1);
4124   NYI_assert (20, 17, 8);
4125   NYI_assert (15, 10, 0x2A);
4126
4127   switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4128     {
4129     case 0: /* SMAXV.  */
4130        {
4131         int64_t smax;
4132         switch (INSTR (23, 22))
4133           {
4134           case 0:
4135             smax = aarch64_get_vec_s8 (cpu, vs, 0);
4136             for (i = 1; i < (full ? 16 : 8); i++)
4137               smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4138             break;
4139           case 1:
4140             smax = aarch64_get_vec_s16 (cpu, vs, 0);
4141             for (i = 1; i < (full ? 8 : 4); i++)
4142               smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4143             break;
4144           case 2:
4145             smax = aarch64_get_vec_s32 (cpu, vs, 0);
4146             for (i = 1; i < (full ? 4 : 2); i++)
4147               smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4148             break;
4149           case 3:
4150             HALT_UNALLOC;
4151           }
4152         aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4153         return;
4154       }
4155
4156     case 1: /* SMINV.  */
4157       {
4158         int64_t smin;
4159         switch (INSTR (23, 22))
4160           {
4161           case 0:
4162             smin = aarch64_get_vec_s8 (cpu, vs, 0);
4163             for (i = 1; i < (full ? 16 : 8); i++)
4164               smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4165             break;
4166           case 1:
4167             smin = aarch64_get_vec_s16 (cpu, vs, 0);
4168             for (i = 1; i < (full ? 8 : 4); i++)
4169               smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4170             break;
4171           case 2:
4172             smin = aarch64_get_vec_s32 (cpu, vs, 0);
4173             for (i = 1; i < (full ? 4 : 2); i++)
4174               smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4175             break;
4176
4177           case 3:
4178             HALT_UNALLOC;
4179           }
4180         aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4181         return;
4182       }
4183
4184     case 2: /* UMAXV.  */
4185       {
4186         uint64_t umax;
4187         switch (INSTR (23, 22))
4188           {
4189           case 0:
4190             umax = aarch64_get_vec_u8 (cpu, vs, 0);
4191             for (i = 1; i < (full ? 16 : 8); i++)
4192               umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4193             break;
4194           case 1:
4195             umax = aarch64_get_vec_u16 (cpu, vs, 0);
4196             for (i = 1; i < (full ? 8 : 4); i++)
4197               umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4198             break;
4199           case 2:
4200             umax = aarch64_get_vec_u32 (cpu, vs, 0);
4201             for (i = 1; i < (full ? 4 : 2); i++)
4202               umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4203             break;
4204
4205           case 3:
4206             HALT_UNALLOC;
4207           }
4208         aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4209         return;
4210       }
4211
4212     case 3: /* UMINV.  */
4213       {
4214         uint64_t umin;
4215         switch (INSTR (23, 22))
4216           {
4217           case 0:
4218             umin = aarch64_get_vec_u8 (cpu, vs, 0);
4219             for (i = 1; i < (full ? 16 : 8); i++)
4220               umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4221             break;
4222           case 1:
4223             umin = aarch64_get_vec_u16 (cpu, vs, 0);
4224             for (i = 1; i < (full ? 8 : 4); i++)
4225               umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4226             break;
4227           case 2:
4228             umin = aarch64_get_vec_u32 (cpu, vs, 0);
4229             for (i = 1; i < (full ? 4 : 2); i++)
4230               umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4231             break;
4232
4233           case 3:
4234             HALT_UNALLOC;
4235           }
4236         aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4237         return;
4238       }
4239     }
4240 }
4241
4242 static void
4243 do_vec_fminmaxV (sim_cpu *cpu)
4244 {
4245   /* instr[31,24] = 0110 1110
4246      instr[23]    = max(0)/min(1)
4247      instr[22,14] = 011 0000 11
4248      instr[13,12] = nm(00)/normal(11)
4249      instr[11,10] = 10
4250      instr[9,5]   = V source
4251      instr[4.0]   = R dest.  */
4252
4253   unsigned vs = INSTR (9, 5);
4254   unsigned rd = INSTR (4, 0);
4255   unsigned i;
4256   float res   = aarch64_get_vec_float (cpu, vs, 0);
4257
4258   NYI_assert (31, 24, 0x6E);
4259   NYI_assert (22, 14, 0x0C3);
4260   NYI_assert (11, 10, 2);
4261
4262   if (INSTR (23, 23))
4263     {
4264       switch (INSTR (13, 12))
4265         {
4266         case 0: /* FMNINNMV.  */
4267           for (i = 1; i < 4; i++)
4268             res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4269           break;
4270
4271         case 3: /* FMINV.  */
4272           for (i = 1; i < 4; i++)
4273             res = min (res, aarch64_get_vec_float (cpu, vs, i));
4274           break;
4275
4276         default:
4277           HALT_NYI;
4278         }
4279     }
4280   else
4281     {
4282       switch (INSTR (13, 12))
4283         {
4284         case 0: /* FMNAXNMV.  */
4285           for (i = 1; i < 4; i++)
4286             res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4287           break;
4288
4289         case 3: /* FMAXV.  */
4290           for (i = 1; i < 4; i++)
4291             res = max (res, aarch64_get_vec_float (cpu, vs, i));
4292           break;
4293
4294         default:
4295           HALT_NYI;
4296         }
4297     }
4298
4299   aarch64_set_FP_float (cpu, rd, res);
4300 }
4301
4302 static void
4303 do_vec_Fminmax (sim_cpu *cpu)
4304 {
4305   /* instr[31]    = 0
4306      instr[30]    = half(0)/full(1)
4307      instr[29,24] = 00 1110
4308      instr[23]    = max(0)/min(1)
4309      instr[22]    = float(0)/double(1)
4310      instr[21]    = 1
4311      instr[20,16] = Vm
4312      instr[15,14] = 11
4313      instr[13,12] = nm(00)/normal(11)
4314      instr[11,10] = 01
4315      instr[9,5]   = Vn
4316      instr[4,0]   = Vd.  */
4317
4318   unsigned vm = INSTR (20, 16);
4319   unsigned vn = INSTR (9, 5);
4320   unsigned vd = INSTR (4, 0);
4321   unsigned full = INSTR (30, 30);
4322   unsigned min = INSTR (23, 23);
4323   unsigned i;
4324
4325   NYI_assert (29, 24, 0x0E);
4326   NYI_assert (21, 21, 1);
4327   NYI_assert (15, 14, 3);
4328   NYI_assert (11, 10, 1);
4329
4330   if (INSTR (22, 22))
4331     {
4332       double (* func)(double, double);
4333
4334       if (! full)
4335         HALT_NYI;
4336
4337       if (INSTR (13, 12) == 0)
4338         func = min ? dminnm : dmaxnm;
4339       else if (INSTR (13, 12) == 3)
4340         func = min ? fmin : fmax;
4341       else
4342         HALT_NYI;
4343
4344       for (i = 0; i < 2; i++)
4345         aarch64_set_vec_double (cpu, vd, i,
4346                                 func (aarch64_get_vec_double (cpu, vn, i),
4347                                       aarch64_get_vec_double (cpu, vm, i)));
4348     }
4349   else
4350     {
4351       float (* func)(float, float);
4352
4353       if (INSTR (13, 12) == 0)
4354         func = min ? fminnm : fmaxnm;
4355       else if (INSTR (13, 12) == 3)
4356         func = min ? fminf : fmaxf;
4357       else
4358         HALT_NYI;
4359
4360       for (i = 0; i < (full ? 4 : 2); i++)
4361         aarch64_set_vec_float (cpu, vd, i,
4362                                func (aarch64_get_vec_float (cpu, vn, i),
4363                                      aarch64_get_vec_float (cpu, vm, i)));
4364     }
4365 }
4366
4367 static void
4368 do_vec_SCVTF (sim_cpu *cpu)
4369 {
4370   /* instr[31]    = 0
4371      instr[30]    = Q
4372      instr[29,23] = 00 1110 0
4373      instr[22]    = float(0)/double(1)
4374      instr[21,10] = 10 0001 1101 10
4375      instr[9,5]   = Vn
4376      instr[4,0]   = Vd.  */
4377
4378   unsigned vn = INSTR (9, 5);
4379   unsigned vd = INSTR (4, 0);
4380   unsigned full = INSTR (30, 30);
4381   unsigned size = INSTR (22, 22);
4382   unsigned i;
4383
4384   NYI_assert (29, 23, 0x1C);
4385   NYI_assert (21, 10, 0x876);
4386
4387   if (size)
4388     {
4389       if (! full)
4390         HALT_UNALLOC;
4391
4392       for (i = 0; i < 2; i++)
4393         {
4394           double val = (double) aarch64_get_vec_u64 (cpu, vn, i);
4395           aarch64_set_vec_double (cpu, vd, i, val);
4396         }
4397     }
4398   else
4399     {
4400       for (i = 0; i < (full ? 4 : 2); i++)
4401         {
4402           float val = (float) aarch64_get_vec_u32 (cpu, vn, i);
4403           aarch64_set_vec_float (cpu, vd, i, val);
4404         }
4405     }
4406 }
4407
/* Lane-wise integer comparison of Vn against Vm.

   SOURCE is pasted into the getter name (aarch64_get_vec_<SOURCE><bits>),
   so the callers pass `s' or `u' to pick signed or unsigned reads.  CMP
   is the C comparison operator to apply.  A destination lane receives
   -1 when the comparison holds (presumably all ones once the setter has
   narrowed it to the lane width) and 0 otherwise.

   The expansion uses the locals cpu, size, full, i, vd, vn and vm of the
   enclosing function and returns from that function once a size in the
   range 0-3 has been handled.  64-bit lanes (size 3) are rejected with
   HALT_UNALLOC unless FULL is set.  */
#define VEC_CMP(SOURCE, CMP)                                            \
  do                                                                    \
    {                                                                   \
      if (size == 0)                                                    \
        {                                                               \
          for (i = 0; i < (full ? 16 : 8); i++)                         \
            aarch64_set_vec_u8 (cpu, vd, i,                             \
                                (aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
                                 CMP                                    \
                                 aarch64_get_vec_##SOURCE##8 (cpu, vm, i)) \
                                ? -1 : 0);                              \
          return;                                                       \
        }                                                               \
      else if (size == 1)                                               \
        {                                                               \
          for (i = 0; i < (full ? 8 : 4); i++)                          \
            aarch64_set_vec_u16 (cpu, vd, i,                            \
                                 (aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
                                  CMP                                   \
                                  aarch64_get_vec_##SOURCE##16 (cpu, vm, i)) \
                                 ? -1 : 0);                             \
          return;                                                       \
        }                                                               \
      else if (size == 2)                                               \
        {                                                               \
          for (i = 0; i < (full ? 4 : 2); i++)                          \
            aarch64_set_vec_u32 (cpu, vd, i,                            \
                                 (aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
                                  CMP                                   \
                                  aarch64_get_vec_##SOURCE##32 (cpu, vm, i)) \
                                 ? -1 : 0);                             \
          return;                                                       \
        }                                                               \
      else if (size == 3)                                               \
        {                                                               \
          if (! full)                                                   \
            HALT_UNALLOC;                                               \
          for (i = 0; i < 2; i++)                                       \
            aarch64_set_vec_u64 (cpu, vd, i,                            \
                                 (aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
                                  CMP                                   \
                                  aarch64_get_vec_##SOURCE##64 (cpu, vm, i)) \
                                 ? -1ULL : 0);                          \
          return;                                                       \
        }                                                               \
    }                                                                   \
  while (0)
4450
/* Lane-wise integer comparison of Vn against zero.

   SOURCE is pasted into the getter name (aarch64_get_vec_<SOURCE><bits>)
   and CMP is the C comparison operator placed between the lane value and
   the constant 0.  A destination lane receives -1 when the comparison
   holds (presumably all ones once the setter has narrowed it to the lane
   width) and 0 otherwise.

   The expansion uses the locals cpu, size, full, i, vd and vn of the
   enclosing function and returns from that function once a size in the
   range 0-3 has been handled.  64-bit lanes (size 3) are rejected with
   HALT_UNALLOC unless FULL is set.  */
#define VEC_CMP0(SOURCE, CMP)                                           \
  do                                                                    \
    {                                                                   \
      if (size == 0)                                                    \
        {                                                               \
          for (i = 0; i < (full ? 16 : 8); i++)                         \
            aarch64_set_vec_u8 (cpu, vd, i,                             \
                                (aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
                                 CMP 0) ? -1 : 0);                      \
          return;                                                       \
        }                                                               \
      else if (size == 1)                                               \
        {                                                               \
          for (i = 0; i < (full ? 8 : 4); i++)                          \
            aarch64_set_vec_u16 (cpu, vd, i,                            \
                                 (aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
                                  CMP 0) ? -1 : 0);                     \
          return;                                                       \
        }                                                               \
      else if (size == 2)                                               \
        {                                                               \
          for (i = 0; i < (full ? 4 : 2); i++)                          \
            aarch64_set_vec_u32 (cpu, vd, i,                            \
                                 (aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
                                  CMP 0) ? -1 : 0);                     \
          return;                                                       \
        }                                                               \
      else if (size == 3)                                               \
        {                                                               \
          if (! full)                                                   \
            HALT_UNALLOC;                                               \
          for (i = 0; i < 2; i++)                                       \
            aarch64_set_vec_u64 (cpu, vd, i,                            \
                                 (aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
                                  CMP 0) ? -1ULL : 0);                  \
          return;                                                       \
        }                                                               \
    }                                                                   \
  while (0)
4485
/* Lane-wise floating point comparison of Vn against 0.0.

   CMP is the C comparison operator placed between the lane value and
   0.0.  instr[22] selects double (1) or single (0) precision lanes; a
   destination lane of the same width receives -1 when the comparison
   holds and 0 otherwise.

   The expansion uses the locals cpu, full, i, vd, vn and vm of the
   enclosing function and always returns from that function.  A non-zero
   VM, or double precision lanes without FULL, stop with HALT_NYI.  */
#define VEC_FCMP0(CMP)                                                  \
  do                                                                    \
    {                                                                   \
      if (vm != 0)                                                      \
        HALT_NYI;                                                       \
      if (INSTR (22, 22) == 0)                                          \
        {                                                               \
          for (i = 0; i < (full ? 4 : 2); i++)                          \
            aarch64_set_vec_u32 (cpu, vd, i,                            \
                                 (aarch64_get_vec_float (cpu, vn, i)    \
                                  CMP 0.0) ? -1 : 0);                   \
          return;                                                       \
        }                                                               \
      if (! full)                                                       \
        HALT_NYI;                                                       \
      for (i = 0; i < 2; i++)                                           \
        aarch64_set_vec_u64 (cpu, vd, i,                                \
                             (aarch64_get_vec_double (cpu, vn, i)       \
                              CMP 0.0) ? -1 : 0);                       \
      return;                                                           \
    }                                                                   \
  while (0)
4510
/* Lane-wise floating point comparison of Vn against Vm.

   CMP is the C comparison operator placed between the two lane values.
   instr[22] selects double (1) or single (0) precision lanes; a
   destination lane of the same width receives -1 when the comparison
   holds and 0 otherwise.

   The expansion uses the locals cpu, full, i, vd, vn and vm of the
   enclosing function and always returns from that function.  Double
   precision lanes without FULL stop with HALT_NYI.  */
#define VEC_FCMP(CMP)                                                   \
  do                                                                    \
    {                                                                   \
      if (INSTR (22, 22) == 0)                                          \
        {                                                               \
          for (i = 0; i < (full ? 4 : 2); i++)                          \
            aarch64_set_vec_u32 (cpu, vd, i,                            \
                                 (aarch64_get_vec_float (cpu, vn, i)    \
                                  CMP                                   \
                                  aarch64_get_vec_float (cpu, vm, i))   \
                                 ? -1 : 0);                             \
          return;                                                       \
        }                                                               \
      if (! full)                                                       \
        HALT_NYI;                                                       \
      for (i = 0; i < 2; i++)                                           \
        aarch64_set_vec_u64 (cpu, vd, i,                                \
                             (aarch64_get_vec_double (cpu, vn, i)       \
                              CMP                                       \
                              aarch64_get_vec_double (cpu, vm, i))      \
                             ? -1 : 0);                                 \
      return;                                                           \
    }                                                                   \
  while (0)
4537
4538 static void
4539 do_vec_compare (sim_cpu *cpu)
4540 {
4541   /* instr[31]    = 0
4542      instr[30]    = half(0)/full(1)
4543      instr[29]    = part-of-comparison-type
4544      instr[28,24] = 0 1110
4545      instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4546                     type of float compares: single (-0) / double (-1)
4547      instr[21]    = 1
4548      instr[20,16] = Vm or 00000 (compare vs 0)
4549      instr[15,10] = part-of-comparison-type
4550      instr[9,5]   = Vn
4551      instr[4.0]   = Vd.  */
4552
4553   int full = INSTR (30, 30);
4554   int size = INSTR (23, 22);
4555   unsigned vm = INSTR (20, 16);
4556   unsigned vn = INSTR (9, 5);
4557   unsigned vd = INSTR (4, 0);
4558   unsigned i;
4559
4560   NYI_assert (28, 24, 0x0E);
4561   NYI_assert (21, 21, 1);
4562
4563   if ((INSTR (11, 11)
4564        && INSTR (14, 14))
4565       || ((INSTR (11, 11) == 0
4566            && INSTR (10, 10) == 0)))
4567     {
4568       /* A compare vs 0.  */
4569       if (vm != 0)
4570         {
4571           if (INSTR (15, 10) == 0x2A)
4572             do_vec_maxv (cpu);
4573           else if (INSTR (15, 10) == 0x32
4574                    || INSTR (15, 10) == 0x3E)
4575             do_vec_fminmaxV (cpu);
4576           else if (INSTR (29, 23) == 0x1C
4577                    && INSTR (21, 10) == 0x876)
4578             do_vec_SCVTF (cpu);
4579           else
4580             HALT_NYI;
4581           return;
4582         }
4583     }
4584
4585   if (INSTR (14, 14))
4586     {
4587       /* A floating point compare.  */
4588       unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4589         | INSTR (13, 10);
4590
4591       NYI_assert (15, 15, 1);
4592
4593       switch (decode)
4594         {
4595         case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4596         case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4597         case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4598         case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4599         case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4600         case /* 0b111001: GT */   0x39: VEC_FCMP  (>);
4601         case /* 0b101001: GE */   0x29: VEC_FCMP  (>=);
4602         case /* 0b001001: EQ */   0x09: VEC_FCMP  (==);
4603
4604         default:
4605           HALT_NYI;
4606         }
4607     }
4608   else
4609     {
4610       unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4611
4612       switch (decode)
4613         {
4614         case 0x0D: /* 0001101 GT */     VEC_CMP  (s, > );
4615         case 0x0F: /* 0001111 GE */     VEC_CMP  (s, >= );
4616         case 0x22: /* 0100010 GT #0 */  VEC_CMP0 (s, > );
4617         case 0x26: /* 0100110 EQ #0 */  VEC_CMP0 (s, == );
4618         case 0x2A: /* 0101010 LT #0 */  VEC_CMP0 (s, < );
4619         case 0x4D: /* 1001101 HI */     VEC_CMP  (u, > );
4620         case 0x4F: /* 1001111 HS */     VEC_CMP  (u, >= );
4621         case 0x62: /* 1100010 GE #0 */  VEC_CMP0 (s, >= );
4622         case 0x63: /* 1100011 EQ */     VEC_CMP  (u, == );
4623         case 0x66: /* 1100110 LE #0 */  VEC_CMP0 (s, <= );
4624         default:
4625           if (vm == 0)
4626             HALT_NYI;
4627           do_vec_maxv (cpu);
4628         }
4629     }
4630 }
4631
4632 static void
4633 do_vec_SSHL (sim_cpu *cpu)
4634 {
4635   /* instr[31]    = 0
4636      instr[30]    = first part (0)/ second part (1)
4637      instr[29,24] = 00 1110
4638      instr[23,22] = size: byte(00), half(01), word (10), long (11)
4639      instr[21]    = 1
4640      instr[20,16] = Vm
4641      instr[15,10] = 0100 01
4642      instr[9,5]   = Vn
4643      instr[4,0]   = Vd.  */
4644
4645   unsigned full = INSTR (30, 30);
4646   unsigned vm = INSTR (20, 16);
4647   unsigned vn = INSTR (9, 5);
4648   unsigned vd = INSTR (4, 0);
4649   unsigned i;
4650   signed int shift;
4651
4652   NYI_assert (29, 24, 0x0E);
4653   NYI_assert (21, 21, 1);
4654   NYI_assert (15, 10, 0x11);
4655
4656   /* FIXME: What is a signed shift left in this context ?.  */
4657
4658   switch (INSTR (23, 22))
4659     {
4660     case 0:
4661       for (i = 0; i < (full ? 16 : 8); i++)
4662         {
4663           shift = aarch64_get_vec_s8 (cpu, vm, i);
4664           if (shift >= 0)
4665             aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4666                                 << shift);
4667           else
4668             aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4669                                 >> - shift);
4670         }
4671       return;
4672
4673     case 1:
4674       for (i = 0; i < (full ? 8 : 4); i++)
4675         {
4676           shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4677           if (shift >= 0)
4678             aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4679                                  << shift);
4680           else
4681             aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4682                                  >> - shift);
4683         }
4684       return;
4685
4686     case 2:
4687       for (i = 0; i < (full ? 4 : 2); i++)
4688         {
4689           shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4690           if (shift >= 0)
4691             aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4692                                  << shift);
4693           else
4694             aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4695                                  >> - shift);
4696         }
4697       return;
4698
4699     case 3:
4700       if (! full)
4701         HALT_UNALLOC;
4702       for (i = 0; i < 2; i++)
4703         {
4704           shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4705           if (shift >= 0)
4706             aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4707                                  << shift);
4708           else
4709             aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4710                                  >> - shift);
4711         }
4712       return;
4713     }
4714 }
4715
4716 static void
4717 do_vec_USHL (sim_cpu *cpu)
4718 {
4719   /* instr[31]    = 0
4720      instr[30]    = first part (0)/ second part (1)
4721      instr[29,24] = 10 1110
4722      instr[23,22] = size: byte(00), half(01), word (10), long (11)
4723      instr[21]    = 1
4724      instr[20,16] = Vm
4725      instr[15,10] = 0100 01
4726      instr[9,5]   = Vn
4727      instr[4,0]   = Vd  */
4728
4729   unsigned full = INSTR (30, 30);
4730   unsigned vm = INSTR (20, 16);
4731   unsigned vn = INSTR (9, 5);
4732   unsigned vd = INSTR (4, 0);
4733   unsigned i;
4734   signed int shift;
4735
4736   NYI_assert (29, 24, 0x2E);
4737   NYI_assert (15, 10, 0x11);
4738
4739   switch (INSTR (23, 22))
4740     {
4741     case 0:
4742         for (i = 0; i < (full ? 16 : 8); i++)
4743           {
4744             shift = aarch64_get_vec_s8 (cpu, vm, i);
4745             if (shift >= 0)
4746               aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4747                                   << shift);
4748             else
4749               aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4750                                   >> - shift);
4751           }
4752       return;
4753
4754     case 1:
4755       for (i = 0; i < (full ? 8 : 4); i++)
4756         {
4757           shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4758           if (shift >= 0)
4759             aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4760                                  << shift);
4761           else
4762             aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4763                                  >> - shift);
4764         }
4765       return;
4766
4767     case 2:
4768       for (i = 0; i < (full ? 4 : 2); i++)
4769         {
4770           shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4771           if (shift >= 0)
4772             aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4773                                  << shift);
4774           else
4775             aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4776                                  >> - shift);
4777         }
4778       return;
4779
4780     case 3:
4781       if (! full)
4782         HALT_UNALLOC;
4783       for (i = 0; i < 2; i++)
4784         {
4785           shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4786           if (shift >= 0)
4787             aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4788                                  << shift);
4789           else
4790             aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4791                                  >> - shift);
4792         }
4793       return;
4794     }
4795 }
4796
4797 static void
4798 do_vec_FMLA (sim_cpu *cpu)
4799 {
4800   /* instr[31]    = 0
4801      instr[30]    = full/half selector
4802      instr[29,23] = 0011100
4803      instr[22]    = size: 0=>float, 1=>double
4804      instr[21]    = 1
4805      instr[20,16] = Vn
4806      instr[15,10] = 1100 11
4807      instr[9,5]   = Vm
4808      instr[4.0]   = Vd.  */
4809
4810   unsigned vm = INSTR (20, 16);
4811   unsigned vn = INSTR (9, 5);
4812   unsigned vd = INSTR (4, 0);
4813   unsigned i;
4814   int      full = INSTR (30, 30);
4815
4816   NYI_assert (29, 23, 0x1C);
4817   NYI_assert (21, 21, 1);
4818   NYI_assert (15, 10, 0x33);
4819
4820   if (INSTR (22, 22))
4821     {
4822       if (! full)
4823         HALT_UNALLOC;
4824       for (i = 0; i < 2; i++)
4825         aarch64_set_vec_double (cpu, vd, i,
4826                                 aarch64_get_vec_double (cpu, vn, i) *
4827                                 aarch64_get_vec_double (cpu, vm, i) +
4828                                 aarch64_get_vec_double (cpu, vd, i));
4829     }
4830   else
4831     {
4832       for (i = 0; i < (full ? 4 : 2); i++)
4833         aarch64_set_vec_float (cpu, vd, i,
4834                                aarch64_get_vec_float (cpu, vn, i) *
4835                                aarch64_get_vec_float (cpu, vm, i) +
4836                                aarch64_get_vec_float (cpu, vd, i));
4837     }
4838 }
4839
4840 static void
4841 do_vec_max (sim_cpu *cpu)
4842 {
4843   /* instr[31]    = 0
4844      instr[30]    = full/half selector
4845      instr[29]    = SMAX (0) / UMAX (1)
4846      instr[28,24] = 0 1110
4847      instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
4848      instr[21]    = 1
4849      instr[20,16] = Vn
4850      instr[15,10] = 0110 01
4851      instr[9,5]   = Vm
4852      instr[4.0]   = Vd.  */
4853
4854   unsigned vm = INSTR (20, 16);
4855   unsigned vn = INSTR (9, 5);
4856   unsigned vd = INSTR (4, 0);
4857   unsigned i;
4858   int      full = INSTR (30, 30);
4859
4860   NYI_assert (28, 24, 0x0E);
4861   NYI_assert (21, 21, 1);
4862   NYI_assert (15, 10, 0x19);
4863
4864   if (INSTR (29, 29))
4865     {
4866       switch (INSTR (23, 22))
4867         {
4868         case 0:
4869           for (i = 0; i < (full ? 16 : 8); i++)
4870             aarch64_set_vec_u8 (cpu, vd, i,
4871                                 aarch64_get_vec_u8 (cpu, vn, i)
4872                                 > aarch64_get_vec_u8 (cpu, vm, i)
4873                                 ? aarch64_get_vec_u8 (cpu, vn, i)
4874                                 : aarch64_get_vec_u8 (cpu, vm, i));
4875           return;
4876
4877         case 1:
4878           for (i = 0; i < (full ? 8 : 4); i++)
4879             aarch64_set_vec_u16 (cpu, vd, i,
4880                                  aarch64_get_vec_u16 (cpu, vn, i)
4881                                  > aarch64_get_vec_u16 (cpu, vm, i)
4882                                  ? aarch64_get_vec_u16 (cpu, vn, i)
4883                                  : aarch64_get_vec_u16 (cpu, vm, i));
4884           return;
4885
4886         case 2:
4887           for (i = 0; i < (full ? 4 : 2); i++)
4888             aarch64_set_vec_u32 (cpu, vd, i,
4889                                  aarch64_get_vec_u32 (cpu, vn, i)
4890                                  > aarch64_get_vec_u32 (cpu, vm, i)
4891                                  ? aarch64_get_vec_u32 (cpu, vn, i)
4892                                  : aarch64_get_vec_u32 (cpu, vm, i));
4893           return;
4894
4895         case 3:
4896           HALT_UNALLOC;
4897         }
4898     }
4899   else
4900     {
4901       switch (INSTR (23, 22))
4902         {
4903         case 0:
4904           for (i = 0; i < (full ? 16 : 8); i++)
4905             aarch64_set_vec_s8 (cpu, vd, i,
4906                                 aarch64_get_vec_s8 (cpu, vn, i)
4907                                 > aarch64_get_vec_s8 (cpu, vm, i)
4908                                 ? aarch64_get_vec_s8 (cpu, vn, i)
4909                                 : aarch64_get_vec_s8 (cpu, vm, i));
4910           return;
4911
4912         case 1:
4913           for (i = 0; i < (full ? 8 : 4); i++)
4914             aarch64_set_vec_s16 (cpu, vd, i,
4915                                  aarch64_get_vec_s16 (cpu, vn, i)
4916                                  > aarch64_get_vec_s16 (cpu, vm, i)
4917                                  ? aarch64_get_vec_s16 (cpu, vn, i)
4918                                  : aarch64_get_vec_s16 (cpu, vm, i));
4919           return;
4920
4921         case 2:
4922           for (i = 0; i < (full ? 4 : 2); i++)
4923             aarch64_set_vec_s32 (cpu, vd, i,
4924                                  aarch64_get_vec_s32 (cpu, vn, i)
4925                                  > aarch64_get_vec_s32 (cpu, vm, i)
4926                                  ? aarch64_get_vec_s32 (cpu, vn, i)
4927                                  : aarch64_get_vec_s32 (cpu, vm, i));
4928           return;
4929
4930         case 3:
4931           HALT_UNALLOC;
4932         }
4933     }
4934 }
4935
4936 static void
4937 do_vec_min (sim_cpu *cpu)
4938 {
4939   /* instr[31]    = 0
4940      instr[30]    = full/half selector
4941      instr[29]    = SMIN (0) / UMIN (1)
4942      instr[28,24] = 0 1110
4943      instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
4944      instr[21]    = 1
4945      instr[20,16] = Vn
4946      instr[15,10] = 0110 11
4947      instr[9,5]   = Vm
4948      instr[4.0]   = Vd.  */
4949
4950   unsigned vm = INSTR (20, 16);
4951   unsigned vn = INSTR (9, 5);
4952   unsigned vd = INSTR (4, 0);
4953   unsigned i;
4954   int      full = INSTR (30, 30);
4955
4956   NYI_assert (28, 24, 0x0E);
4957   NYI_assert (21, 21, 1);
4958   NYI_assert (15, 10, 0x1B);
4959
4960   if (INSTR (29, 29))
4961     {
4962       switch (INSTR (23, 22))
4963         {
4964         case 0:
4965           for (i = 0; i < (full ? 16 : 8); i++)
4966             aarch64_set_vec_u8 (cpu, vd, i,
4967                                 aarch64_get_vec_u8 (cpu, vn, i)
4968                                 < aarch64_get_vec_u8 (cpu, vm, i)
4969                                 ? aarch64_get_vec_u8 (cpu, vn, i)
4970                                 : aarch64_get_vec_u8 (cpu, vm, i));
4971           return;
4972
4973         case 1:
4974           for (i = 0; i < (full ? 8 : 4); i++)
4975             aarch64_set_vec_u16 (cpu, vd, i,
4976                                  aarch64_get_vec_u16 (cpu, vn, i)
4977                                  < aarch64_get_vec_u16 (cpu, vm, i)
4978                                  ? aarch64_get_vec_u16 (cpu, vn, i)
4979                                  : aarch64_get_vec_u16 (cpu, vm, i));
4980           return;
4981
4982         case 2:
4983           for (i = 0; i < (full ? 4 : 2); i++)
4984             aarch64_set_vec_u32 (cpu, vd, i,
4985                                  aarch64_get_vec_u32 (cpu, vn, i)
4986                                  < aarch64_get_vec_u32 (cpu, vm, i)
4987                                  ? aarch64_get_vec_u32 (cpu, vn, i)
4988                                  : aarch64_get_vec_u32 (cpu, vm, i));
4989           return;
4990
4991         case 3:
4992           HALT_UNALLOC;
4993         }
4994     }
4995   else
4996     {
4997       switch (INSTR (23, 22))
4998         {
4999         case 0:
5000           for (i = 0; i < (full ? 16 : 8); i++)
5001             aarch64_set_vec_s8 (cpu, vd, i,
5002                                 aarch64_get_vec_s8 (cpu, vn, i)
5003                                 < aarch64_get_vec_s8 (cpu, vm, i)
5004                                 ? aarch64_get_vec_s8 (cpu, vn, i)
5005                                 : aarch64_get_vec_s8 (cpu, vm, i));
5006           return;
5007
5008         case 1:
5009           for (i = 0; i < (full ? 8 : 4); i++)
5010             aarch64_set_vec_s16 (cpu, vd, i,
5011                                  aarch64_get_vec_s16 (cpu, vn, i)
5012                                  < aarch64_get_vec_s16 (cpu, vm, i)
5013                                  ? aarch64_get_vec_s16 (cpu, vn, i)
5014                                  : aarch64_get_vec_s16 (cpu, vm, i));
5015           return;
5016
5017         case 2:
5018           for (i = 0; i < (full ? 4 : 2); i++)
5019             aarch64_set_vec_s32 (cpu, vd, i,
5020                                  aarch64_get_vec_s32 (cpu, vn, i)
5021                                  < aarch64_get_vec_s32 (cpu, vm, i)
5022                                  ? aarch64_get_vec_s32 (cpu, vn, i)
5023                                  : aarch64_get_vec_s32 (cpu, vm, i));
5024           return;
5025
5026         case 3:
5027           HALT_UNALLOC;
5028         }
5029     }
5030 }
5031
5032 static void
5033 do_vec_sub_long (sim_cpu *cpu)
5034 {
5035   /* instr[31]    = 0
5036      instr[30]    = lower (0) / upper (1)
5037      instr[29]    = signed (0) / unsigned (1)
5038      instr[28,24] = 0 1110
5039      instr[23,22] = size: bytes (00), half (01), word (10)
5040      instr[21]    = 1
5041      insrt[20,16] = Vm
5042      instr[15,10] = 0010 00
5043      instr[9,5]   = Vn
5044      instr[4,0]   = V dest.  */
5045
5046   unsigned size = INSTR (23, 22);
5047   unsigned vm = INSTR (20, 16);
5048   unsigned vn = INSTR (9, 5);
5049   unsigned vd = INSTR (4, 0);
5050   unsigned bias = 0;
5051   unsigned i;
5052
5053   NYI_assert (28, 24, 0x0E);
5054   NYI_assert (21, 21, 1);
5055   NYI_assert (15, 10, 0x08);
5056
5057   if (size == 3)
5058     HALT_UNALLOC;
5059
5060   switch (INSTR (30, 29))
5061     {
5062     case 2: /* SSUBL2.  */
5063       bias = 2;
5064     case 0: /* SSUBL.  */
5065       switch (size)
5066         {
5067         case 0:
5068           bias *= 3;
5069           for (i = 0; i < 8; i++)
5070             aarch64_set_vec_s16 (cpu, vd, i,
5071                                  aarch64_get_vec_s8 (cpu, vn, i + bias)
5072                                  - aarch64_get_vec_s8 (cpu, vm, i + bias));
5073           break;
5074
5075         case 1:
5076           bias *= 2;
5077           for (i = 0; i < 4; i++)
5078             aarch64_set_vec_s32 (cpu, vd, i,
5079                                  aarch64_get_vec_s16 (cpu, vn, i + bias)
5080                                  - aarch64_get_vec_s16 (cpu, vm, i + bias));
5081           break;
5082
5083         case 2:
5084           for (i = 0; i < 2; i++)
5085             aarch64_set_vec_s64 (cpu, vd, i,
5086                                  aarch64_get_vec_s32 (cpu, vn, i + bias)
5087                                  - aarch64_get_vec_s32 (cpu, vm, i + bias));
5088           break;
5089
5090         default:
5091           HALT_UNALLOC;
5092         }
5093       break;
5094
5095     case 3: /* USUBL2.  */
5096       bias = 2;
5097     case 1: /* USUBL.  */
5098       switch (size)
5099         {
5100         case 0:
5101           bias *= 3;
5102           for (i = 0; i < 8; i++)
5103             aarch64_set_vec_u16 (cpu, vd, i,
5104                                  aarch64_get_vec_u8 (cpu, vn, i + bias)
5105                                  - aarch64_get_vec_u8 (cpu, vm, i + bias));
5106           break;
5107
5108         case 1:
5109           bias *= 2;
5110           for (i = 0; i < 4; i++)
5111             aarch64_set_vec_u32 (cpu, vd, i,
5112                                  aarch64_get_vec_u16 (cpu, vn, i + bias)
5113                                  - aarch64_get_vec_u16 (cpu, vm, i + bias));
5114           break;
5115
5116         case 2:
5117           for (i = 0; i < 2; i++)
5118             aarch64_set_vec_u64 (cpu, vd, i,
5119                                  aarch64_get_vec_u32 (cpu, vn, i + bias)
5120                                  - aarch64_get_vec_u32 (cpu, vm, i + bias));
5121           break;
5122
5123         default:
5124           HALT_UNALLOC;
5125         }
5126       break;
5127     }
5128 }
5129
5130 static void
5131 do_vec_ADDP (sim_cpu *cpu)
5132 {
5133   /* instr[31]    = 0
5134      instr[30]    = half(0)/full(1)
5135      instr[29,24] = 00 1110
5136      instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5137      instr[21]    = 1
5138      insrt[20,16] = Vm
5139      instr[15,10] = 1011 11
5140      instr[9,5]   = Vn
5141      instr[4,0]   = V dest.  */
5142
5143   FRegister copy_vn;
5144   FRegister copy_vm;
5145   unsigned full = INSTR (30, 30);
5146   unsigned size = INSTR (23, 22);
5147   unsigned vm = INSTR (20, 16);
5148   unsigned vn = INSTR (9, 5);
5149   unsigned vd = INSTR (4, 0);
5150   unsigned i, range;
5151
5152   NYI_assert (29, 24, 0x0E);
5153   NYI_assert (21, 21, 1);
5154   NYI_assert (15, 10, 0x2F);
5155
5156   /* Make copies of the source registers in case vd == vn/vm.  */
5157   copy_vn = cpu->fr[vn];
5158   copy_vm = cpu->fr[vm];
5159
5160   switch (size)
5161     {
5162     case 0:
5163       range = full ? 8 : 4;
5164       for (i = 0; i < range; i++)
5165         {
5166           aarch64_set_vec_u8 (cpu, vd, i,
5167                               copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5168           aarch64_set_vec_u8 (cpu, vd, i + range,
5169                               copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5170         }
5171       return;
5172
5173     case 1:
5174       range = full ? 4 : 2;
5175       for (i = 0; i < range; i++)
5176         {
5177           aarch64_set_vec_u16 (cpu, vd, i,
5178                                copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5179           aarch64_set_vec_u16 (cpu, vd, i + range,
5180                                copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5181         }
5182       return;
5183
5184     case 2:
5185       range = full ? 2 : 1;
5186       for (i = 0; i < range; i++)
5187         {
5188           aarch64_set_vec_u32 (cpu, vd, i,
5189                                copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5190           aarch64_set_vec_u32 (cpu, vd, i + range,
5191                                copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
5192         }
5193       return;
5194
5195     case 3:
5196       if (! full)
5197         HALT_UNALLOC;
5198       aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5199       aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
5200       return;
5201     }
5202 }
5203
5204 static void
5205 do_vec_UMOV (sim_cpu *cpu)
5206 {
5207   /* instr[31]    = 0
5208      instr[30]    = 32-bit(0)/64-bit(1)
5209      instr[29,21] = 00 1110 000
5210      insrt[20,16] = size & index
5211      instr[15,10] = 0011 11
5212      instr[9,5]   = V source
5213      instr[4,0]   = R dest.  */
5214
5215   unsigned vs = INSTR (9, 5);
5216   unsigned rd = INSTR (4, 0);
5217   unsigned index;
5218
5219   NYI_assert (29, 21, 0x070);
5220   NYI_assert (15, 10, 0x0F);
5221
5222   if (INSTR (16, 16))
5223     {
5224       /* Byte transfer.  */
5225       index = INSTR (20, 17);
5226       aarch64_set_reg_u64 (cpu, rd, NO_SP,
5227                            aarch64_get_vec_u8 (cpu, vs, index));
5228     }
5229   else if (INSTR (17, 17))
5230     {
5231       index = INSTR (20, 18);
5232       aarch64_set_reg_u64 (cpu, rd, NO_SP,
5233                            aarch64_get_vec_u16 (cpu, vs, index));
5234     }
5235   else if (INSTR (18, 18))
5236     {
5237       index = INSTR (20, 19);
5238       aarch64_set_reg_u64 (cpu, rd, NO_SP,
5239                            aarch64_get_vec_u32 (cpu, vs, index));
5240     }
5241   else
5242     {
5243       if (INSTR (30, 30) != 1)
5244         HALT_UNALLOC;
5245
5246       index = INSTR (20, 20);
5247       aarch64_set_reg_u64 (cpu, rd, NO_SP,
5248                            aarch64_get_vec_u64 (cpu, vs, index));
5249     }
5250 }
5251
5252 static void
5253 do_vec_FABS (sim_cpu *cpu)
5254 {
5255   /* instr[31]    = 0
5256      instr[30]    = half(0)/full(1)
5257      instr[29,23] = 00 1110 1
5258      instr[22]    = float(0)/double(1)
5259      instr[21,16] = 10 0000
5260      instr[15,10] = 1111 10
5261      instr[9,5]   = Vn
5262      instr[4,0]   = Vd.  */
5263
5264   unsigned vn = INSTR (9, 5);
5265   unsigned vd = INSTR (4, 0);
5266   unsigned full = INSTR (30, 30);
5267   unsigned i;
5268
5269   NYI_assert (29, 23, 0x1D);
5270   NYI_assert (21, 10, 0x83E);
5271
5272   if (INSTR (22, 22))
5273     {
5274       if (! full)
5275         HALT_NYI;
5276
5277       for (i = 0; i < 2; i++)
5278         aarch64_set_vec_double (cpu, vd, i,
5279                                 fabs (aarch64_get_vec_double (cpu, vn, i)));
5280     }
5281   else
5282     {
5283       for (i = 0; i < (full ? 4 : 2); i++)
5284         aarch64_set_vec_float (cpu, vd, i,
5285                                fabsf (aarch64_get_vec_float (cpu, vn, i)));
5286     }
5287 }
5288
5289 static void
5290 do_vec_FCVTZS (sim_cpu *cpu)
5291 {
5292   /* instr[31]    = 0
5293      instr[30]    = half (0) / all (1)
5294      instr[29,23] = 00 1110 1
5295      instr[22]    = single (0) / double (1)
5296      instr[21,10] = 10 0001 1011 10
5297      instr[9,5]   = Rn
5298      instr[4,0]   = Rd.  */
5299
5300   unsigned rn = INSTR (9, 5);
5301   unsigned rd = INSTR (4, 0);
5302   unsigned full = INSTR (30, 30);
5303   unsigned i;
5304
5305   NYI_assert (31, 31, 0);
5306   NYI_assert (29, 23, 0x1D);
5307   NYI_assert (21, 10, 0x86E);
5308
5309   if (INSTR (22, 22))
5310     {
5311       if (! full)
5312         HALT_UNALLOC;
5313
5314       for (i = 0; i < 2; i++)
5315         aarch64_set_vec_s64 (cpu, rd, i,
5316                              (int64_t) aarch64_get_vec_double (cpu, rn, i));
5317     }
5318   else
5319     for (i = 0; i < (full ? 4 : 2); i++)
5320       aarch64_set_vec_s32 (cpu, rd, i,
5321                            (int32_t) aarch64_get_vec_float (cpu, rn, i));
5322 }
5323
5324 static void
5325 do_vec_REV64 (sim_cpu *cpu)
5326 {
5327   /* instr[31]    = 0
5328      instr[30]    = full/half
5329      instr[29,24] = 00 1110
5330      instr[23,22] = size
5331      instr[21,10] = 10 0000 0000 10
5332      instr[9,5]   = Rn
5333      instr[4,0]   = Rd.  */
5334
5335   unsigned rn = INSTR (9, 5);
5336   unsigned rd = INSTR (4, 0);
5337   unsigned size = INSTR (23, 22);
5338   unsigned full = INSTR (30, 30);
5339   unsigned i;
5340   FRegister val;
5341
5342   NYI_assert (29, 24, 0x0E);
5343   NYI_assert (21, 10, 0x802);
5344
5345   switch (size)
5346     {
5347     case 0:
5348       for (i = 0; i < (full ? 16 : 8); i++)
5349         val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);
5350       break;
5351
5352     case 1:
5353       for (i = 0; i < (full ? 8 : 4); i++)
5354         val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);
5355       break;
5356
5357     case 2:
5358       for (i = 0; i < (full ? 4 : 2); i++)
5359         val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);
5360       break;
5361
5362     case 3:
5363       HALT_UNALLOC;
5364     }
5365
5366   aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5367   if (full)
5368     aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5369 }
5370
5371 static void
5372 do_vec_REV16 (sim_cpu *cpu)
5373 {
5374   /* instr[31]    = 0
5375      instr[30]    = full/half
5376      instr[29,24] = 00 1110
5377      instr[23,22] = size
5378      instr[21,10] = 10 0000 0001 10
5379      instr[9,5]   = Rn
5380      instr[4,0]   = Rd.  */
5381
5382   unsigned rn = INSTR (9, 5);
5383   unsigned rd = INSTR (4, 0);
5384   unsigned size = INSTR (23, 22);
5385   unsigned full = INSTR (30, 30);
5386   unsigned i;
5387   FRegister val;
5388
5389   NYI_assert (29, 24, 0x0E);
5390   NYI_assert (21, 10, 0x806);
5391
5392   switch (size)
5393     {
5394     case 0:
5395       for (i = 0; i < (full ? 16 : 8); i++)
5396         val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);
5397       break;
5398
5399     default:
5400       HALT_UNALLOC;
5401     }
5402
5403   aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5404   if (full)
5405     aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5406 }
5407
5408 static void
5409 do_vec_op1 (sim_cpu *cpu)
5410 {
5411   /* instr[31]    = 0
5412      instr[30]    = half/full
5413      instr[29,24] = 00 1110
5414      instr[23,21] = ???
5415      instr[20,16] = Vm
5416      instr[15,10] = sub-opcode
5417      instr[9,5]   = Vn
5418      instr[4,0]   = Vd  */
5419   NYI_assert (29, 24, 0x0E);
5420
5421   if (INSTR (21, 21) == 0)
5422     {
5423       if (INSTR (23, 22) == 0)
5424         {
5425           if (INSTR (30, 30) == 1
5426               && INSTR (17, 14) == 0
5427               && INSTR (12, 10) == 7)
5428             return do_vec_ins_2 (cpu);
5429
5430           switch (INSTR (15, 10))
5431             {
5432             case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5433             case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5434             case 0x07: do_vec_INS (cpu); return;
5435             case 0x0A: do_vec_TRN (cpu); return;
5436
5437             case 0x0F:
5438               if (INSTR (17, 16) == 0)
5439                 {
5440                   do_vec_MOV_into_scalar (cpu);
5441                   return;
5442                 }
5443               break;
5444
5445             case 0x00:
5446             case 0x08:
5447             case 0x10:
5448             case 0x18:
5449               do_vec_TBL (cpu); return;
5450
5451             case 0x06:
5452             case 0x16:
5453               do_vec_UZP (cpu); return;
5454
5455             case 0x0E:
5456             case 0x1E:
5457               do_vec_ZIP (cpu); return;
5458
5459             default:
5460               HALT_NYI;
5461             }
5462         }
5463
5464       switch (INSTR (13, 10))
5465         {
5466         case 0x6: do_vec_UZP (cpu); return;
5467         case 0xE: do_vec_ZIP (cpu); return;
5468         case 0xA: do_vec_TRN (cpu); return;
5469         case 0xF: do_vec_UMOV (cpu); return;
5470         default:  HALT_NYI;
5471         }
5472     }
5473
5474   switch (INSTR (15, 10))
5475     {
5476     case 0x02: do_vec_REV64 (cpu); return;
5477     case 0x06: do_vec_REV16 (cpu); return;
5478
5479     case 0x07:
5480       switch (INSTR (23, 21))
5481         {
5482         case 1: do_vec_AND (cpu); return;
5483         case 3: do_vec_BIC (cpu); return;
5484         case 5: do_vec_ORR (cpu); return;
5485         case 7: do_vec_ORN (cpu); return;
5486         default: HALT_NYI;
5487         }
5488
5489     case 0x08: do_vec_sub_long (cpu); return;
5490     case 0x0a: do_vec_XTN (cpu); return;
5491     case 0x11: do_vec_SSHL (cpu); return;
5492     case 0x19: do_vec_max (cpu); return;
5493     case 0x1B: do_vec_min (cpu); return;
5494     case 0x21: do_vec_add (cpu); return;
5495     case 0x25: do_vec_MLA (cpu); return;
5496     case 0x27: do_vec_mul (cpu); return;
5497     case 0x2F: do_vec_ADDP (cpu); return;
5498     case 0x30: do_vec_mull (cpu); return;
5499     case 0x33: do_vec_FMLA (cpu); return;
5500     case 0x35: do_vec_fadd (cpu); return;
5501
5502     case 0x2E:
5503       switch (INSTR (20, 16))
5504         {
5505         case 0x00: do_vec_ABS (cpu); return;
5506         case 0x01: do_vec_FCVTZS (cpu); return;
5507         case 0x11: do_vec_ADDV (cpu); return;
5508         default: HALT_NYI;
5509         }
5510
5511     case 0x31:
5512     case 0x3B:
5513       do_vec_Fminmax (cpu); return;
5514
5515     case 0x0D:
5516     case 0x0F:
5517     case 0x22:
5518     case 0x23:
5519     case 0x26:
5520     case 0x2A:
5521     case 0x32:
5522     case 0x36:
5523     case 0x39:
5524     case 0x3A:
5525       do_vec_compare (cpu); return;
5526
5527     case 0x3E:
5528       do_vec_FABS (cpu); return;
5529
5530     default:
5531       HALT_NYI;
5532     }
5533 }
5534
5535 static void
5536 do_vec_xtl (sim_cpu *cpu)
5537 {
5538   /* instr[31]    = 0
5539      instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5540      instr[28,22] = 0 1111 00
5541      instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5542      instr[15,10] = 1010 01
5543      instr[9,5]   = V source
5544      instr[4,0]   = V dest.  */
5545
5546   unsigned vs = INSTR (9, 5);
5547   unsigned vd = INSTR (4, 0);
5548   unsigned i, shift, bias = 0;
5549
5550   NYI_assert (28, 22, 0x3C);
5551   NYI_assert (15, 10, 0x29);
5552
5553   switch (INSTR (30, 29))
5554     {
5555     case 2: /* SXTL2, SSHLL2.  */
5556       bias = 2;
5557     case 0: /* SXTL, SSHLL.  */
5558       if (INSTR (21, 21))
5559         {
5560           int64_t val1, val2;
5561
5562           shift = INSTR (20, 16);
5563           /* Get the source values before setting the destination values
5564              in case the source and destination are the same.  */
5565           val1 = aarch64_get_vec_s32 (cpu, vs, bias) << shift;
5566           val2 = aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
5567           aarch64_set_vec_s64 (cpu, vd, 0, val1);
5568           aarch64_set_vec_s64 (cpu, vd, 1, val2);
5569         }
5570       else if (INSTR (20, 20))
5571         {
5572           int32_t v[4];
5573           int32_t v1,v2,v3,v4;
5574
5575           shift = INSTR (19, 16);
5576           bias *= 2;
5577           for (i = 0; i < 4; i++)
5578             v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5579           for (i = 0; i < 4; i++)
5580             aarch64_set_vec_s32 (cpu, vd, i, v[i]);
5581         }
5582       else
5583         {
5584           int16_t v[8];
5585           NYI_assert (19, 19, 1);
5586
5587           shift = INSTR (18, 16);
5588           bias *= 3;
5589           for (i = 0; i < 8; i++)
5590             v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5591           for (i = 0; i < 8; i++)
5592             aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5593         }
5594       return;
5595
5596     case 3: /* UXTL2, USHLL2.  */
5597       bias = 2;
5598     case 1: /* UXTL, USHLL.  */
5599       if (INSTR (21, 21))
5600         {
5601           uint64_t v1, v2;
5602           shift = INSTR (20, 16);
5603           v1 = aarch64_get_vec_u32 (cpu, vs, bias) << shift;
5604           v2 = aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
5605           aarch64_set_vec_u64 (cpu, vd, 0, v1);
5606           aarch64_set_vec_u64 (cpu, vd, 1, v2);
5607         }
5608       else if (INSTR (20, 20))
5609         {
5610           uint32_t v[4];
5611           shift = INSTR (19, 16);
5612           bias *= 2;
5613           for (i = 0; i < 4; i++)
5614             v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5615           for (i = 0; i < 4; i++)
5616             aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5617         }
5618       else
5619         {
5620           uint16_t v[8];
5621           NYI_assert (19, 19, 1);
5622
5623           shift = INSTR (18, 16);
5624           bias *= 3;
5625           for (i = 0; i < 8; i++)
5626             v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5627           for (i = 0; i < 8; i++)
5628             aarch64_set_vec_u16 (cpu, vd, i, v[i]);
5629         }
5630       return;
5631     }
5632 }
5633
5634 static void
5635 do_vec_SHL (sim_cpu *cpu)
5636 {
5637   /* instr [31]    = 0
5638      instr [30]    = half(0)/full(1)
5639      instr [29,23] = 001 1110
5640      instr [22,16] = size and shift amount
5641      instr [15,10] = 01 0101
5642      instr [9, 5]  = Vs
5643      instr [4, 0]  = Vd.  */
5644
5645   int shift;
5646   int full    = INSTR (30, 30);
5647   unsigned vs = INSTR (9, 5);
5648   unsigned vd = INSTR (4, 0);
5649   unsigned i;
5650
5651   NYI_assert (29, 23, 0x1E);
5652   NYI_assert (15, 10, 0x15);
5653
5654   if (INSTR (22, 22))
5655     {
5656       shift = INSTR (21, 16);
5657
5658       if (full == 0)
5659         HALT_UNALLOC;
5660
5661       for (i = 0; i < 2; i++)
5662         {
5663           uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5664           aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5665         }
5666
5667       return;
5668     }
5669
5670   if (INSTR (21, 21))
5671     {
5672       shift = INSTR (20, 16);
5673
5674       for (i = 0; i < (full ? 4 : 2); i++)
5675         {
5676           uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5677           aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5678         }
5679
5680       return;
5681     }
5682
5683   if (INSTR (20, 20))
5684     {
5685       shift = INSTR (19, 16);
5686
5687       for (i = 0; i < (full ? 8 : 4); i++)
5688         {
5689           uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5690           aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5691         }
5692
5693       return;
5694     }
5695
5696   if (INSTR (19, 19) == 0)
5697     HALT_UNALLOC;
5698
5699   shift = INSTR (18, 16);
5700
5701   for (i = 0; i < (full ? 16 : 8); i++)
5702     {
5703       uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5704       aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5705     }
5706 }
5707
5708 static void
5709 do_vec_SSHR_USHR (sim_cpu *cpu)
5710 {
5711   /* instr [31]    = 0
5712      instr [30]    = half(0)/full(1)
5713      instr [29]    = signed(0)/unsigned(1)
5714      instr [28,23] = 0 1111 0
5715      instr [22,16] = size and shift amount
5716      instr [15,10] = 0000 01
5717      instr [9, 5]  = Vs
5718      instr [4, 0]  = Vd.  */
5719
5720   int full       = INSTR (30, 30);
5721   int sign       = ! INSTR (29, 29);
5722   unsigned shift = INSTR (22, 16);
5723   unsigned vs    = INSTR (9, 5);
5724   unsigned vd    = INSTR (4, 0);
5725   unsigned i;
5726
5727   NYI_assert (28, 23, 0x1E);
5728   NYI_assert (15, 10, 0x01);
5729
5730   if (INSTR (22, 22))
5731     {
5732       shift = 128 - shift;
5733
5734       if (full == 0)
5735         HALT_UNALLOC;
5736
5737       if (sign)
5738         for (i = 0; i < 2; i++)
5739           {
5740             int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
5741             aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
5742           }
5743       else
5744         for (i = 0; i < 2; i++)
5745           {
5746             uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5747             aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
5748           }
5749
5750       return;
5751     }
5752
5753   if (INSTR (21, 21))
5754     {
5755       shift = 64 - shift;
5756
5757       if (sign)
5758         for (i = 0; i < (full ? 4 : 2); i++)
5759           {
5760             int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
5761             aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
5762           }
5763       else
5764         for (i = 0; i < (full ? 4 : 2); i++)
5765           {
5766             uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5767             aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
5768           }
5769
5770       return;
5771     }
5772
5773   if (INSTR (20, 20))
5774     {
5775       shift = 32 - shift;
5776
5777       if (sign)
5778         for (i = 0; i < (full ? 8 : 4); i++)
5779           {
5780             int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
5781             aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
5782           }
5783       else
5784         for (i = 0; i < (full ? 8 : 4); i++)
5785           {
5786             uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5787             aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
5788           }
5789
5790       return;
5791     }
5792
5793   if (INSTR (19, 19) == 0)
5794     HALT_UNALLOC;
5795
5796   shift = 16 - shift;
5797
5798   if (sign)
5799     for (i = 0; i < (full ? 16 : 8); i++)
5800       {
5801         int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
5802         aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
5803       }
5804   else
5805     for (i = 0; i < (full ? 16 : 8); i++)
5806       {
5807         uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5808         aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
5809       }
5810 }
5811
5812 static void
5813 do_vec_MUL_by_element (sim_cpu *cpu)
5814 {
5815   /* instr[31]    = 0
5816      instr[30]    = half/full
5817      instr[29,24] = 00 1111
5818      instr[23,22] = size
5819      instr[21]    = L
5820      instr[20]    = M
5821      instr[19,16] = m
5822      instr[15,12] = 1000
5823      instr[11]    = H
5824      instr[10]    = 0
5825      instr[9,5]   = Vn
5826      instr[4,0]   = Vd  */
5827
5828   unsigned full     = INSTR (30, 30);
5829   unsigned L        = INSTR (21, 21);
5830   unsigned H        = INSTR (11, 11);
5831   unsigned vn       = INSTR (9, 5);
5832   unsigned vd       = INSTR (4, 0);
5833   unsigned size     = INSTR (23, 22);
5834   unsigned index;
5835   unsigned vm;
5836   unsigned e;
5837
5838   NYI_assert (29, 24, 0x0F);
5839   NYI_assert (15, 12, 0x8);
5840   NYI_assert (10, 10, 0);
5841
5842   switch (size)
5843     {
5844     case 1:
5845       {
5846         /* 16 bit products.  */
5847         uint16_t product;
5848         uint16_t element1;
5849         uint16_t element2;
5850
5851         index = (H << 2) | (L << 1) | INSTR (20, 20);
5852         vm = INSTR (19, 16);
5853         element2 = aarch64_get_vec_u16 (cpu, vm, index);
5854
5855         for (e = 0; e < (full ? 8 : 4); e ++)
5856           {
5857             element1 = aarch64_get_vec_u16 (cpu, vn, e);
5858             product  = element1 * element2;
5859             aarch64_set_vec_u16 (cpu, vd, e, product);
5860           }
5861       }
5862       break;
5863
5864     case 2:
5865       {
5866         /* 32 bit products.  */
5867         uint32_t product;
5868         uint32_t element1;
5869         uint32_t element2;
5870
5871         index = (H << 1) | L;
5872         vm = INSTR (20, 16);
5873         element2 = aarch64_get_vec_u32 (cpu, vm, index);
5874
5875         for (e = 0; e < (full ? 4 : 2); e ++)
5876           {
5877             element1 = aarch64_get_vec_u32 (cpu, vn, e);
5878             product  = element1 * element2;
5879             aarch64_set_vec_u32 (cpu, vd, e, product);
5880           }
5881       }
5882       break;
5883
5884     default:
5885       HALT_UNALLOC;
5886     }
5887 }
5888
5889 static void
5890 do_vec_op2 (sim_cpu *cpu)
5891 {
5892   /* instr[31]    = 0
5893      instr[30]    = half/full
5894      instr[29,24] = 00 1111
5895      instr[23]    = ?
5896      instr[22,16] = element size & index
5897      instr[15,10] = sub-opcode
5898      instr[9,5]   = Vm
5899      instr[4,0]   = Vd  */
5900
5901   NYI_assert (29, 24, 0x0F);
5902
5903   if (INSTR (23, 23) != 0)
5904     {
5905       switch (INSTR (15, 10))
5906         {
5907         case 0x20:
5908         case 0x22: do_vec_MUL_by_element (cpu); return;
5909         default:   HALT_NYI;
5910         }
5911     }
5912   else
5913     {
5914       switch (INSTR (15, 10))
5915         {
5916         case 0x01: do_vec_SSHR_USHR (cpu); return;
5917         case 0x15: do_vec_SHL (cpu); return;
5918         case 0x20:
5919         case 0x22: do_vec_MUL_by_element (cpu); return;
5920         case 0x29: do_vec_xtl (cpu); return;
5921         default:   HALT_NYI;
5922         }
5923     }
5924 }
5925
5926 static void
5927 do_vec_neg (sim_cpu *cpu)
5928 {
5929   /* instr[31]    = 0
5930      instr[30]    = full(1)/half(0)
5931      instr[29,24] = 10 1110
5932      instr[23,22] = size: byte(00), half (01), word (10), long (11)
5933      instr[21,10] = 1000 0010 1110
5934      instr[9,5]   = Vs
5935      instr[4,0]   = Vd  */
5936
5937   int    full = INSTR (30, 30);
5938   unsigned vs = INSTR (9, 5);
5939   unsigned vd = INSTR (4, 0);
5940   unsigned i;
5941
5942   NYI_assert (29, 24, 0x2E);
5943   NYI_assert (21, 10, 0x82E);
5944
5945   switch (INSTR (23, 22))
5946     {
5947     case 0:
5948       for (i = 0; i < (full ? 16 : 8); i++)
5949         aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
5950       return;
5951
5952     case 1:
5953       for (i = 0; i < (full ? 8 : 4); i++)
5954         aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
5955       return;
5956
5957     case 2:
5958       for (i = 0; i < (full ? 4 : 2); i++)
5959         aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
5960       return;
5961
5962     case 3:
5963       if (! full)
5964         HALT_NYI;
5965       for (i = 0; i < 2; i++)
5966         aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
5967       return;
5968     }
5969 }
5970
5971 static void
5972 do_vec_sqrt (sim_cpu *cpu)
5973 {
5974   /* instr[31]    = 0
5975      instr[30]    = full(1)/half(0)
5976      instr[29,23] = 101 1101
5977      instr[22]    = single(0)/double(1)
5978      instr[21,10] = 1000 0111 1110
5979      instr[9,5]   = Vs
5980      instr[4,0]   = Vd.  */
5981
5982   int    full = INSTR (30, 30);
5983   unsigned vs = INSTR (9, 5);
5984   unsigned vd = INSTR (4, 0);
5985   unsigned i;
5986
5987   NYI_assert (29, 23, 0x5B);
5988   NYI_assert (21, 10, 0x87E);
5989
5990   if (INSTR (22, 22) == 0)
5991     for (i = 0; i < (full ? 4 : 2); i++)
5992       aarch64_set_vec_float (cpu, vd, i,
5993                              sqrtf (aarch64_get_vec_float (cpu, vs, i)));
5994   else
5995     for (i = 0; i < 2; i++)
5996       aarch64_set_vec_double (cpu, vd, i,
5997                               sqrt (aarch64_get_vec_double (cpu, vs, i)));
5998 }
5999
6000 static void
6001 do_vec_mls_indexed (sim_cpu *cpu)
6002 {
6003   /* instr[31]       = 0
6004      instr[30]       = half(0)/full(1)
6005      instr[29,24]    = 10 1111
6006      instr[23,22]    = 16-bit(01)/32-bit(10)
6007      instr[21,20+11] = index (if 16-bit)
6008      instr[21+11]    = index (if 32-bit)
6009      instr[20,16]    = Vm
6010      instr[15,12]    = 0100
6011      instr[11]       = part of index
6012      instr[10]       = 0
6013      instr[9,5]      = Vs
6014      instr[4,0]      = Vd.  */
6015
6016   int    full = INSTR (30, 30);
6017   unsigned vs = INSTR (9, 5);
6018   unsigned vd = INSTR (4, 0);
6019   unsigned vm = INSTR (20, 16);
6020   unsigned i;
6021
6022   NYI_assert (15, 12, 4);
6023   NYI_assert (10, 10, 0);
6024
6025   switch (INSTR (23, 22))
6026     {
6027     case 1:
6028       {
6029         unsigned elem;
6030         uint32_t val;
6031
6032         if (vm > 15)
6033           HALT_NYI;
6034
6035         elem = (INSTR (21, 20) << 1) | INSTR (11, 11);
6036         val = aarch64_get_vec_u16 (cpu, vm, elem);
6037
6038         for (i = 0; i < (full ? 8 : 4); i++)
6039           aarch64_set_vec_u32 (cpu, vd, i,
6040                                aarch64_get_vec_u32 (cpu, vd, i) -
6041                                (aarch64_get_vec_u32 (cpu, vs, i) * val));
6042         return;
6043       }
6044
6045     case 2:
6046       {
6047         unsigned elem = (INSTR (21, 21) << 1) | INSTR (11, 11);
6048         uint64_t val = aarch64_get_vec_u32 (cpu, vm, elem);
6049
6050         for (i = 0; i < (full ? 4 : 2); i++)
6051           aarch64_set_vec_u64 (cpu, vd, i,
6052                                aarch64_get_vec_u64 (cpu, vd, i) -
6053                                (aarch64_get_vec_u64 (cpu, vs, i) * val));
6054         return;
6055       }
6056
6057     case 0:
6058     case 3:
6059     default:
6060       HALT_NYI;
6061     }
6062 }
6063
6064 static void
6065 do_vec_SUB (sim_cpu *cpu)
6066 {
6067   /* instr [31]    = 0
6068      instr [30]    = half(0)/full(1)
6069      instr [29,24] = 10 1110
6070      instr [23,22] = size: byte(00, half(01), word (10), long (11)
6071      instr [21]    = 1
6072      instr [20,16] = Vm
6073      instr [15,10] = 10 0001
6074      instr [9, 5]  = Vn
6075      instr [4, 0]  = Vd.  */
6076
6077   unsigned full = INSTR (30, 30);
6078   unsigned vm = INSTR (20, 16);
6079   unsigned vn = INSTR (9, 5);
6080   unsigned vd = INSTR (4, 0);
6081   unsigned i;
6082
6083   NYI_assert (29, 24, 0x2E);
6084   NYI_assert (21, 21, 1);
6085   NYI_assert (15, 10, 0x21);
6086
6087   switch (INSTR (23, 22))
6088     {
6089     case 0:
6090       for (i = 0; i < (full ? 16 : 8); i++)
6091         aarch64_set_vec_s8 (cpu, vd, i,
6092                             aarch64_get_vec_s8 (cpu, vn, i)
6093                             - aarch64_get_vec_s8 (cpu, vm, i));
6094       return;
6095
6096     case 1:
6097       for (i = 0; i < (full ? 8 : 4); i++)
6098         aarch64_set_vec_s16 (cpu, vd, i,
6099                              aarch64_get_vec_s16 (cpu, vn, i)
6100                              - aarch64_get_vec_s16 (cpu, vm, i));
6101       return;
6102
6103     case 2:
6104       for (i = 0; i < (full ? 4 : 2); i++)
6105         aarch64_set_vec_s32 (cpu, vd, i,
6106                              aarch64_get_vec_s32 (cpu, vn, i)
6107                              - aarch64_get_vec_s32 (cpu, vm, i));
6108       return;
6109
6110     case 3:
6111       if (full == 0)
6112         HALT_UNALLOC;
6113
6114       for (i = 0; i < 2; i++)
6115         aarch64_set_vec_s64 (cpu, vd, i,
6116                              aarch64_get_vec_s64 (cpu, vn, i)
6117                              - aarch64_get_vec_s64 (cpu, vm, i));
6118       return;
6119     }
6120 }
6121
6122 static void
6123 do_vec_MLS (sim_cpu *cpu)
6124 {
6125   /* instr [31]    = 0
6126      instr [30]    = half(0)/full(1)
6127      instr [29,24] = 10 1110
6128      instr [23,22] = size: byte(00, half(01), word (10)
6129      instr [21]    = 1
6130      instr [20,16] = Vm
6131      instr [15,10] = 10 0101
6132      instr [9, 5]  = Vn
6133      instr [4, 0]  = Vd.  */
6134
6135   unsigned full = INSTR (30, 30);
6136   unsigned vm = INSTR (20, 16);
6137   unsigned vn = INSTR (9, 5);
6138   unsigned vd = INSTR (4, 0);
6139   unsigned i;
6140
6141   NYI_assert (29, 24, 0x2E);
6142   NYI_assert (21, 21, 1);
6143   NYI_assert (15, 10, 0x25);
6144
6145   switch (INSTR (23, 22))
6146     {
6147     case 0:
6148       for (i = 0; i < (full ? 16 : 8); i++)
6149         aarch64_set_vec_u8 (cpu, vd, i,
6150                             (aarch64_get_vec_u8 (cpu, vn, i)
6151                              * aarch64_get_vec_u8 (cpu, vm, i))
6152                             - aarch64_get_vec_u8 (cpu, vd, i));
6153       return;
6154
6155     case 1:
6156       for (i = 0; i < (full ? 8 : 4); i++)
6157         aarch64_set_vec_u16 (cpu, vd, i,
6158                              (aarch64_get_vec_u16 (cpu, vn, i)
6159                               * aarch64_get_vec_u16 (cpu, vm, i))
6160                              - aarch64_get_vec_u16 (cpu, vd, i));
6161       return;
6162
6163     case 2:
6164       for (i = 0; i < (full ? 4 : 2); i++)
6165         aarch64_set_vec_u32 (cpu, vd, i,
6166                              (aarch64_get_vec_u32 (cpu, vn, i)
6167                               * aarch64_get_vec_u32 (cpu, vm, i))
6168                              - aarch64_get_vec_u32 (cpu, vd, i));
6169       return;
6170
6171     default:
6172       HALT_UNALLOC;
6173     }
6174 }
6175
6176 static void
6177 do_vec_FDIV (sim_cpu *cpu)
6178 {
6179   /* instr [31]    = 0
6180      instr [30]    = half(0)/full(1)
6181      instr [29,23] = 10 1110 0
6182      instr [22]    = float()/double(1)
6183      instr [21]    = 1
6184      instr [20,16] = Vm
6185      instr [15,10] = 1111 11
6186      instr [9, 5]  = Vn
6187      instr [4, 0]  = Vd.  */
6188
6189   unsigned full = INSTR (30, 30);
6190   unsigned vm = INSTR (20, 16);
6191   unsigned vn = INSTR (9, 5);
6192   unsigned vd = INSTR (4, 0);
6193   unsigned i;
6194
6195   NYI_assert (29, 23, 0x5C);
6196   NYI_assert (21, 21, 1);
6197   NYI_assert (15, 10, 0x3F);
6198
6199   if (INSTR (22, 22))
6200     {
6201       if (! full)
6202         HALT_UNALLOC;
6203
6204       for (i = 0; i < 2; i++)
6205         aarch64_set_vec_double (cpu, vd, i,
6206                                 aarch64_get_vec_double (cpu, vn, i)
6207                                 / aarch64_get_vec_double (cpu, vm, i));
6208     }
6209   else
6210     for (i = 0; i < (full ? 4 : 2); i++)
6211       aarch64_set_vec_float (cpu, vd, i,
6212                              aarch64_get_vec_float (cpu, vn, i)
6213                              / aarch64_get_vec_float (cpu, vm, i));
6214 }
6215
6216 static void
6217 do_vec_FMUL (sim_cpu *cpu)
6218 {
6219   /* instr [31]    = 0
6220      instr [30]    = half(0)/full(1)
6221      instr [29,23] = 10 1110 0
6222      instr [22]    = float(0)/double(1)
6223      instr [21]    = 1
6224      instr [20,16] = Vm
6225      instr [15,10] = 1101 11
6226      instr [9, 5]  = Vn
6227      instr [4, 0]  = Vd.  */
6228
6229   unsigned full = INSTR (30, 30);
6230   unsigned vm = INSTR (20, 16);
6231   unsigned vn = INSTR (9, 5);
6232   unsigned vd = INSTR (4, 0);
6233   unsigned i;
6234
6235   NYI_assert (29, 23, 0x5C);
6236   NYI_assert (21, 21, 1);
6237   NYI_assert (15, 10, 0x37);
6238
6239   if (INSTR (22, 22))
6240     {
6241       if (! full)
6242         HALT_UNALLOC;
6243
6244       for (i = 0; i < 2; i++)
6245         aarch64_set_vec_double (cpu, vd, i,
6246                                 aarch64_get_vec_double (cpu, vn, i)
6247                                 * aarch64_get_vec_double (cpu, vm, i));
6248     }
6249   else
6250     for (i = 0; i < (full ? 4 : 2); i++)
6251       aarch64_set_vec_float (cpu, vd, i,
6252                              aarch64_get_vec_float (cpu, vn, i)
6253                              * aarch64_get_vec_float (cpu, vm, i));
6254 }
6255
6256 static void
6257 do_vec_FADDP (sim_cpu *cpu)
6258 {
6259   /* instr [31]    = 0
6260      instr [30]    = half(0)/full(1)
6261      instr [29,23] = 10 1110 0
6262      instr [22]    = float(0)/double(1)
6263      instr [21]    = 1
6264      instr [20,16] = Vm
6265      instr [15,10] = 1101 01
6266      instr [9, 5]  = Vn
6267      instr [4, 0]  = Vd.  */
6268
6269   unsigned full = INSTR (30, 30);
6270   unsigned vm = INSTR (20, 16);
6271   unsigned vn = INSTR (9, 5);
6272   unsigned vd = INSTR (4, 0);
6273
6274   NYI_assert (29, 23, 0x5C);
6275   NYI_assert (21, 21, 1);
6276   NYI_assert (15, 10, 0x35);
6277
6278   if (INSTR (22, 22))
6279     {
6280       /* Extract values before adding them incase vd == vn/vm.  */
6281       double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6282       double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6283       double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6284       double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6285
6286       if (! full)
6287         HALT_UNALLOC;
6288
6289       aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6290       aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6291     }
6292   else
6293     {
6294       /* Extract values before adding them incase vd == vn/vm.  */
6295       float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6296       float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6297       float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6298       float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6299
6300       if (full)
6301         {
6302           float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6303           float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6304           float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6305           float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6306
6307           aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6308           aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6309           aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6310           aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6311         }
6312       else
6313         {
6314           aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6315           aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6316         }
6317     }
6318 }
6319
6320 static void
6321 do_vec_FSQRT (sim_cpu *cpu)
6322 {
6323   /* instr[31]    = 0
6324      instr[30]    = half(0)/full(1)
6325      instr[29,23] = 10 1110 1
6326      instr[22]    = single(0)/double(1)
6327      instr[21,10] = 10 0001 1111 10
6328      instr[9,5]   = Vsrc
6329      instr[4,0]   = Vdest.  */
6330
6331   unsigned vn = INSTR (9, 5);
6332   unsigned vd = INSTR (4, 0);
6333   unsigned full = INSTR (30, 30);
6334   int i;
6335
6336   NYI_assert (29, 23, 0x5D);
6337   NYI_assert (21, 10, 0x87E);
6338
6339   if (INSTR (22, 22))
6340     {
6341       if (! full)
6342         HALT_UNALLOC;
6343
6344       for (i = 0; i < 2; i++)
6345         aarch64_set_vec_double (cpu, vd, i,
6346                                 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6347     }
6348   else
6349     {
6350       for (i = 0; i < (full ? 4 : 2); i++)
6351         aarch64_set_vec_float (cpu, vd, i,
6352                                sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6353     }
6354 }
6355
6356 static void
6357 do_vec_FNEG (sim_cpu *cpu)
6358 {
6359   /* instr[31]    = 0
6360      instr[30]    = half (0)/full (1)
6361      instr[29,23] = 10 1110 1
6362      instr[22]    = single (0)/double (1)
6363      instr[21,10] = 10 0000 1111 10
6364      instr[9,5]   = Vsrc
6365      instr[4,0]   = Vdest.  */
6366
6367   unsigned vn = INSTR (9, 5);
6368   unsigned vd = INSTR (4, 0);
6369   unsigned full = INSTR (30, 30);
6370   int i;
6371
6372   NYI_assert (29, 23, 0x5D);
6373   NYI_assert (21, 10, 0x83E);
6374
6375   if (INSTR (22, 22))
6376     {
6377       if (! full)
6378         HALT_UNALLOC;
6379
6380       for (i = 0; i < 2; i++)
6381         aarch64_set_vec_double (cpu, vd, i,
6382                                 - aarch64_get_vec_double (cpu, vn, i));
6383     }
6384   else
6385     {
6386       for (i = 0; i < (full ? 4 : 2); i++)
6387         aarch64_set_vec_float (cpu, vd, i,
6388                                - aarch64_get_vec_float (cpu, vn, i));
6389     }
6390 }
6391
6392 static void
6393 do_vec_NOT (sim_cpu *cpu)
6394 {
6395   /* instr[31]    = 0
6396      instr[30]    = half (0)/full (1)
6397      instr[29,10] = 10 1110 0010 0000 0101 10
6398      instr[9,5]   = Vn
6399      instr[4.0]   = Vd.  */
6400
6401   unsigned vn = INSTR (9, 5);
6402   unsigned vd = INSTR (4, 0);
6403   unsigned i;
6404   int      full = INSTR (30, 30);
6405
6406   NYI_assert (29, 10, 0xB8816);
6407
6408   for (i = 0; i < (full ? 16 : 8); i++)
6409     aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
6410 }
6411
/* Count the leading zero bits of the low SIZE bits of VAL.  Bits of
   VAL above bit SIZE - 1 are ignored and a value whose low SIZE bits
   are all clear yields SIZE.  SIZE must be between 1 and 64.  */

static unsigned int
clz (uint64_t val, unsigned size)
{
  unsigned int zeros = 0;
  uint64_t probe;

  /* Walk a single-bit probe down from the top bit of the field until
     it hits a set bit or drops off the bottom.  */
  for (probe = (uint64_t) 1 << (size - 1); probe != 0; probe >>= 1)
    {
      if ((val & probe) != 0)
        break;

      zeros++;
    }

  return zeros;
}
6431
6432 static void
6433 do_vec_CLZ (sim_cpu *cpu)
6434 {
6435   /* instr[31]    = 0
6436      instr[30]    = half (0)/full (1)
6437      instr[29,24] = 10 1110
6438      instr[23,22] = size
6439      instr[21,10] = 10 0000 0100 10
6440      instr[9,5]   = Vn
6441      instr[4.0]   = Vd.  */
6442
6443   unsigned vn = INSTR (9, 5);
6444   unsigned vd = INSTR (4, 0);
6445   unsigned i;
6446   int      full = INSTR (30,30);
6447
6448   NYI_assert (29, 24, 0x2E);
6449   NYI_assert (21, 10, 0x812);
6450
6451   switch (INSTR (23, 22))
6452     {
6453     case 0:
6454       for (i = 0; i < (full ? 16 : 8); i++)
6455         aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6456       break;
6457     case 1:
6458       for (i = 0; i < (full ? 8 : 4); i++)
6459         aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6460       break;
6461     case 2:
6462       for (i = 0; i < (full ? 4 : 2); i++)
6463         aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6464       break;
6465     case 3:
6466       if (! full)
6467         HALT_UNALLOC;
6468       aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6469       aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6470       break;
6471     }
6472 }
6473
6474 static void
6475 do_vec_MOV_element (sim_cpu *cpu)
6476 {
6477   /* instr[31,21] = 0110 1110 000
6478      instr[20,16] = size & dest index
6479      instr[15]    = 0
6480      instr[14,11] = source index
6481      instr[10]    = 1
6482      instr[9,5]   = Vs
6483      instr[4.0]   = Vd.  */
6484
6485   unsigned vs = INSTR (9, 5);
6486   unsigned vd = INSTR (4, 0);
6487   unsigned src_index;
6488   unsigned dst_index;
6489
6490   NYI_assert (31, 21, 0x370);
6491   NYI_assert (15, 15, 0);
6492   NYI_assert (10, 10, 1);
6493
6494   if (INSTR (16, 16))
6495     {
6496       /* Move a byte.  */
6497       src_index = INSTR (14, 11);
6498       dst_index = INSTR (20, 17);
6499       aarch64_set_vec_u8 (cpu, vd, dst_index,
6500                           aarch64_get_vec_u8 (cpu, vs, src_index));
6501     }
6502   else if (INSTR (17, 17))
6503     {
6504       /* Move 16-bits.  */
6505       NYI_assert (11, 11, 0);
6506       src_index = INSTR (14, 12);
6507       dst_index = INSTR (20, 18);
6508       aarch64_set_vec_u16 (cpu, vd, dst_index,
6509                            aarch64_get_vec_u16 (cpu, vs, src_index));
6510     }
6511   else if (INSTR (18, 18))
6512     {
6513       /* Move 32-bits.  */
6514       NYI_assert (12, 11, 0);
6515       src_index = INSTR (14, 13);
6516       dst_index = INSTR (20, 19);
6517       aarch64_set_vec_u32 (cpu, vd, dst_index,
6518                            aarch64_get_vec_u32 (cpu, vs, src_index));
6519     }
6520   else
6521     {
6522       NYI_assert (19, 19, 1);
6523       NYI_assert (13, 11, 0);
6524       src_index = INSTR (14, 14);
6525       dst_index = INSTR (20, 20);
6526       aarch64_set_vec_u64 (cpu, vd, dst_index,
6527                            aarch64_get_vec_u64 (cpu, vs, src_index));
6528     }
6529 }
6530
6531 static void
6532 do_vec_REV32 (sim_cpu *cpu)
6533 {
6534   /* instr[31]    = 0
6535      instr[30]    = full/half
6536      instr[29,24] = 10 1110
6537      instr[23,22] = size
6538      instr[21,10] = 10 0000 0000 10
6539      instr[9,5]   = Rn
6540      instr[4,0]   = Rd.  */
6541
6542   unsigned rn = INSTR (9, 5);
6543   unsigned rd = INSTR (4, 0);
6544   unsigned size = INSTR (23, 22);
6545   unsigned full = INSTR (30, 30);
6546   unsigned i;
6547   FRegister val;
6548
6549   NYI_assert (29, 24, 0x2E);
6550   NYI_assert (21, 10, 0x802);
6551
6552   switch (size)
6553     {
6554     case 0:
6555       for (i = 0; i < (full ? 16 : 8); i++)
6556         val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
6557       break;
6558
6559     case 1:
6560       for (i = 0; i < (full ? 8 : 4); i++)
6561         val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
6562       break;
6563
6564     default:
6565       HALT_UNALLOC;
6566     }
6567
6568   aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
6569   if (full)
6570     aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
6571 }
6572
6573 static void
6574 do_vec_EXT (sim_cpu *cpu)
6575 {
6576   /* instr[31]    = 0
6577      instr[30]    = full/half
6578      instr[29,21] = 10 1110 000
6579      instr[20,16] = Vm
6580      instr[15]    = 0
6581      instr[14,11] = source index
6582      instr[10]    = 0
6583      instr[9,5]   = Vn
6584      instr[4.0]   = Vd.  */
6585
6586   unsigned vm = INSTR (20, 16);
6587   unsigned vn = INSTR (9, 5);
6588   unsigned vd = INSTR (4, 0);
6589   unsigned src_index = INSTR (14, 11);
6590   unsigned full = INSTR (30, 30);
6591   unsigned i;
6592   unsigned j;
6593   FRegister val;
6594
6595   NYI_assert (31, 21, 0x370);
6596   NYI_assert (15, 15, 0);
6597   NYI_assert (10, 10, 0);
6598
6599   if (!full && (src_index & 0x8))
6600     HALT_UNALLOC;
6601
6602   j = 0;
6603
6604   for (i = src_index; i < (full ? 16 : 8); i++)
6605     val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
6606   for (i = 0; i < src_index; i++)
6607     val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);
6608
6609   aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
6610   if (full)
6611     aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
6612 }
6613
6614 static void
6615 dexAdvSIMD0 (sim_cpu *cpu)
6616 {
6617   /* instr [28,25] = 0 111.  */
6618   if (    INSTR (15, 10) == 0x07
6619       && (INSTR (9, 5) ==
6620           INSTR (20, 16)))
6621     {
6622       if (INSTR (31, 21) == 0x075
6623           || INSTR (31, 21) == 0x275)
6624         {
6625           do_vec_MOV_whole_vector (cpu);
6626           return;
6627         }
6628     }
6629
6630   if (INSTR (29, 19) == 0x1E0)
6631     {
6632       do_vec_MOV_immediate (cpu);
6633       return;
6634     }
6635
6636   if (INSTR (29, 19) == 0x5E0)
6637     {
6638       do_vec_MVNI (cpu);
6639       return;
6640     }
6641
6642   if (INSTR (29, 19) == 0x1C0
6643       || INSTR (29, 19) == 0x1C1)
6644     {
6645       if (INSTR (15, 10) == 0x03)
6646         {
6647           do_vec_DUP_scalar_into_vector (cpu);
6648           return;
6649         }
6650     }
6651
6652   switch (INSTR (29, 24))
6653     {
6654     case 0x0E: do_vec_op1 (cpu); return;
6655     case 0x0F: do_vec_op2 (cpu); return;
6656
6657     case 0x2E:
6658       if (INSTR (21, 21) == 1)
6659         {
6660           switch (INSTR (15, 10))
6661             {
6662             case 0x02:
6663               do_vec_REV32 (cpu);
6664               return;
6665
6666             case 0x07:
6667               switch (INSTR (23, 22))
6668                 {
6669                 case 0: do_vec_EOR (cpu); return;
6670                 case 1: do_vec_BSL (cpu); return;
6671                 case 2:
6672                 case 3: do_vec_bit (cpu); return;
6673                 }
6674               break;
6675
6676             case 0x08: do_vec_sub_long (cpu); return;
6677             case 0x11: do_vec_USHL (cpu); return;
6678             case 0x12: do_vec_CLZ (cpu); return;
6679             case 0x16: do_vec_NOT (cpu); return;
6680             case 0x19: do_vec_max (cpu); return;
6681             case 0x1B: do_vec_min (cpu); return;
6682             case 0x21: do_vec_SUB (cpu); return;
6683             case 0x25: do_vec_MLS (cpu); return;
6684             case 0x31: do_vec_FminmaxNMP (cpu); return;
6685             case 0x35: do_vec_FADDP (cpu); return;
6686             case 0x37: do_vec_FMUL (cpu); return;
6687             case 0x3F: do_vec_FDIV (cpu); return;
6688
6689             case 0x3E:
6690               switch (INSTR (20, 16))
6691                 {
6692                 case 0x00: do_vec_FNEG (cpu); return;
6693                 case 0x01: do_vec_FSQRT (cpu); return;
6694                 default:   HALT_NYI;
6695                 }
6696
6697             case 0x0D:
6698             case 0x0F:
6699             case 0x22:
6700             case 0x23:
6701             case 0x26:
6702             case 0x2A:
6703             case 0x32:
6704             case 0x36:
6705             case 0x39:
6706             case 0x3A:
6707               do_vec_compare (cpu); return;
6708
6709             default:
6710               break;
6711             }
6712         }
6713
6714       if (INSTR (31, 21) == 0x370)
6715         {
6716           if (INSTR (10, 10))
6717             do_vec_MOV_element (cpu);
6718           else
6719             do_vec_EXT (cpu);
6720           return;
6721         }
6722
6723       switch (INSTR (21, 10))
6724         {
6725         case 0x82E: do_vec_neg (cpu); return;
6726         case 0x87E: do_vec_sqrt (cpu); return;
6727         default:
6728           if (INSTR (15, 10) == 0x30)
6729             {
6730               do_vec_mull (cpu);
6731               return;
6732             }
6733           break;
6734         }
6735       break;
6736
6737     case 0x2f:
6738       switch (INSTR (15, 10))
6739         {
6740         case 0x01: do_vec_SSHR_USHR (cpu); return;
6741         case 0x10:
6742         case 0x12: do_vec_mls_indexed (cpu); return;
6743         case 0x29: do_vec_xtl (cpu); return;
6744         default:
6745           HALT_NYI;
6746         }
6747
6748     default:
6749       break;
6750     }
6751
6752   HALT_NYI;
6753 }
6754
6755 /* 3 sources.  */
6756
6757 /* Float multiply add.  */
6758 static void
6759 fmadds (sim_cpu *cpu)
6760 {
6761   unsigned sa = INSTR (14, 10);
6762   unsigned sm = INSTR (20, 16);
6763   unsigned sn = INSTR ( 9,  5);
6764   unsigned sd = INSTR ( 4,  0);
6765
6766   aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
6767                         + aarch64_get_FP_float (cpu, sn)
6768                         * aarch64_get_FP_float (cpu, sm));
6769 }
6770
6771 /* Double multiply add.  */
6772 static void
6773 fmaddd (sim_cpu *cpu)
6774 {
6775   unsigned sa = INSTR (14, 10);
6776   unsigned sm = INSTR (20, 16);
6777   unsigned sn = INSTR ( 9,  5);
6778   unsigned sd = INSTR ( 4,  0);
6779
6780   aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
6781                          + aarch64_get_FP_double (cpu, sn)
6782                          * aarch64_get_FP_double (cpu, sm));
6783 }
6784
6785 /* Float multiply subtract.  */
6786 static void
6787 fmsubs (sim_cpu *cpu)
6788 {
6789   unsigned sa = INSTR (14, 10);
6790   unsigned sm = INSTR (20, 16);
6791   unsigned sn = INSTR ( 9,  5);
6792   unsigned sd = INSTR ( 4,  0);
6793
6794   aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
6795                         - aarch64_get_FP_float (cpu, sn)
6796                         * aarch64_get_FP_float (cpu, sm));
6797 }
6798
6799 /* Double multiply subtract.  */
6800 static void
6801 fmsubd (sim_cpu *cpu)
6802 {
6803   unsigned sa = INSTR (14, 10);
6804   unsigned sm = INSTR (20, 16);
6805   unsigned sn = INSTR ( 9,  5);
6806   unsigned sd = INSTR ( 4,  0);
6807
6808   aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
6809                          - aarch64_get_FP_double (cpu, sn)
6810                          * aarch64_get_FP_double (cpu, sm));
6811 }
6812
6813 /* Float negative multiply add.  */
6814 static void
6815 fnmadds (sim_cpu *cpu)
6816 {
6817   unsigned sa = INSTR (14, 10);
6818   unsigned sm = INSTR (20, 16);
6819   unsigned sn = INSTR ( 9,  5);
6820   unsigned sd = INSTR ( 4,  0);
6821
6822   aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
6823                         + (- aarch64_get_FP_float (cpu, sn))
6824                         * aarch64_get_FP_float (cpu, sm));
6825 }
6826
6827 /* Double negative multiply add.  */
6828 static void
6829 fnmaddd (sim_cpu *cpu)
6830 {
6831   unsigned sa = INSTR (14, 10);
6832   unsigned sm = INSTR (20, 16);
6833   unsigned sn = INSTR ( 9,  5);
6834   unsigned sd = INSTR ( 4,  0);
6835
6836   aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
6837                          + (- aarch64_get_FP_double (cpu, sn))
6838                          * aarch64_get_FP_double (cpu, sm));
6839 }
6840
6841 /* Float negative multiply subtract.  */
6842 static void
6843 fnmsubs (sim_cpu *cpu)
6844 {
6845   unsigned sa = INSTR (14, 10);
6846   unsigned sm = INSTR (20, 16);
6847   unsigned sn = INSTR ( 9,  5);
6848   unsigned sd = INSTR ( 4,  0);
6849
6850   aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
6851                         + aarch64_get_FP_float (cpu, sn)
6852                         * aarch64_get_FP_float (cpu, sm));
6853 }
6854
6855 /* Double negative multiply subtract.  */
6856 static void
6857 fnmsubd (sim_cpu *cpu)
6858 {
6859   unsigned sa = INSTR (14, 10);
6860   unsigned sm = INSTR (20, 16);
6861   unsigned sn = INSTR ( 9,  5);
6862   unsigned sd = INSTR ( 4,  0);
6863
6864   aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
6865                          + aarch64_get_FP_double (cpu, sn)
6866                          * aarch64_get_FP_double (cpu, sm));
6867 }
6868
6869 static void
6870 dexSimpleFPDataProc3Source (sim_cpu *cpu)
6871 {
6872   /* instr[31]    ==> M : 0 ==> OK, 1 ==> UNALLOC
6873      instr[30]    = 0
6874      instr[29]    ==> S :  0 ==> OK, 1 ==> UNALLOC
6875      instr[28,25] = 1111
6876      instr[24]    = 1
6877      instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
6878      instr[21]    ==> o1 : 0 ==> unnegated, 1 ==> negated
6879      instr[15]    ==> o2 : 0 ==> ADD, 1 ==> SUB  */
6880
6881   uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
6882   /* dispatch on combined type:o1:o2.  */
6883   uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
6884
6885   if (M_S != 0)
6886     HALT_UNALLOC;
6887
6888   switch (dispatch)
6889     {
6890     case 0: fmadds (cpu); return;
6891     case 1: fmsubs (cpu); return;
6892     case 2: fnmadds (cpu); return;
6893     case 3: fnmsubs (cpu); return;
6894     case 4: fmaddd (cpu); return;
6895     case 5: fmsubd (cpu); return;
6896     case 6: fnmaddd (cpu); return;
6897     case 7: fnmsubd (cpu); return;
6898     default:
6899       /* type > 1 is currently unallocated.  */
6900       HALT_UNALLOC;
6901     }
6902 }
6903
6904 static void
6905 dexSimpleFPFixedConvert (sim_cpu *cpu)
6906 {
6907   HALT_NYI;
6908 }
6909
6910 static void
6911 dexSimpleFPCondCompare (sim_cpu *cpu)
6912 {
6913   /* instr [31,23] = 0001 1110 0
6914      instr [22]    = type
6915      instr [21]    = 1
6916      instr [20,16] = Rm
6917      instr [15,12] = condition
6918      instr [11,10] = 01
6919      instr [9,5]   = Rn
6920      instr [4]     = 0
6921      instr [3,0]   = nzcv  */
6922
6923   unsigned rm = INSTR (20, 16);
6924   unsigned rn = INSTR (9, 5);
6925
6926   NYI_assert (31, 23, 0x3C);
6927   NYI_assert (11, 10, 0x1);
6928   NYI_assert (4,  4,  0);
6929
6930   if (! testConditionCode (cpu, INSTR (15, 12)))
6931     {
6932       aarch64_set_CPSR (cpu, INSTR (3, 0));
6933       return;
6934     }
6935
6936   if (INSTR (22, 22))
6937     {
6938       /* Double precision.  */
6939       double val1 = aarch64_get_vec_double (cpu, rn, 0);
6940       double val2 = aarch64_get_vec_double (cpu, rm, 0);
6941
6942       /* FIXME: Check for NaNs.  */
6943       if (val1 == val2)
6944         aarch64_set_CPSR (cpu, (Z | C));
6945       else if (val1 < val2)
6946         aarch64_set_CPSR (cpu, N);
6947       else /* val1 > val2 */
6948         aarch64_set_CPSR (cpu, C);
6949     }
6950   else
6951     {
6952       /* Single precision.  */
6953       float val1 = aarch64_get_vec_float (cpu, rn, 0);
6954       float val2 = aarch64_get_vec_float (cpu, rm, 0);
6955
6956       /* FIXME: Check for NaNs.  */
6957       if (val1 == val2)
6958         aarch64_set_CPSR (cpu, (Z | C));
6959       else if (val1 < val2)
6960         aarch64_set_CPSR (cpu, N);
6961       else /* val1 > val2 */
6962         aarch64_set_CPSR (cpu, C);
6963     }
6964 }
6965
6966 /* 2 sources.  */
6967
6968 /* Float add.  */
6969 static void
6970 fadds (sim_cpu *cpu)
6971 {
6972   unsigned sm = INSTR (20, 16);
6973   unsigned sn = INSTR ( 9,  5);
6974   unsigned sd = INSTR ( 4,  0);
6975
6976   aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
6977                         + aarch64_get_FP_float (cpu, sm));
6978 }
6979
6980 /* Double add.  */
6981 static void
6982 faddd (sim_cpu *cpu)
6983 {
6984   unsigned sm = INSTR (20, 16);
6985   unsigned sn = INSTR ( 9,  5);
6986   unsigned sd = INSTR ( 4,  0);
6987
6988   aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
6989                          + aarch64_get_FP_double (cpu, sm));
6990 }
6991
6992 /* Float divide.  */
6993 static void
6994 fdivs (sim_cpu *cpu)
6995 {
6996   unsigned sm = INSTR (20, 16);
6997   unsigned sn = INSTR ( 9,  5);
6998   unsigned sd = INSTR ( 4,  0);
6999
7000   aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7001                         / aarch64_get_FP_float (cpu, sm));
7002 }
7003
7004 /* Double divide.  */
7005 static void
7006 fdivd (sim_cpu *cpu)
7007 {
7008   unsigned sm = INSTR (20, 16);
7009   unsigned sn = INSTR ( 9,  5);
7010   unsigned sd = INSTR ( 4,  0);
7011
7012   aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7013                          / aarch64_get_FP_double (cpu, sm));
7014 }
7015
7016 /* Float multiply.  */
7017 static void
7018 fmuls (sim_cpu *cpu)
7019 {
7020   unsigned sm = INSTR (20, 16);
7021   unsigned sn = INSTR ( 9,  5);
7022   unsigned sd = INSTR ( 4,  0);
7023
7024   aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7025                         * aarch64_get_FP_float (cpu, sm));
7026 }
7027
7028 /* Double multiply.  */
7029 static void
7030 fmuld (sim_cpu *cpu)
7031 {
7032   unsigned sm = INSTR (20, 16);
7033   unsigned sn = INSTR ( 9,  5);
7034   unsigned sd = INSTR ( 4,  0);
7035
7036   aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7037                          * aarch64_get_FP_double (cpu, sm));
7038 }
7039
7040 /* Float negate and multiply.  */
7041 static void
7042 fnmuls (sim_cpu *cpu)
7043 {
7044   unsigned sm = INSTR (20, 16);
7045   unsigned sn = INSTR ( 9,  5);
7046   unsigned sd = INSTR ( 4,  0);
7047
7048   aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
7049                                     * aarch64_get_FP_float (cpu, sm)));
7050 }
7051
7052 /* Double negate and multiply.  */
7053 static void
7054 fnmuld (sim_cpu *cpu)
7055 {
7056   unsigned sm = INSTR (20, 16);
7057   unsigned sn = INSTR ( 9,  5);
7058   unsigned sd = INSTR ( 4,  0);
7059
7060   aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
7061                                      * aarch64_get_FP_double (cpu, sm)));
7062 }
7063
7064 /* Float subtract.  */
7065 static void
7066 fsubs (sim_cpu *cpu)
7067 {
7068   unsigned sm = INSTR (20, 16);
7069   unsigned sn = INSTR ( 9,  5);
7070   unsigned sd = INSTR ( 4,  0);
7071
7072   aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7073                         - aarch64_get_FP_float (cpu, sm));
7074 }
7075
7076 /* Double subtract.  */
7077 static void
7078 fsubd (sim_cpu *cpu)
7079 {
7080   unsigned sm = INSTR (20, 16);
7081   unsigned sn = INSTR ( 9,  5);
7082   unsigned sd = INSTR ( 4,  0);
7083
7084   aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7085                          - aarch64_get_FP_double (cpu, sm));
7086 }
7087
7088 static void
7089 do_FMINNM (sim_cpu *cpu)
7090 {
7091   /* instr[31,23] = 0 0011 1100
7092      instr[22]    = float(0)/double(1)
7093      instr[21]    = 1
7094      instr[20,16] = Sm
7095      instr[15,10] = 01 1110
7096      instr[9,5]   = Sn
7097      instr[4,0]   = Cpu  */
7098
7099   unsigned sm = INSTR (20, 16);
7100   unsigned sn = INSTR ( 9,  5);
7101   unsigned sd = INSTR ( 4,  0);
7102
7103   NYI_assert (31, 23, 0x03C);
7104   NYI_assert (15, 10, 0x1E);
7105
7106   if (INSTR (22, 22))
7107     aarch64_set_FP_double (cpu, sd,
7108                            dminnm (aarch64_get_FP_double (cpu, sn),
7109                                    aarch64_get_FP_double (cpu, sm)));
7110   else
7111     aarch64_set_FP_float (cpu, sd,
7112                           fminnm (aarch64_get_FP_float (cpu, sn),
7113                                   aarch64_get_FP_float (cpu, sm)));
7114 }
7115
7116 static void
7117 do_FMAXNM (sim_cpu *cpu)
7118 {
7119   /* instr[31,23] = 0 0011 1100
7120      instr[22]    = float(0)/double(1)
7121      instr[21]    = 1
7122      instr[20,16] = Sm
7123      instr[15,10] = 01 1010
7124      instr[9,5]   = Sn
7125      instr[4,0]   = Cpu  */
7126
7127   unsigned sm = INSTR (20, 16);
7128   unsigned sn = INSTR ( 9,  5);
7129   unsigned sd = INSTR ( 4,  0);
7130
7131   NYI_assert (31, 23, 0x03C);
7132   NYI_assert (15, 10, 0x1A);
7133
7134   if (INSTR (22, 22))
7135     aarch64_set_FP_double (cpu, sd,
7136                            dmaxnm (aarch64_get_FP_double (cpu, sn),
7137                                    aarch64_get_FP_double (cpu, sm)));
7138   else
7139     aarch64_set_FP_float (cpu, sd,
7140                           fmaxnm (aarch64_get_FP_float (cpu, sn),
7141                                   aarch64_get_FP_float (cpu, sm)));
7142 }
7143
7144 static void
7145 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7146 {
7147   /* instr[31]    ==> M : 0 ==> OK, 1 ==> UNALLOC
7148      instr[30]    = 0
7149      instr[29]    ==> S :  0 ==> OK, 1 ==> UNALLOC
7150      instr[28,25] = 1111
7151      instr[24]    = 0
7152      instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7153      instr[21]    = 1
7154      instr[20,16] = Vm
7155      instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7156                                0010 ==> FADD, 0011 ==> FSUB,
7157                                0100 ==> FMAX, 0101 ==> FMIN
7158                                0110 ==> FMAXNM, 0111 ==> FMINNM
7159                                1000 ==> FNMUL, ow ==> UNALLOC
7160      instr[11,10] = 10
7161      instr[9,5]   = Vn
7162      instr[4,0]   = Vd  */
7163
7164   uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7165   uint32_t type = INSTR (23, 22);
7166   /* Dispatch on opcode.  */
7167   uint32_t dispatch = INSTR (15, 12);
7168
7169   if (type > 1)
7170     HALT_UNALLOC;
7171
7172   if (M_S != 0)
7173     HALT_UNALLOC;
7174
7175   if (type)
7176     switch (dispatch)
7177       {
7178       case 0: fmuld (cpu); return;
7179       case 1: fdivd (cpu); return;
7180       case 2: faddd (cpu); return;
7181       case 3: fsubd (cpu); return;
7182       case 6: do_FMAXNM (cpu); return;
7183       case 7: do_FMINNM (cpu); return;
7184       case 8: fnmuld (cpu); return;
7185
7186         /* Have not yet implemented fmax and fmin.  */
7187       case 4:
7188       case 5:
7189         HALT_NYI;
7190
7191       default:
7192         HALT_UNALLOC;
7193       }
7194   else /* type == 0 => floats.  */
7195     switch (dispatch)
7196       {
7197       case 0: fmuls (cpu); return;
7198       case 1: fdivs (cpu); return;
7199       case 2: fadds (cpu); return;
7200       case 3: fsubs (cpu); return;
7201       case 6: do_FMAXNM (cpu); return;
7202       case 7: do_FMINNM (cpu); return;
7203       case 8: fnmuls (cpu); return;
7204
7205       case 4:
7206       case 5:
7207         HALT_NYI;
7208
7209       default:
7210         HALT_UNALLOC;
7211       }
7212 }
7213
7214 static void
7215 dexSimpleFPCondSelect (sim_cpu *cpu)
7216 {
7217   /* FCSEL
7218      instr[31,23] = 0 0011 1100
7219      instr[22]    = 0=>single 1=>double
7220      instr[21]    = 1
7221      instr[20,16] = Sm
7222      instr[15,12] = cond
7223      instr[11,10] = 11
7224      instr[9,5]   = Sn
7225      instr[4,0]   = Cpu  */
7226   unsigned sm = INSTR (20, 16);
7227   unsigned sn = INSTR ( 9, 5);
7228   unsigned sd = INSTR ( 4, 0);
7229   uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7230
7231   NYI_assert (31, 23, 0x03C);
7232   NYI_assert (11, 10, 0x3);
7233
7234   if (INSTR (22, 22))
7235     aarch64_set_FP_double (cpu, sd, set ? sn : sm);
7236   else
7237     aarch64_set_FP_float (cpu, sd, set ? sn : sm);
7238 }
7239
7240 /* Store 32 bit unscaled signed 9 bit.  */
7241 static void
7242 fsturs (sim_cpu *cpu, int32_t offset)
7243 {
7244   unsigned int rn = INSTR (9, 5);
7245   unsigned int st = INSTR (4, 0);
7246
7247   aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, st, 1) + offset,
7248                        aarch64_get_vec_u32 (cpu, rn, 0));
7249 }
7250
7251 /* Store 64 bit unscaled signed 9 bit.  */
7252 static void
7253 fsturd (sim_cpu *cpu, int32_t offset)
7254 {
7255   unsigned int rn = INSTR (9, 5);
7256   unsigned int st = INSTR (4, 0);
7257
7258   aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, st, 1) + offset,
7259                        aarch64_get_vec_u64 (cpu, rn, 0));
7260 }
7261
7262 /* Store 128 bit unscaled signed 9 bit.  */
7263 static void
7264 fsturq (sim_cpu *cpu, int32_t offset)
7265 {
7266   unsigned int rn = INSTR (9, 5);
7267   unsigned int st = INSTR (4, 0);
7268   FRegister a;
7269
7270   aarch64_get_FP_long_double (cpu, rn, & a);
7271   aarch64_set_mem_long_double (cpu,
7272                                aarch64_get_reg_u64 (cpu, st, 1)
7273                                + offset, a);
7274 }
7275
7276 /* TODO FP move register.  */
7277
7278 /* 32 bit fp to fp move register.  */
7279 static void
7280 ffmovs (sim_cpu *cpu)
7281 {
7282   unsigned int rn = INSTR (9, 5);
7283   unsigned int st = INSTR (4, 0);
7284
7285   aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7286 }
7287
7288 /* 64 bit fp to fp move register.  */
7289 static void
7290 ffmovd (sim_cpu *cpu)
7291 {
7292   unsigned int rn = INSTR (9, 5);
7293   unsigned int st = INSTR (4, 0);
7294
7295   aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7296 }
7297
7298 /* 32 bit GReg to Vec move register.  */
7299 static void
7300 fgmovs (sim_cpu *cpu)
7301 {
7302   unsigned int rn = INSTR (9, 5);
7303   unsigned int st = INSTR (4, 0);
7304
7305   aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7306 }
7307
7308 /* 64 bit g to fp move register.  */
7309 static void
7310 fgmovd (sim_cpu *cpu)
7311 {
7312   unsigned int rn = INSTR (9, 5);
7313   unsigned int st = INSTR (4, 0);
7314
7315   aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7316 }
7317
7318 /* 32 bit fp to g move register.  */
7319 static void
7320 gfmovs (sim_cpu *cpu)
7321 {
7322   unsigned int rn = INSTR (9, 5);
7323   unsigned int st = INSTR (4, 0);
7324
7325   aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7326 }
7327
7328 /* 64 bit fp to g move register.  */
7329 static void
7330 gfmovd (sim_cpu *cpu)
7331 {
7332   unsigned int rn = INSTR (9, 5);
7333   unsigned int st = INSTR (4, 0);
7334
7335   aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7336 }
7337
7338 /* FP move immediate
7339
7340    These install an immediate 8 bit value in the target register
7341    where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
7342    bit exponent.  */
7343
7344 static void
7345 fmovs (sim_cpu *cpu)
7346 {
7347   unsigned int sd = INSTR (4, 0);
7348   uint32_t imm = INSTR (20, 13);
7349   float f = fp_immediate_for_encoding_32 (imm);
7350
7351   aarch64_set_FP_float (cpu, sd, f);
7352 }
7353
7354 static void
7355 fmovd (sim_cpu *cpu)
7356 {
7357   unsigned int sd = INSTR (4, 0);
7358   uint32_t imm = INSTR (20, 13);
7359   double d = fp_immediate_for_encoding_64 (imm);
7360
7361   aarch64_set_FP_double (cpu, sd, d);
7362 }
7363
7364 static void
7365 dexSimpleFPImmediate (sim_cpu *cpu)
7366 {
7367   /* instr[31,23] == 00111100
7368      instr[22]    == type : single(0)/double(1)
7369      instr[21]    == 1
7370      instr[20,13] == imm8
7371      instr[12,10] == 100
7372      instr[9,5]   == imm5 : 00000 ==> PK, ow ==> UNALLOC
7373      instr[4,0]   == Rd  */
7374   uint32_t imm5 = INSTR (9, 5);
7375
7376   NYI_assert (31, 23, 0x3C);
7377
7378   if (imm5 != 0)
7379     HALT_UNALLOC;
7380
7381   if (INSTR (22, 22))
7382     fmovd (cpu);
7383   else
7384     fmovs (cpu);
7385 }
7386
7387 /* TODO specific decode and execute for group Load Store.  */
7388
7389 /* TODO FP load/store single register (unscaled offset).  */
7390
7391 /* TODO load 8 bit unscaled signed 9 bit.  */
7392 /* TODO load 16 bit unscaled signed 9 bit.  */
7393
7394 /* Load 32 bit unscaled signed 9 bit.  */
7395 static void
7396 fldurs (sim_cpu *cpu, int32_t offset)
7397 {
7398   unsigned int rn = INSTR (9, 5);
7399   unsigned int st = INSTR (4, 0);
7400
7401   aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7402                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7403 }
7404
7405 /* Load 64 bit unscaled signed 9 bit.  */
7406 static void
7407 fldurd (sim_cpu *cpu, int32_t offset)
7408 {
7409   unsigned int rn = INSTR (9, 5);
7410   unsigned int st = INSTR (4, 0);
7411
7412   aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7413                        (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7414 }
7415
7416 /* Load 128 bit unscaled signed 9 bit.  */
7417 static void
7418 fldurq (sim_cpu *cpu, int32_t offset)
7419 {
7420   unsigned int rn = INSTR (9, 5);
7421   unsigned int st = INSTR (4, 0);
7422   FRegister a;
7423   uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7424
7425   aarch64_get_mem_long_double (cpu, addr, & a);
7426   aarch64_set_FP_long_double (cpu, st, a);
7427 }
7428
7429 /* TODO store 8 bit unscaled signed 9 bit.  */
7430 /* TODO store 16 bit unscaled signed 9 bit.  */
7431
7432
7433 /* 1 source.  */
7434
7435 /* Float absolute value.  */
7436 static void
7437 fabss (sim_cpu *cpu)
7438 {
7439   unsigned sn = INSTR (9, 5);
7440   unsigned sd = INSTR (4, 0);
7441   float value = aarch64_get_FP_float (cpu, sn);
7442
7443   aarch64_set_FP_float (cpu, sd, fabsf (value));
7444 }
7445
7446 /* Double absolute value.  */
7447 static void
7448 fabcpu (sim_cpu *cpu)
7449 {
7450   unsigned sn = INSTR (9, 5);
7451   unsigned sd = INSTR (4, 0);
7452   double value = aarch64_get_FP_double (cpu, sn);
7453
7454   aarch64_set_FP_double (cpu, sd, fabs (value));
7455 }
7456
7457 /* Float negative value.  */
7458 static void
7459 fnegs (sim_cpu *cpu)
7460 {
7461   unsigned sn = INSTR (9, 5);
7462   unsigned sd = INSTR (4, 0);
7463
7464   aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7465 }
7466
7467 /* Double negative value.  */
7468 static void
7469 fnegd (sim_cpu *cpu)
7470 {
7471   unsigned sn = INSTR (9, 5);
7472   unsigned sd = INSTR (4, 0);
7473
7474   aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7475 }
7476
7477 /* Float square root.  */
7478 static void
7479 fsqrts (sim_cpu *cpu)
7480 {
7481   unsigned sn = INSTR (9, 5);
7482   unsigned sd = INSTR (4, 0);
7483
7484   aarch64_set_FP_float (cpu, sd, sqrt (aarch64_get_FP_float (cpu, sn)));
7485 }
7486
7487 /* Double square root.  */
7488 static void
7489 fsqrtd (sim_cpu *cpu)
7490 {
7491   unsigned sn = INSTR (9, 5);
7492   unsigned sd = INSTR (4, 0);
7493
7494   aarch64_set_FP_double (cpu, sd,
7495                          sqrt (aarch64_get_FP_double (cpu, sn)));
7496 }
7497
7498 /* Convert double to float.  */
7499 static void
7500 fcvtds (sim_cpu *cpu)
7501 {
7502   unsigned sn = INSTR (9, 5);
7503   unsigned sd = INSTR (4, 0);
7504
7505   aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7506 }
7507
7508 /* Convert float to double.  */
7509 static void
7510 fcvtcpu (sim_cpu *cpu)
7511 {
7512   unsigned sn = INSTR (9, 5);
7513   unsigned sd = INSTR (4, 0);
7514
7515   aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7516 }
7517
7518 static void
7519 do_FRINT (sim_cpu *cpu)
7520 {
7521   /* instr[31,23] = 0001 1110 0
7522      instr[22]    = single(0)/double(1)
7523      instr[21,18] = 1001
7524      instr[17,15] = rounding mode
7525      instr[14,10] = 10000
7526      instr[9,5]   = source
7527      instr[4,0]   = dest  */
7528
7529   float val;
7530   unsigned rs = INSTR (9, 5);
7531   unsigned rd = INSTR (4, 0);
7532   unsigned int rmode = INSTR (17, 15);
7533
7534   NYI_assert (31, 23, 0x03C);
7535   NYI_assert (21, 18, 0x9);
7536   NYI_assert (14, 10, 0x10);
7537
7538   if (rmode == 6 || rmode == 7)
7539     /* FIXME: Add support for rmode == 6 exactness check.  */
7540     rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7541
7542   if (INSTR (22, 22))
7543     {
7544       double val = aarch64_get_FP_double (cpu, rs);
7545
7546       switch (rmode)
7547         {
7548         case 0: /* mode N: nearest or even.  */
7549           {
7550             double rval = round (val);
7551
7552             if (val - rval == 0.5)
7553               {
7554                 if (((rval / 2.0) * 2.0) != rval)
7555                   rval += 1.0;
7556               }
7557
7558             aarch64_set_FP_double (cpu, rd, round (val));
7559             return;
7560           }
7561
7562         case 1: /* mode P: towards +inf.  */
7563           if (val < 0.0)
7564             aarch64_set_FP_double (cpu, rd, trunc (val));
7565           else
7566             aarch64_set_FP_double (cpu, rd, round (val));
7567           return;
7568
7569         case 2: /* mode M: towards -inf.  */
7570           if (val < 0.0)
7571             aarch64_set_FP_double (cpu, rd, round (val));
7572           else
7573             aarch64_set_FP_double (cpu, rd, trunc (val));
7574           return;
7575
7576         case 3: /* mode Z: towards 0.  */
7577           aarch64_set_FP_double (cpu, rd, trunc (val));
7578           return;
7579
7580         case 4: /* mode A: away from 0.  */
7581           aarch64_set_FP_double (cpu, rd, round (val));
7582           return;
7583
7584         case 6: /* mode X: use FPCR with exactness check.  */
7585         case 7: /* mode I: use FPCR mode.  */
7586           HALT_NYI;
7587
7588         default:
7589           HALT_UNALLOC;
7590         }
7591     }
7592
7593   val = aarch64_get_FP_float (cpu, rs);
7594
7595   switch (rmode)
7596     {
7597     case 0: /* mode N: nearest or even.  */
7598       {
7599         float rval = roundf (val);
7600
7601         if (val - rval == 0.5)
7602           {
7603             if (((rval / 2.0) * 2.0) != rval)
7604               rval += 1.0;
7605           }
7606
7607         aarch64_set_FP_float (cpu, rd, rval);
7608         return;
7609       }
7610
7611     case 1: /* mode P: towards +inf.  */
7612       if (val < 0.0)
7613         aarch64_set_FP_float (cpu, rd, truncf (val));
7614       else
7615         aarch64_set_FP_float (cpu, rd, roundf (val));
7616       return;
7617
7618     case 2: /* mode M: towards -inf.  */
7619       if (val < 0.0)
7620         aarch64_set_FP_float (cpu, rd, truncf (val));
7621       else
7622         aarch64_set_FP_float (cpu, rd, roundf (val));
7623       return;
7624
7625     case 3: /* mode Z: towards 0.  */
7626       aarch64_set_FP_float (cpu, rd, truncf (val));
7627       return;
7628
7629     case 4: /* mode A: away from 0.  */
7630       aarch64_set_FP_float (cpu, rd, roundf (val));
7631       return;
7632
7633     case 6: /* mode X: use FPCR with exactness check.  */
7634     case 7: /* mode I: use FPCR mode.  */
7635       HALT_NYI;
7636
7637     default:
7638       HALT_UNALLOC;
7639     }
7640 }
7641
7642 /* Convert half to float.  */
7643 static void
7644 do_FCVT_half_to_single (sim_cpu *cpu)
7645 {
7646   unsigned rn = INSTR (9, 5);
7647   unsigned rd = INSTR (4, 0);
7648
7649   NYI_assert (31, 10, 0x7B890);
7650
7651   aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half  (cpu, rn));
7652 }
7653
7654 /* Convert half to double.  */
7655 static void
7656 do_FCVT_half_to_double (sim_cpu *cpu)
7657 {
7658   unsigned rn = INSTR (9, 5);
7659   unsigned rd = INSTR (4, 0);
7660
7661   NYI_assert (31, 10, 0x7B8B0);
7662
7663   aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half  (cpu, rn));
7664 }
7665
7666 static void
7667 do_FCVT_single_to_half (sim_cpu *cpu)
7668 {
7669   unsigned rn = INSTR (9, 5);
7670   unsigned rd = INSTR (4, 0);
7671
7672   NYI_assert (31, 10, 0x788F0);
7673
7674   aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float  (cpu, rn));
7675 }
7676
7677 /* Convert double to half.  */
7678 static void
7679 do_FCVT_double_to_half (sim_cpu *cpu)
7680 {
7681   unsigned rn = INSTR (9, 5);
7682   unsigned rd = INSTR (4, 0);
7683
7684   NYI_assert (31, 10, 0x798F0);
7685
7686   aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double  (cpu, rn));
7687 }
7688
7689 static void
7690 dexSimpleFPDataProc1Source (sim_cpu *cpu)
7691 {
7692   /* instr[31]    ==> M : 0 ==> OK, 1 ==> UNALLOC
7693      instr[30]    = 0
7694      instr[29]    ==> S :  0 ==> OK, 1 ==> UNALLOC
7695      instr[28,25] = 1111
7696      instr[24]    = 0
7697      instr[23,22] ==> type : 00 ==> source is single,
7698                              01 ==> source is double
7699                              10 ==> UNALLOC
7700                              11 ==> UNALLOC or source is half
7701      instr[21]    = 1
7702      instr[20,15] ==> opcode : with type 00 or 01
7703                                000000 ==> FMOV, 000001 ==> FABS,
7704                                000010 ==> FNEG, 000011 ==> FSQRT,
7705                                000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
7706                                000110 ==> UNALLOC, 000111 ==> FCVT (to half)
7707                                001000 ==> FRINTN, 001001 ==> FRINTP,
7708                                001010 ==> FRINTM, 001011 ==> FRINTZ,
7709                                001100 ==> FRINTA, 001101 ==> UNALLOC
7710                                001110 ==> FRINTX, 001111 ==> FRINTI
7711                                with type 11
7712                                000100 ==> FCVT (half-to-single)
7713                                000101 ==> FCVT (half-to-double)
7714                                instr[14,10] = 10000.  */
7715
7716   uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7717   uint32_t type   = INSTR (23, 22);
7718   uint32_t opcode = INSTR (20, 15);
7719
7720   if (M_S != 0)
7721     HALT_UNALLOC;
7722
7723   if (type == 3)
7724     {
7725       if (opcode == 4)
7726         do_FCVT_half_to_single (cpu);
7727       else if (opcode == 5)
7728         do_FCVT_half_to_double (cpu);
7729       else
7730         HALT_UNALLOC;
7731       return;
7732     }
7733
7734   if (type == 2)
7735     HALT_UNALLOC;
7736
7737   switch (opcode)
7738     {
7739     case 0:
7740       if (type)
7741         ffmovd (cpu);
7742       else
7743         ffmovs (cpu);
7744       return;
7745
7746     case 1:
7747       if (type)
7748         fabcpu (cpu);
7749       else
7750         fabss (cpu);
7751       return;
7752
7753     case 2:
7754       if (type)
7755         fnegd (cpu);
7756       else
7757         fnegs (cpu);
7758       return;
7759
7760     case 3:
7761       if (type)
7762         fsqrtd (cpu);
7763       else
7764         fsqrts (cpu);
7765       return;
7766
7767     case 4:
7768       if (type)
7769         fcvtds (cpu);
7770       else
7771         HALT_UNALLOC;
7772       return;
7773
7774     case 5:
7775       if (type)
7776         HALT_UNALLOC;
7777       fcvtcpu (cpu);
7778       return;
7779
7780     case 8:             /* FRINTN etc.  */
7781     case 9:
7782     case 10:
7783     case 11:
7784     case 12:
7785     case 14:
7786     case 15:
7787        do_FRINT (cpu);
7788        return;
7789
7790     case 7:
7791       if (INSTR (22, 22))
7792         do_FCVT_double_to_half (cpu);
7793       else
7794         do_FCVT_single_to_half (cpu);
7795       return;
7796
7797     case 13:
7798       HALT_NYI;
7799
7800     default:
7801       HALT_UNALLOC;
7802     }
7803 }
7804
7805 /* 32 bit signed int to float.  */
7806 static void
7807 scvtf32 (sim_cpu *cpu)
7808 {
7809   unsigned rn = INSTR (9, 5);
7810   unsigned sd = INSTR (4, 0);
7811
7812   aarch64_set_FP_float
7813     (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
7814 }
7815
7816 /* signed int to float.  */
7817 static void
7818 scvtf (sim_cpu *cpu)
7819 {
7820   unsigned rn = INSTR (9, 5);
7821   unsigned sd = INSTR (4, 0);
7822
7823   aarch64_set_FP_float
7824     (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
7825 }
7826
7827 /* 32 bit signed int to double.  */
7828 static void
7829 scvtd32 (sim_cpu *cpu)
7830 {
7831   unsigned rn = INSTR (9, 5);
7832   unsigned sd = INSTR (4, 0);
7833
7834   aarch64_set_FP_double
7835     (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
7836 }
7837
7838 /* signed int to double.  */
7839 static void
7840 scvtd (sim_cpu *cpu)
7841 {
7842   unsigned rn = INSTR (9, 5);
7843   unsigned sd = INSTR (4, 0);
7844
7845   aarch64_set_FP_double
7846     (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
7847 }
7848
/* Host integer limits expressed in each FP format.  RAISE_EXCEPTIONS
   builds these names by token pasting (FTYPE##_##ITYPE##_MAX/_MIN)
   and compares an FP value against them to detect out-of-range
   conversions.  */

/* Bounds of "int".  */
static const float  FLOAT_INT_MIN   = (float)  INT_MIN;
static const float  FLOAT_INT_MAX   = (float)  INT_MAX;
static const double DOUBLE_INT_MIN  = (double) INT_MIN;
static const double DOUBLE_INT_MAX  = (double) INT_MAX;

/* Bounds of "long".  */
static const float  FLOAT_LONG_MIN  = (float)  LONG_MIN;
static const float  FLOAT_LONG_MAX  = (float)  LONG_MAX;
static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
7857
/* Check for FP exception conditions after converting the FP value F
   to the integer VALUE.  FTYPE is FLOAT or DOUBLE and ITYPE is INT
   or LONG; together they select the limits to compare against.

     NaN raises IO and gives zero
     Infinity raises IO and saturates according to its sign
     Out of Range raises IO and IX and saturates value
     Denormal raises ID and IX and sets to zero
     Zero gives zero.

   A normal value that is in range leaves VALUE untouched, so the
   caller must already have stored the converted value in it.  The
   macro expects a variable named "cpu" to be in scope.  */
#define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE)        \
  do                                                    \
    {                                                   \
      switch (fpclassify (F))                           \
        {                                               \
        case FP_NAN:                                    \
          aarch64_set_FPSR (cpu, IO);                   \
          VALUE = 0;                                    \
          break;                                        \
                                                        \
        case FP_INFINITE:                               \
          aarch64_set_FPSR (cpu, IO);                   \
          /* Negative infinity saturates downwards.  */ \
          if (signbit (F))                              \
            VALUE = ITYPE##_MIN;                        \
          else                                          \
            VALUE = ITYPE##_MAX;                        \
          break;                                        \
                                                        \
        case FP_NORMAL:                                 \
          if ((F) >= FTYPE##_##ITYPE##_MAX)             \
            {                                           \
              aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX);    \
              VALUE = ITYPE##_MAX;                      \
            }                                           \
          else if ((F) <= FTYPE##_##ITYPE##_MIN)        \
            {                                           \
              aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX);    \
              VALUE = ITYPE##_MIN;                      \
            }                                           \
          break;                                        \
                                                        \
        case FP_SUBNORMAL:                              \
          aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID);   \
          VALUE = 0;                                    \
          break;                                        \
                                                        \
        default:                                        \
        case FP_ZERO:                                   \
          VALUE = 0;                                    \
          break;                                        \
        }                                               \
    }                                                   \
  while (0)
7902
7903 /* 32 bit convert float to signed int truncate towards zero.  */
7904 static void
7905 fcvtszs32 (sim_cpu *cpu)
7906 {
7907   unsigned sn = INSTR (9, 5);
7908   unsigned rd = INSTR (4, 0);
7909   /* TODO : check that this rounds toward zero.  */
7910   float   f = aarch64_get_FP_float (cpu, sn);
7911   int32_t value = (int32_t) f;
7912
7913   RAISE_EXCEPTIONS (f, value, FLOAT, INT);
7914
7915   /* Avoid sign extension to 64 bit.  */
7916   aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
7917 }
7918
7919 /* 64 bit convert float to signed int truncate towards zero.  */
7920 static void
7921 fcvtszs (sim_cpu *cpu)
7922 {
7923   unsigned sn = INSTR (9, 5);
7924   unsigned rd = INSTR (4, 0);
7925   float f = aarch64_get_FP_float (cpu, sn);
7926   int64_t value = (int64_t) f;
7927
7928   RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
7929
7930   aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
7931 }
7932
7933 /* 32 bit convert double to signed int truncate towards zero.  */
7934 static void
7935 fcvtszd32 (sim_cpu *cpu)
7936 {
7937   unsigned sn = INSTR (9, 5);
7938   unsigned rd = INSTR (4, 0);
7939   /* TODO : check that this rounds toward zero.  */
7940   double   d = aarch64_get_FP_double (cpu, sn);
7941   int32_t  value = (int32_t) d;
7942
7943   RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
7944
7945   /* Avoid sign extension to 64 bit.  */
7946   aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
7947 }
7948
7949 /* 64 bit convert double to signed int truncate towards zero.  */
7950 static void
7951 fcvtszd (sim_cpu *cpu)
7952 {
7953   unsigned sn = INSTR (9, 5);
7954   unsigned rd = INSTR (4, 0);
7955   /* TODO : check that this rounds toward zero.  */
7956   double  d = aarch64_get_FP_double (cpu, sn);
7957   int64_t value;
7958
7959   value = (int64_t) d;
7960
7961   RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
7962
7963   aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
7964 }
7965
7966 static void
7967 do_fcvtzu (sim_cpu *cpu)
7968 {
7969   /* instr[31]    = size: 32-bit (0), 64-bit (1)
7970      instr[30,23] = 00111100
7971      instr[22]    = type: single (0)/ double (1)
7972      instr[21]    = enable (0)/disable(1) precision
7973      instr[20,16] = 11001
7974      instr[15,10] = precision
7975      instr[9,5]   = Rs
7976      instr[4,0]   = Rd.  */
7977
7978   unsigned rs = INSTR (9, 5);
7979   unsigned rd = INSTR (4, 0);
7980
7981   NYI_assert (30, 23, 0x3C);
7982   NYI_assert (20, 16, 0x19);
7983
7984   if (INSTR (21, 21) != 1)
7985     /* Convert to fixed point.  */
7986     HALT_NYI;
7987
7988   if (INSTR (31, 31))
7989     {
7990       /* Convert to unsigned 64-bit integer.  */
7991       if (INSTR (22, 22))
7992         {
7993           double  d = aarch64_get_FP_double (cpu, rs);
7994           uint64_t value = (uint64_t) d;
7995
7996           /* Do not raise an exception if we have reached ULONG_MAX.  */
7997           if (value != (1UL << 63))
7998             RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
7999
8000           aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8001         }
8002       else
8003         {
8004           float  f = aarch64_get_FP_float (cpu, rs);
8005           uint64_t value = (uint64_t) f;
8006
8007           /* Do not raise an exception if we have reached ULONG_MAX.  */
8008           if (value != (1UL << 63))
8009             RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8010
8011           aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8012         }
8013     }
8014   else
8015     {
8016       uint32_t value;
8017
8018       /* Convert to unsigned 32-bit integer.  */
8019       if (INSTR (22, 22))
8020         {
8021           double  d = aarch64_get_FP_double (cpu, rs);
8022
8023           value = (uint32_t) d;
8024           /* Do not raise an exception if we have reached UINT_MAX.  */
8025           if (value != (1UL << 31))
8026             RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8027         }
8028       else
8029         {
8030           float  f = aarch64_get_FP_float (cpu, rs);
8031
8032           value = (uint32_t) f;
8033           /* Do not raise an exception if we have reached UINT_MAX.  */
8034           if (value != (1UL << 31))
8035             RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8036         }
8037
8038       aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8039     }
8040 }
8041
8042 static void
8043 do_UCVTF (sim_cpu *cpu)
8044 {
8045   /* instr[31]    = size: 32-bit (0), 64-bit (1)
8046      instr[30,23] = 001 1110 0
8047      instr[22]    = type: single (0)/ double (1)
8048      instr[21]    = enable (0)/disable(1) precision
8049      instr[20,16] = 0 0011
8050      instr[15,10] = precision
8051      instr[9,5]   = Rs
8052      instr[4,0]   = Rd.  */
8053
8054   unsigned rs = INSTR (9, 5);
8055   unsigned rd = INSTR (4, 0);
8056
8057   NYI_assert (30, 23, 0x3C);
8058   NYI_assert (20, 16, 0x03);
8059
8060   if (INSTR (21, 21) != 1)
8061     HALT_NYI;
8062
8063   /* FIXME: Add exception raising.  */
8064   if (INSTR (31, 31))
8065     {
8066       uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
8067
8068       if (INSTR (22, 22))
8069         aarch64_set_FP_double (cpu, rd, (double) value);
8070       else
8071         aarch64_set_FP_float (cpu, rd, (float) value);
8072     }
8073   else
8074     {
8075       uint32_t value =  aarch64_get_reg_u32 (cpu, rs, NO_SP);
8076
8077       if (INSTR (22, 22))
8078         aarch64_set_FP_double (cpu, rd, (double) value);
8079       else
8080         aarch64_set_FP_float (cpu, rd, (float) value);
8081     }
8082 }
8083
8084 static void
8085 float_vector_move (sim_cpu *cpu)
8086 {
8087   /* instr[31,17] == 100 1111 0101 0111
8088      instr[16]    ==> direction 0=> to GR, 1=> from GR
8089      instr[15,10] => ???
8090      instr[9,5]   ==> source
8091      instr[4,0]   ==> dest.  */
8092
8093   unsigned rn = INSTR (9, 5);
8094   unsigned rd = INSTR (4, 0);
8095
8096   NYI_assert (31, 17, 0x4F57);
8097
8098   if (INSTR (15, 10) != 0)
8099     HALT_UNALLOC;
8100
8101   if (INSTR (16, 16))
8102     aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
8103   else
8104     aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
8105 }
8106
8107 static void
8108 dexSimpleFPIntegerConvert (sim_cpu *cpu)
8109 {
8110   /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
8111      instr[30     = 0
8112      instr[29]    = S :  0 ==> OK, 1 ==> UNALLOC
8113      instr[28,25] = 1111
8114      instr[24]    = 0
8115      instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8116      instr[21]    = 1
8117      instr[20,19] = rmode
8118      instr[18,16] = opcode
8119      instr[15,10] = 10 0000  */
8120
8121   uint32_t rmode_opcode;
8122   uint32_t size_type;
8123   uint32_t type;
8124   uint32_t size;
8125   uint32_t S;
8126
8127   if (INSTR (31, 17) == 0x4F57)
8128     {
8129       float_vector_move (cpu);
8130       return;
8131     }
8132
8133   size = INSTR (31, 31);
8134   S = INSTR (29, 29);
8135   if (S != 0)
8136     HALT_UNALLOC;
8137
8138   type = INSTR (23, 22);
8139   if (type > 1)
8140     HALT_UNALLOC;
8141
8142   rmode_opcode = INSTR (20, 16);
8143   size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d.  */
8144
8145   switch (rmode_opcode)
8146     {
8147     case 2:                     /* SCVTF.  */
8148       switch (size_type)
8149         {
8150         case 0: scvtf32 (cpu); return;
8151         case 1: scvtd32 (cpu); return;
8152         case 2: scvtf (cpu); return;
8153         case 3: scvtd (cpu); return;
8154         }
8155
8156     case 6:                     /* FMOV GR, Vec.  */
8157       switch (size_type)
8158         {
8159         case 0:  gfmovs (cpu); return;
8160         case 3:  gfmovd (cpu); return;
8161         default: HALT_UNALLOC;
8162         }
8163
8164     case 7:                     /* FMOV vec, GR.  */
8165       switch (size_type)
8166         {
8167         case 0:  fgmovs (cpu); return;
8168         case 3:  fgmovd (cpu); return;
8169         default: HALT_UNALLOC;
8170         }
8171
8172     case 24:                    /* FCVTZS.  */
8173       switch (size_type)
8174         {
8175         case 0: fcvtszs32 (cpu); return;
8176         case 1: fcvtszd32 (cpu); return;
8177         case 2: fcvtszs (cpu); return;
8178         case 3: fcvtszd (cpu); return;
8179         }
8180
8181     case 25: do_fcvtzu (cpu); return;
8182     case 3:  do_UCVTF (cpu); return;
8183
8184     case 0:     /* FCVTNS.  */
8185     case 1:     /* FCVTNU.  */
8186     case 4:     /* FCVTAS.  */
8187     case 5:     /* FCVTAU.  */
8188     case 8:     /* FCVPTS.  */
8189     case 9:     /* FCVTPU.  */
8190     case 16:    /* FCVTMS.  */
8191     case 17:    /* FCVTMU.  */
8192     default:
8193       HALT_NYI;
8194     }
8195 }
8196
8197 static void
8198 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8199 {
8200   uint32_t flags;
8201
8202   if (isnan (fvalue1) || isnan (fvalue2))
8203     flags = C|V;
8204   else
8205     {
8206       float result = fvalue1 - fvalue2;
8207
8208       if (result == 0.0)
8209         flags = Z|C;
8210       else if (result < 0)
8211         flags = N;
8212       else /* (result > 0).  */
8213         flags = C;
8214     }
8215
8216   aarch64_set_CPSR (cpu, flags);
8217 }
8218
8219 static void
8220 fcmps (sim_cpu *cpu)
8221 {
8222   unsigned sm = INSTR (20, 16);
8223   unsigned sn = INSTR ( 9,  5);
8224
8225   float fvalue1 = aarch64_get_FP_float (cpu, sn);
8226   float fvalue2 = aarch64_get_FP_float (cpu, sm);
8227
8228   set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8229 }
8230
8231 /* Float compare to zero -- Invalid Operation exception
8232    only on signaling NaNs.  */
8233 static void
8234 fcmpzs (sim_cpu *cpu)
8235 {
8236   unsigned sn = INSTR ( 9,  5);
8237   float fvalue1 = aarch64_get_FP_float (cpu, sn);
8238
8239   set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8240 }
8241
8242 /* Float compare -- Invalid Operation exception on all NaNs.  */
8243 static void
8244 fcmpes (sim_cpu *cpu)
8245 {
8246   unsigned sm = INSTR (20, 16);
8247   unsigned sn = INSTR ( 9,  5);
8248
8249   float fvalue1 = aarch64_get_FP_float (cpu, sn);
8250   float fvalue2 = aarch64_get_FP_float (cpu, sm);
8251
8252   set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8253 }
8254
8255 /* Float compare to zero -- Invalid Operation exception on all NaNs.  */
8256 static void
8257 fcmpzes (sim_cpu *cpu)
8258 {
8259   unsigned sn = INSTR ( 9,  5);
8260   float fvalue1 = aarch64_get_FP_float (cpu, sn);
8261
8262   set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8263 }
8264
8265 static void
8266 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8267 {
8268   uint32_t flags;
8269
8270   if (isnan (dval1) || isnan (dval2))
8271     flags = C|V;
8272   else
8273     {
8274       double result = dval1 - dval2;
8275
8276       if (result == 0.0)
8277         flags = Z|C;
8278       else if (result < 0)
8279         flags = N;
8280       else /* (result > 0).  */
8281         flags = C;
8282     }
8283
8284   aarch64_set_CPSR (cpu, flags);
8285 }
8286
8287 /* Double compare -- Invalid Operation exception only on signaling NaNs.  */
8288 static void
8289 fcmpd (sim_cpu *cpu)
8290 {
8291   unsigned sm = INSTR (20, 16);
8292   unsigned sn = INSTR ( 9,  5);
8293
8294   double dvalue1 = aarch64_get_FP_double (cpu, sn);
8295   double dvalue2 = aarch64_get_FP_double (cpu, sm);
8296
8297   set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8298 }
8299
8300 /* Double compare to zero -- Invalid Operation exception
8301    only on signaling NaNs.  */
8302 static void
8303 fcmpzd (sim_cpu *cpu)
8304 {
8305   unsigned sn = INSTR ( 9,  5);
8306   double dvalue1 = aarch64_get_FP_double (cpu, sn);
8307
8308   set_flags_for_double_compare (cpu, dvalue1, 0.0);
8309 }
8310
8311 /* Double compare -- Invalid Operation exception on all NaNs.  */
8312 static void
8313 fcmped (sim_cpu *cpu)
8314 {
8315   unsigned sm = INSTR (20, 16);
8316   unsigned sn = INSTR ( 9,  5);
8317
8318   double dvalue1 = aarch64_get_FP_double (cpu, sn);
8319   double dvalue2 = aarch64_get_FP_double (cpu, sm);
8320
8321   set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8322 }
8323
8324 /* Double compare to zero -- Invalid Operation exception on all NaNs.  */
8325 static void
8326 fcmpzed (sim_cpu *cpu)
8327 {
8328   unsigned sn = INSTR ( 9,  5);
8329   double dvalue1 = aarch64_get_FP_double (cpu, sn);
8330
8331   set_flags_for_double_compare (cpu, dvalue1, 0.0);
8332 }
8333
8334 static void
8335 dexSimpleFPCompare (sim_cpu *cpu)
8336 {
8337   /* assert instr[28,25] == 1111
8338      instr[30:24:21:13,10] = 0011000
8339      instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8340      instr[29] ==> S :  0 ==> OK, 1 ==> UNALLOC
8341      instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
8342      instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8343      instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8344                               01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8345                               ow ==> UNALLOC  */
8346   uint32_t dispatch;
8347   uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8348   uint32_t type = INSTR (23, 22);
8349   uint32_t op = INSTR (15, 14);
8350   uint32_t op2_2_0 = INSTR (2, 0);
8351
8352   if (op2_2_0 != 0)
8353     HALT_UNALLOC;
8354
8355   if (M_S != 0)
8356     HALT_UNALLOC;
8357
8358   if (type > 1)
8359     HALT_UNALLOC;
8360
8361   if (op != 0)
8362     HALT_UNALLOC;
8363
8364   /* dispatch on type and top 2 bits of opcode.  */
8365   dispatch = (type << 2) | INSTR (4, 3);
8366
8367   switch (dispatch)
8368     {
8369     case 0: fcmps (cpu); return;
8370     case 1: fcmpzs (cpu); return;
8371     case 2: fcmpes (cpu); return;
8372     case 3: fcmpzes (cpu); return;
8373     case 4: fcmpd (cpu); return;
8374     case 5: fcmpzd (cpu); return;
8375     case 6: fcmped (cpu); return;
8376     case 7: fcmpzed (cpu); return;
8377     }
8378 }
8379
8380 static void
8381 do_scalar_FADDP (sim_cpu *cpu)
8382 {
8383   /* instr [31,23] = 0111 1110 0
8384      instr [22]    = single(0)/double(1)
8385      instr [21,10] = 11 0000 1101 10
8386      instr [9,5]   = Fn
8387      instr [4,0]   = Fd.  */
8388
8389   unsigned Fn = INSTR (9, 5);
8390   unsigned Fd = INSTR (4, 0);
8391
8392   NYI_assert (31, 23, 0x0FC);
8393   NYI_assert (21, 10, 0xC36);
8394
8395   if (INSTR (22, 22))
8396     {
8397       double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8398       double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8399
8400       aarch64_set_FP_double (cpu, Fd, val1 + val2);
8401     }
8402   else
8403     {
8404       float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8405       float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8406
8407       aarch64_set_FP_float (cpu, Fd, val1 + val2);
8408     }
8409 }
8410
8411 /* Floating point absolute difference.  */
8412
8413 static void
8414 do_scalar_FABD (sim_cpu *cpu)
8415 {
8416   /* instr [31,23] = 0111 1110 1
8417      instr [22]    = float(0)/double(1)
8418      instr [21]    = 1
8419      instr [20,16] = Rm
8420      instr [15,10] = 1101 01
8421      instr [9, 5]  = Rn
8422      instr [4, 0]  = Rd.  */
8423
8424   unsigned rm = INSTR (20, 16);
8425   unsigned rn = INSTR (9, 5);
8426   unsigned rd = INSTR (4, 0);
8427
8428   NYI_assert (31, 23, 0x0FD);
8429   NYI_assert (21, 21, 1);
8430   NYI_assert (15, 10, 0x35);
8431
8432   if (INSTR (22, 22))
8433     aarch64_set_FP_double (cpu, rd,
8434                            fabs (aarch64_get_FP_double (cpu, rn)
8435                                  - aarch64_get_FP_double (cpu, rm)));
8436   else
8437     aarch64_set_FP_float (cpu, rd,
8438                           fabsf (aarch64_get_FP_float (cpu, rn)
8439                                  - aarch64_get_FP_float (cpu, rm)));
8440 }
8441
8442 static void
8443 do_scalar_CMGT (sim_cpu *cpu)
8444 {
8445   /* instr [31,21] = 0101 1110 111
8446      instr [20,16] = Rm
8447      instr [15,10] = 00 1101
8448      instr [9, 5]  = Rn
8449      instr [4, 0]  = Rd.  */
8450
8451   unsigned rm = INSTR (20, 16);
8452   unsigned rn = INSTR (9, 5);
8453   unsigned rd = INSTR (4, 0);
8454
8455   NYI_assert (31, 21, 0x2F7);
8456   NYI_assert (15, 10, 0x0D);
8457
8458   aarch64_set_vec_u64 (cpu, rd, 0,
8459                        aarch64_get_vec_u64 (cpu, rn, 0) >
8460                        aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L);
8461 }
8462
8463 static void
8464 do_scalar_USHR (sim_cpu *cpu)
8465 {
8466   /* instr [31,23] = 0111 1111 0
8467      instr [22,16] = shift amount
8468      instr [15,10] = 0000 01
8469      instr [9, 5]  = Rn
8470      instr [4, 0]  = Rd.  */
8471
8472   unsigned amount = 128 - INSTR (22, 16);
8473   unsigned rn = INSTR (9, 5);
8474   unsigned rd = INSTR (4, 0);
8475
8476   NYI_assert (31, 23, 0x0FE);
8477   NYI_assert (15, 10, 0x01);
8478
8479   aarch64_set_vec_u64 (cpu, rd, 0,
8480                        aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
8481 }
8482
8483 static void
8484 do_scalar_SSHL (sim_cpu *cpu)
8485 {
8486   /* instr [31,21] = 0101 1110 111
8487      instr [20,16] = Rm
8488      instr [15,10] = 0100 01
8489      instr [9, 5]  = Rn
8490      instr [4, 0]  = Rd.  */
8491
8492   unsigned rm = INSTR (20, 16);
8493   unsigned rn = INSTR (9, 5);
8494   unsigned rd = INSTR (4, 0);
8495   signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8496
8497   NYI_assert (31, 21, 0x2F7);
8498   NYI_assert (15, 10, 0x11);
8499
8500   if (shift >= 0)
8501     aarch64_set_vec_s64 (cpu, rd, 0,
8502                          aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8503   else
8504     aarch64_set_vec_s64 (cpu, rd, 0,
8505                          aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
8506 }
8507
8508 static void
8509 do_scalar_shift (sim_cpu *cpu)
8510 {
8511   /* instr [31,23] = 0101 1111 0
8512      instr [22,16] = shift amount
8513      instr [15,10] = 0101 01   [SHL]
8514      instr [15,10] = 0000 01   [SSHR]
8515      instr [9, 5]  = Rn
8516      instr [4, 0]  = Rd.  */
8517
8518   unsigned rn = INSTR (9, 5);
8519   unsigned rd = INSTR (4, 0);
8520   unsigned amount;
8521
8522   NYI_assert (31, 23, 0x0BE);
8523
8524   if (INSTR (22, 22) == 0)
8525     HALT_UNALLOC;
8526
8527   switch (INSTR (15, 10))
8528     {
8529     case 0x01: /* SSHR */
8530       amount = 128 - INSTR (22, 16);
8531       aarch64_set_vec_s64 (cpu, rd, 0,
8532                            aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
8533       return;
8534     case 0x15: /* SHL */
8535       amount = INSTR (22, 16) - 64;
8536       aarch64_set_vec_u64 (cpu, rd, 0,
8537                            aarch64_get_vec_u64 (cpu, rn, 0) << amount);
8538       return;
8539     default:
8540       HALT_NYI;
8541     }
8542 }
8543
8544 /* FCMEQ FCMGT FCMGE.  */
8545 static void
8546 do_scalar_FCM (sim_cpu *cpu)
8547 {
8548   /* instr [31,30] = 01
8549      instr [29]    = U
8550      instr [28,24] = 1 1110
8551      instr [23]    = E
8552      instr [22]    = size
8553      instr [21]    = 1
8554      instr [20,16] = Rm
8555      instr [15,12] = 1110
8556      instr [11]    = AC
8557      instr [10]    = 1
8558      instr [9, 5]  = Rn
8559      instr [4, 0]  = Rd.  */
8560
8561   unsigned rm = INSTR (20, 16);
8562   unsigned rn = INSTR (9, 5);
8563   unsigned rd = INSTR (4, 0);
8564   unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
8565   unsigned result;
8566   float val1;
8567   float val2;
8568
8569   NYI_assert (31, 30, 1);
8570   NYI_assert (28, 24, 0x1E);
8571   NYI_assert (21, 21, 1);
8572   NYI_assert (15, 12, 0xE);
8573   NYI_assert (10, 10, 1);
8574
8575   if (INSTR (22, 22))
8576     {
8577       double val1 = aarch64_get_FP_double (cpu, rn);
8578       double val2 = aarch64_get_FP_double (cpu, rm);
8579
8580       switch (EUac)
8581         {
8582         case 0: /* 000 */
8583           result = val1 == val2;
8584           break;
8585
8586         case 3: /* 011 */
8587           val1 = fabs (val1);
8588           val2 = fabs (val2);
8589           /* Fall through. */
8590         case 2: /* 010 */
8591           result = val1 >= val2;
8592           break;
8593
8594         case 7: /* 111 */
8595           val1 = fabs (val1);
8596           val2 = fabs (val2);
8597           /* Fall through. */
8598         case 6: /* 110 */
8599           result = val1 > val2;
8600           break;
8601
8602         default:
8603           HALT_UNALLOC;
8604         }
8605
8606       aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
8607       return;
8608     }
8609
8610   val1 = aarch64_get_FP_float (cpu, rn);
8611   val2 = aarch64_get_FP_float (cpu, rm);
8612
8613   switch (EUac)
8614     {
8615     case 0: /* 000 */
8616       result = val1 == val2;
8617       break;
8618
8619     case 3: /* 011 */
8620       val1 = fabsf (val1);
8621       val2 = fabsf (val2);
8622       /* Fall through. */
8623     case 2: /* 010 */
8624       result = val1 >= val2;
8625       break;
8626
8627     case 7: /* 111 */
8628       val1 = fabsf (val1);
8629       val2 = fabsf (val2);
8630       /* Fall through. */
8631     case 6: /* 110 */
8632       result = val1 > val2;
8633       break;
8634
8635     default:
8636       HALT_UNALLOC;
8637     }
8638
8639   aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
8640 }
8641
8642 /* An alias of DUP.  */
8643 static void
8644 do_scalar_MOV (sim_cpu *cpu)
8645 {
8646   /* instr [31,21] = 0101 1110 000
8647      instr [20,16] = imm5
8648      instr [15,10] = 0000 01
8649      instr [9, 5]  = Rn
8650      instr [4, 0]  = Rd.  */
8651
8652   unsigned rn = INSTR (9, 5);
8653   unsigned rd = INSTR (4, 0);
8654   unsigned index;
8655
8656   NYI_assert (31, 21, 0x2F0);
8657   NYI_assert (15, 10, 0x01);
8658
8659   if (INSTR (16, 16))
8660     {
8661       /* 8-bit.  */
8662       index = INSTR (20, 17);
8663       aarch64_set_vec_u8
8664         (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
8665     }
8666   else if (INSTR (17, 17))
8667     {
8668       /* 16-bit.  */
8669       index = INSTR (20, 18);
8670       aarch64_set_vec_u16
8671         (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
8672     }
8673   else if (INSTR (18, 18))
8674     {
8675       /* 32-bit.  */
8676       index = INSTR (20, 19);
8677       aarch64_set_vec_u32
8678         (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
8679     }
8680   else if (INSTR (19, 19))
8681     {
8682       /* 64-bit.  */
8683       index = INSTR (20, 20);
8684       aarch64_set_vec_u64
8685         (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
8686     }
8687   else
8688     HALT_UNALLOC;
8689 }
8690
8691 static void
8692 do_scalar_NEG (sim_cpu *cpu)
8693 {
8694   /* instr [31,10] = 0111 1110 1110 0000 1011 10
8695      instr [9, 5]  = Rn
8696      instr [4, 0]  = Rd.  */
8697
8698   unsigned rn = INSTR (9, 5);
8699   unsigned rd = INSTR (4, 0);
8700
8701   NYI_assert (31, 10, 0x1FB82E);
8702
8703   aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
8704 }
8705
8706 static void
8707 do_scalar_USHL (sim_cpu *cpu)
8708 {
8709   /* instr [31,21] = 0111 1110 111
8710      instr [20,16] = Rm
8711      instr [15,10] = 0100 01
8712      instr [9, 5]  = Rn
8713      instr [4, 0]  = Rd.  */
8714
8715   unsigned rm = INSTR (20, 16);
8716   unsigned rn = INSTR (9, 5);
8717   unsigned rd = INSTR (4, 0);
8718   signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8719
8720   NYI_assert (31, 21, 0x3F7);
8721   NYI_assert (15, 10, 0x11);
8722
8723   if (shift >= 0)
8724     aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
8725   else
8726     aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
8727 }
8728
8729 static void
8730 do_double_add (sim_cpu *cpu)
8731 {
8732   /* instr [31,21] = 0101 1110 111
8733      instr [20,16] = Fn
8734      instr [15,10] = 1000 01
8735      instr [9,5]   = Fm
8736      instr [4,0]   = Fd.  */
8737   unsigned Fd;
8738   unsigned Fm;
8739   unsigned Fn;
8740   double val1;
8741   double val2;
8742
8743   NYI_assert (31, 21, 0x2F7);
8744   NYI_assert (15, 10, 0x21);
8745
8746   Fd = INSTR (4, 0);
8747   Fm = INSTR (9, 5);
8748   Fn = INSTR (20, 16);
8749
8750   val1 = aarch64_get_FP_double (cpu, Fm);
8751   val2 = aarch64_get_FP_double (cpu, Fn);
8752
8753   aarch64_set_FP_double (cpu, Fd, val1 + val2);
8754 }
8755
8756 static void
8757 do_scalar_UCVTF (sim_cpu *cpu)
8758 {
8759   /* instr [31,23] = 0111 1110 0
8760      instr [22]    = single(0)/double(1)
8761      instr [21,10] = 10 0001 1101 10
8762      instr [9,5]   = rn
8763      instr [4,0]   = rd.  */
8764
8765   unsigned rn = INSTR (9, 5);
8766   unsigned rd = INSTR (4, 0);
8767
8768   NYI_assert (31, 23, 0x0FC);
8769   NYI_assert (21, 10, 0x876);
8770
8771   if (INSTR (22, 22))
8772     {
8773       uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);
8774
8775       aarch64_set_vec_double (cpu, rd, 0, (double) val);
8776     }
8777   else
8778     {
8779       uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);
8780
8781       aarch64_set_vec_float (cpu, rd, 0, (float) val);
8782     }
8783 }
8784
8785 static void
8786 do_scalar_vec (sim_cpu *cpu)
8787 {
8788   /* instr [30] = 1.  */
8789   /* instr [28,25] = 1111.  */
8790   switch (INSTR (31, 23))
8791     {
8792     case 0xBC:
8793       switch (INSTR (15, 10))
8794         {
8795         case 0x01: do_scalar_MOV (cpu); return;
8796         case 0x39: do_scalar_FCM (cpu); return;
8797         case 0x3B: do_scalar_FCM (cpu); return;
8798         }
8799       break;
8800
8801     case 0xBE: do_scalar_shift (cpu); return;
8802
8803     case 0xFC:
8804       switch (INSTR (15, 10))
8805         {
8806         case 0x36:
8807           switch (INSTR (21, 16))
8808             {
8809             case 0x30: do_scalar_FADDP (cpu); return;
8810             case 0x21: do_scalar_UCVTF (cpu); return;
8811             }
8812           HALT_NYI;
8813         case 0x39: do_scalar_FCM (cpu); return;
8814         case 0x3B: do_scalar_FCM (cpu); return;
8815         }
8816       break;
8817
8818     case 0xFD:
8819       switch (INSTR (15, 10))
8820         {
8821         case 0x0D: do_scalar_CMGT (cpu); return;
8822         case 0x11: do_scalar_USHL (cpu); return;
8823         case 0x2E: do_scalar_NEG (cpu); return;
8824         case 0x35: do_scalar_FABD (cpu); return;
8825         case 0x39: do_scalar_FCM (cpu); return;
8826         case 0x3B: do_scalar_FCM (cpu); return;
8827         default:
8828           HALT_NYI;
8829         }
8830
8831     case 0xFE: do_scalar_USHR (cpu); return;
8832
8833     case 0xBD:
8834       switch (INSTR (15, 10))
8835         {
8836         case 0x21: do_double_add (cpu); return;
8837         case 0x11: do_scalar_SSHL (cpu); return;
8838         default:
8839           HALT_NYI;
8840         }
8841
8842     default:
8843       HALT_NYI;
8844     }
8845 }
8846
8847 static void
8848 dexAdvSIMD1 (sim_cpu *cpu)
8849 {
8850   /* instr [28,25] = 1 111.  */
8851
8852   /* We are currently only interested in the basic
8853      scalar fp routines which all have bit 30 = 0.  */
8854   if (INSTR (30, 30))
8855     do_scalar_vec (cpu);
8856
8857   /* instr[24] is set for FP data processing 3-source and clear for
8858      all other basic scalar fp instruction groups.  */
8859   else if (INSTR (24, 24))
8860     dexSimpleFPDataProc3Source (cpu);
8861
8862   /* instr[21] is clear for floating <-> fixed conversions and set for
8863      all other basic scalar fp instruction groups.  */
8864   else if (!INSTR (21, 21))
8865     dexSimpleFPFixedConvert (cpu);
8866
8867   /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
8868      11 ==> cond select,  00 ==> other.  */
8869   else
8870     switch (INSTR (11, 10))
8871       {
8872       case 1: dexSimpleFPCondCompare (cpu); return;
8873       case 2: dexSimpleFPDataProc2Source (cpu); return;
8874       case 3: dexSimpleFPCondSelect (cpu); return;
8875
8876       default:
8877         /* Now an ordered cascade of tests.
8878            FP immediate has instr [12] == 1.
8879            FP compare has   instr [13] == 1.
8880            FP Data Proc 1 Source has instr [14] == 1.
8881            FP floating <--> integer conversions has instr [15] == 0.  */
8882         if (INSTR (12, 12))
8883           dexSimpleFPImmediate (cpu);
8884
8885         else if (INSTR (13, 13))
8886           dexSimpleFPCompare (cpu);
8887
8888         else if (INSTR (14, 14))
8889           dexSimpleFPDataProc1Source (cpu);
8890
8891         else if (!INSTR (15, 15))
8892           dexSimpleFPIntegerConvert (cpu);
8893
8894         else
8895           /* If we get here then instr[15] == 1 which means UNALLOC.  */
8896           HALT_UNALLOC;
8897       }
8898 }
8899
8900 /* PC relative addressing.  */
8901
8902 static void
8903 pcadr (sim_cpu *cpu)
8904 {
8905   /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
8906      instr[30,29] = immlo
8907      instr[23,5] = immhi.  */
8908   uint64_t address;
8909   unsigned rd = INSTR (4, 0);
8910   uint32_t isPage = INSTR (31, 31);
8911   union { int64_t u64; uint64_t s64; } imm;
8912   uint64_t offset;
8913
8914   imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
8915   offset = imm.u64;
8916   offset = (offset << 2) | INSTR (30, 29);
8917
8918   address = aarch64_get_PC (cpu);
8919
8920   if (isPage)
8921     {
8922       offset <<= 12;
8923       address &= ~0xfff;
8924     }
8925
8926   aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
8927 }
8928
8929 /* Specific decode and execute for group Data Processing Immediate.  */
8930
8931 static void
8932 dexPCRelAddressing (sim_cpu *cpu)
8933 {
8934   /* assert instr[28,24] = 10000.  */
8935   pcadr (cpu);
8936 }
8937
/* Immediate logical.
   The bimm32/64 argument is constructed by replicating a 2, 4, 8,
   16, 32 or 64 bit sequence pulled out at decode and possibly
   inverting it.

   N.B. the output register (dest) can normally be Xn or SP;
   the exception occurs for flag setting instructions which may
   only use Xn for the output (dest).  The input register can
   never be SP.  */
8947
8948 /* 32 bit and immediate.  */
8949 static void
8950 and32 (sim_cpu *cpu, uint32_t bimm)
8951 {
8952   unsigned rn = INSTR (9, 5);
8953   unsigned rd = INSTR (4, 0);
8954
8955   aarch64_set_reg_u64 (cpu, rd, SP_OK,
8956                        aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
8957 }
8958
8959 /* 64 bit and immediate.  */
8960 static void
8961 and64 (sim_cpu *cpu, uint64_t bimm)
8962 {
8963   unsigned rn = INSTR (9, 5);
8964   unsigned rd = INSTR (4, 0);
8965
8966   aarch64_set_reg_u64 (cpu, rd, SP_OK,
8967                        aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
8968 }
8969
8970 /* 32 bit and immediate set flags.  */
8971 static void
8972 ands32 (sim_cpu *cpu, uint32_t bimm)
8973 {
8974   unsigned rn = INSTR (9, 5);
8975   unsigned rd = INSTR (4, 0);
8976
8977   uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
8978   uint32_t value2 = bimm;
8979
8980   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8981   set_flags_for_binop32 (cpu, value1 & value2);
8982 }
8983
8984 /* 64 bit and immediate set flags.  */
8985 static void
8986 ands64 (sim_cpu *cpu, uint64_t bimm)
8987 {
8988   unsigned rn = INSTR (9, 5);
8989   unsigned rd = INSTR (4, 0);
8990
8991   uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
8992   uint64_t value2 = bimm;
8993
8994   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8995   set_flags_for_binop64 (cpu, value1 & value2);
8996 }
8997
8998 /* 32 bit exclusive or immediate.  */
8999 static void
9000 eor32 (sim_cpu *cpu, uint32_t bimm)
9001 {
9002   unsigned rn = INSTR (9, 5);
9003   unsigned rd = INSTR (4, 0);
9004
9005   aarch64_set_reg_u64 (cpu, rd, SP_OK,
9006                        aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
9007 }
9008
9009 /* 64 bit exclusive or immediate.  */
9010 static void
9011 eor64 (sim_cpu *cpu, uint64_t bimm)
9012 {
9013   unsigned rn = INSTR (9, 5);
9014   unsigned rd = INSTR (4, 0);
9015
9016   aarch64_set_reg_u64 (cpu, rd, SP_OK,
9017                        aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
9018 }
9019
9020 /* 32 bit or immediate.  */
9021 static void
9022 orr32 (sim_cpu *cpu, uint32_t bimm)
9023 {
9024   unsigned rn = INSTR (9, 5);
9025   unsigned rd = INSTR (4, 0);
9026
9027   aarch64_set_reg_u64 (cpu, rd, SP_OK,
9028                        aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
9029 }
9030
9031 /* 64 bit or immediate.  */
9032 static void
9033 orr64 (sim_cpu *cpu, uint64_t bimm)
9034 {
9035   unsigned rn = INSTR (9, 5);
9036   unsigned rd = INSTR (4, 0);
9037
9038   aarch64_set_reg_u64 (cpu, rd, SP_OK,
9039                        aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
9040 }
9041
/* Logical shifted register.
   These allow an optional LSL, ASR, LSR or ROR to be applied to the
   second source register, with a count up to the register bit count.
   N.B. register args may not be SP.  */
9046
9047 /* 32 bit AND shifted register.  */
9048 static void
9049 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9050 {
9051   unsigned rm = INSTR (20, 16);
9052   unsigned rn = INSTR (9, 5);
9053   unsigned rd = INSTR (4, 0);
9054
9055   aarch64_set_reg_u64
9056     (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9057      & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9058 }
9059
9060 /* 64 bit AND shifted register.  */
9061 static void
9062 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9063 {
9064   unsigned rm = INSTR (20, 16);
9065   unsigned rn = INSTR (9, 5);
9066   unsigned rd = INSTR (4, 0);
9067
9068   aarch64_set_reg_u64
9069     (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9070      & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9071 }
9072
9073 /* 32 bit AND shifted register setting flags.  */
9074 static void
9075 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9076 {
9077   unsigned rm = INSTR (20, 16);
9078   unsigned rn = INSTR (9, 5);
9079   unsigned rd = INSTR (4, 0);
9080
9081   uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9082   uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9083                                shift, count);
9084
9085   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9086   set_flags_for_binop32 (cpu, value1 & value2);
9087 }
9088
9089 /* 64 bit AND shifted register setting flags.  */
9090 static void
9091 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9092 {
9093   unsigned rm = INSTR (20, 16);
9094   unsigned rn = INSTR (9, 5);
9095   unsigned rd = INSTR (4, 0);
9096
9097   uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9098   uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9099                                shift, count);
9100
9101   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9102   set_flags_for_binop64 (cpu, value1 & value2);
9103 }
9104
9105 /* 32 bit BIC shifted register.  */
9106 static void
9107 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9108 {
9109   unsigned rm = INSTR (20, 16);
9110   unsigned rn = INSTR (9, 5);
9111   unsigned rd = INSTR (4, 0);
9112
9113   aarch64_set_reg_u64
9114     (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9115      & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9116 }
9117
9118 /* 64 bit BIC shifted register.  */
9119 static void
9120 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9121 {
9122   unsigned rm = INSTR (20, 16);
9123   unsigned rn = INSTR (9, 5);
9124   unsigned rd = INSTR (4, 0);
9125
9126   aarch64_set_reg_u64
9127     (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9128      & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9129 }
9130
9131 /* 32 bit BIC shifted register setting flags.  */
9132 static void
9133 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9134 {
9135   unsigned rm = INSTR (20, 16);
9136   unsigned rn = INSTR (9, 5);
9137   unsigned rd = INSTR (4, 0);
9138
9139   uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9140   uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9141                                  shift, count);
9142
9143   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9144   set_flags_for_binop32 (cpu, value1 & value2);
9145 }
9146
9147 /* 64 bit BIC shifted register setting flags.  */
9148 static void
9149 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9150 {
9151   unsigned rm = INSTR (20, 16);
9152   unsigned rn = INSTR (9, 5);
9153   unsigned rd = INSTR (4, 0);
9154
9155   uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9156   uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9157                                  shift, count);
9158
9159   aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9160   set_flags_for_binop64 (cpu, value1 & value2);
9161 }
9162
9163 /* 32 bit EON shifted register.  */
9164 static void
9165 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9166 {
9167   unsigned rm = INSTR (20, 16);
9168   unsigned rn = INSTR (9, 5);
9169   unsigned rd = INSTR (4, 0);
9170
9171   aarch64_set_reg_u64
9172     (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9173      ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9174 }
9175
9176 /* 64 bit EON shifted register.  */
9177 static void
9178 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9179 {
9180   unsigned rm = INSTR (20, 16);
9181   unsigned rn = INSTR (9, 5);
9182   unsigned rd = INSTR (4, 0);
9183
9184   aarch64_set_reg_u64
9185     (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9186      ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9187 }
9188
9189 /* 32 bit EOR shifted register.  */
9190 static void
9191 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9192 {
9193   unsigned rm = INSTR (20, 16);
9194   unsigned rn = INSTR (9, 5);
9195   unsigned rd = INSTR (4, 0);
9196
9197   aarch64_set_reg_u64
9198     (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9199      ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9200 }
9201
9202 /* 64 bit EOR shifted register.  */
9203 static void
9204 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9205 {
9206   unsigned rm = INSTR (20, 16);
9207   unsigned rn = INSTR (9, 5);
9208   unsigned rd = INSTR (4, 0);
9209
9210   aarch64_set_reg_u64
9211     (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9212      ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9213 }
9214
9215 /* 32 bit ORR shifted register.  */
9216 static void
9217 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9218 {
9219   unsigned rm = INSTR (20, 16);
9220   unsigned rn = INSTR (9, 5);
9221   unsigned rd = INSTR (4, 0);
9222
9223   aarch64_set_reg_u64
9224     (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9225      | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9226 }
9227
9228 /* 64 bit ORR shifted register.  */
9229 static void
9230 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9231 {
9232   unsigned rm = INSTR (20, 16);
9233   unsigned rn = INSTR (9, 5);
9234   unsigned rd = INSTR (4, 0);
9235
9236   aarch64_set_reg_u64
9237     (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9238      | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9239 }
9240
9241 /* 32 bit ORN shifted register.  */
9242 static void
9243 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9244 {
9245   unsigned rm = INSTR (20, 16);
9246   unsigned rn = INSTR (9, 5);
9247   unsigned rd = INSTR (4, 0);
9248
9249   aarch64_set_reg_u64
9250     (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9251      | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9252 }
9253
9254 /* 64 bit ORN shifted register.  */
9255 static void
9256 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9257 {
9258   unsigned rm = INSTR (20, 16);
9259   unsigned rn = INSTR (9, 5);
9260   unsigned rd = INSTR (4, 0);
9261
9262   aarch64_set_reg_u64
9263     (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9264      | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9265 }
9266
9267 static void
9268 dexLogicalImmediate (sim_cpu *cpu)
9269 {
9270   /* assert instr[28,23] = 1001000
9271      instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9272      instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9273      instr[22] = N : used to construct immediate mask
9274      instr[21,16] = immr
9275      instr[15,10] = imms
9276      instr[9,5] = Rn
9277      instr[4,0] = Rd  */
9278
9279   /* 32 bit operations must have N = 0 or else we have an UNALLOC.  */
9280   uint32_t size = INSTR (31, 31);
9281   uint32_t N = INSTR (22, 22);
9282   /* uint32_t immr = INSTR (21, 16);.  */
9283   /* uint32_t imms = INSTR (15, 10);.  */
9284   uint32_t index = INSTR (22, 10);
9285   uint64_t bimm64 = LITable [index];
9286   uint32_t dispatch = INSTR (30, 29);
9287
9288   if (~size & N)
9289     HALT_UNALLOC;
9290
9291   if (!bimm64)
9292     HALT_UNALLOC;
9293
9294   if (size == 0)
9295     {
9296       uint32_t bimm = (uint32_t) bimm64;
9297
9298       switch (dispatch)
9299         {
9300         case 0: and32 (cpu, bimm); return;
9301         case 1: orr32 (cpu, bimm); return;
9302         case 2: eor32 (cpu, bimm); return;
9303         case 3: ands32 (cpu, bimm); return;
9304         }
9305     }
9306   else
9307     {
9308       switch (dispatch)
9309         {
9310         case 0: and64 (cpu, bimm64); return;
9311         case 1: orr64 (cpu, bimm64); return;
9312         case 2: eor64 (cpu, bimm64); return;
9313         case 3: ands64 (cpu, bimm64); return;
9314         }
9315     }
9316   HALT_UNALLOC;
9317 }
9318
/* Immediate move.
   The uimm argument is a 16 bit value to be inserted into the
   target register.  The pos argument locates the 16 bit word in the
   dest register, i.e. it is in {0, 1} for 32 bit and {0, 1, 2,
   3} for 64 bit.
   N.B. the register arg may not be SP, so it should be
   accessed using the setGZRegisterXXX accessors.  */
9326
9327 /* 32 bit move 16 bit immediate zero remaining shorts.  */
9328 static void
9329 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9330 {
9331   unsigned rd = INSTR (4, 0);
9332
9333   aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9334 }
9335
9336 /* 64 bit move 16 bit immediate zero remaining shorts.  */
9337 static void
9338 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9339 {
9340   unsigned rd = INSTR (4, 0);
9341
9342   aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
9343 }
9344
9345 /* 32 bit move 16 bit immediate negated.  */
9346 static void
9347 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9348 {
9349   unsigned rd = INSTR (4, 0);
9350
9351   aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
9352 }
9353
9354 /* 64 bit move 16 bit immediate negated.  */
9355 static void
9356 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9357 {
9358   unsigned rd = INSTR (4, 0);
9359
9360   aarch64_set_reg_u64
9361     (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
9362                       ^ 0xffffffffffffffffULL));
9363 }
9364
9365 /* 32 bit move 16 bit immediate keep remaining shorts.  */
9366 static void
9367 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9368 {
9369   unsigned rd = INSTR (4, 0);
9370   uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9371   uint32_t value = val << (pos * 16);
9372   uint32_t mask = ~(0xffffU << (pos * 16));
9373
9374   aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9375 }
9376
9377 /* 64 bit move 16 it immediate keep remaining shorts.  */
9378 static void
9379 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9380 {
9381   unsigned rd = INSTR (4, 0);
9382   uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
9383   uint64_t value = (uint64_t) val << (pos * 16);
9384   uint64_t mask = ~(0xffffULL << (pos * 16));
9385
9386   aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9387 }
9388
9389 static void
9390 dexMoveWideImmediate (sim_cpu *cpu)
9391 {
9392   /* assert instr[28:23] = 100101
9393      instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9394      instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
9395      instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
9396      instr[20,5] = uimm16
9397      instr[4,0] = Rd  */
9398
9399   /* N.B. the (multiple of 16) shift is applied by the called routine,
9400      we just pass the multiplier.  */
9401
9402   uint32_t imm;
9403   uint32_t size = INSTR (31, 31);
9404   uint32_t op = INSTR (30, 29);
9405   uint32_t shift = INSTR (22, 21);
9406
9407   /* 32 bit can only shift 0 or 1 lot of 16.
9408      anything else is an unallocated instruction.  */
9409   if (size == 0 && (shift > 1))
9410     HALT_UNALLOC;
9411
9412   if (op == 1)
9413     HALT_UNALLOC;
9414
9415   imm = INSTR (20, 5);
9416
9417   if (size == 0)
9418     {
9419       if (op == 0)
9420         movn32 (cpu, imm, shift);
9421       else if (op == 2)
9422         movz32 (cpu, imm, shift);
9423       else
9424         movk32 (cpu, imm, shift);
9425     }
9426   else
9427     {
9428       if (op == 0)
9429         movn64 (cpu, imm, shift);
9430       else if (op == 2)
9431         movz64 (cpu, imm, shift);
9432       else
9433         movk64 (cpu, imm, shift);
9434     }
9435 }
9436
9437 /* Bitfield operations.
9438    These take a pair of bit positions r and s which are in {0..31}
9439    or {0..63} depending on the instruction word size.
9440    N.B register args may not be SP.  */
9441
9442 /* OK, we start with ubfm which just needs to pick
9443    some bits out of source zero the rest and write
9444    the result to dest.  Just need two logical shifts.  */
9445
9446 /* 32 bit bitfield move, left and right of affected zeroed
9447    if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>.  */
9448 static void
9449 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9450 {
9451   unsigned rd;
9452   unsigned rn = INSTR (9, 5);
9453   uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9454
9455   /* Pick either s+1-r or s+1 consecutive bits out of the original word.  */
9456   if (r <= s)
9457     {
9458       /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9459          We want only bits s:xxx:r at the bottom of the word
9460          so we LSL bit s up to bit 31 i.e. by 31 - s
9461          and then we LSR to bring bit 31 down to bit s - r
9462          i.e. by 31 + r - s.  */
9463       value <<= 31 - s;
9464       value >>= 31 + r - s;
9465     }
9466   else
9467     {
9468       /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
9469          We want only bits s:xxx:0 starting at it 31-(r-1)
9470          so we LSL bit s up to bit 31 i.e. by 31 - s
9471          and then we LSL to bring bit 31 down to 31-(r-1)+s
9472          i.e. by r - (s + 1).  */
9473       value <<= 31 - s;
9474       value >>= r - (s + 1);
9475     }
9476
9477   rd = INSTR (4, 0);
9478   aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9479 }
9480
9481 /* 64 bit bitfield move, left and right of affected zeroed
9482    if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>.  */
9483 static void
9484 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9485 {
9486   unsigned rd;
9487   unsigned rn = INSTR (9, 5);
9488   uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9489
9490   if (r <= s)
9491     {
9492       /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9493          We want only bits s:xxx:r at the bottom of the word.
9494          So we LSL bit s up to bit 63 i.e. by 63 - s
9495          and then we LSR to bring bit 63 down to bit s - r
9496          i.e. by 63 + r - s.  */
9497       value <<= 63 - s;
9498       value >>= 63 + r - s;
9499     }
9500   else
9501     {
9502       /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
9503          We want only bits s:xxx:0 starting at it 63-(r-1).
9504          So we LSL bit s up to bit 63 i.e. by 63 - s
9505          and then we LSL to bring bit 63 down to 63-(r-1)+s
9506          i.e. by r - (s + 1).  */
9507       value <<= 63 - s;
9508       value >>= r - (s + 1);
9509     }
9510
9511   rd = INSTR (4, 0);
9512   aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9513 }
9514
9515 /* The signed versions need to insert sign bits
9516    on the left of the inserted bit field. so we do
9517    much the same as the unsigned version except we
9518    use an arithmetic shift right -- this just means
9519    we need to operate on signed values.  */
9520
9521 /* 32 bit bitfield move, left of affected sign-extended, right zeroed.  */
9522 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>.  */
9523 static void
9524 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9525 {
9526   unsigned rd;
9527   unsigned rn = INSTR (9, 5);
9528   /* as per ubfm32 but use an ASR instead of an LSR.  */
9529   int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
9530
9531   if (r <= s)
9532     {
9533       value <<= 31 - s;
9534       value >>= 31 + r - s;
9535     }
9536   else
9537     {
9538       value <<= 31 - s;
9539       value >>= r - (s + 1);
9540     }
9541
9542   rd = INSTR (4, 0);
9543   aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
9544 }
9545
9546 /* 64 bit bitfield move, left of affected sign-extended, right zeroed.  */
9547 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>.  */
9548 static void
9549 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9550 {
9551   unsigned rd;
9552   unsigned rn = INSTR (9, 5);
9553   /* acpu per ubfm but use an ASR instead of an LSR.  */
9554   int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
9555
9556   if (r <= s)
9557     {
9558       value <<= 63 - s;
9559       value >>= 63 + r - s;
9560     }
9561   else
9562     {
9563       value <<= 63 - s;
9564       value >>= r - (s + 1);
9565     }
9566
9567   rd = INSTR (4, 0);
9568   aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
9569 }
9570
9571 /* Finally, these versions leave non-affected bits
9572    as is. so we need to generate the bits as per
9573    ubfm and also generate a mask to pick the
9574    bits from the original and computed values.  */
9575
9576 /* 32 bit bitfield move, non-affected bits left as is.
9577    If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>.  */
9578 static void
9579 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9580 {
9581   unsigned rn = INSTR (9, 5);
9582   uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9583   uint32_t mask = -1;
9584   unsigned rd;
9585   uint32_t value2;
9586
9587   /* Pick either s+1-r or s+1 consecutive bits out of the original word.  */
9588   if (r <= s)
9589     {
9590       /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9591          We want only bits s:xxx:r at the bottom of the word
9592          so we LSL bit s up to bit 31 i.e. by 31 - s
9593          and then we LSR to bring bit 31 down to bit s - r
9594          i.e. by 31 + r - s.  */
9595       value <<= 31 - s;
9596       value >>= 31 + r - s;
9597       /* the mask must include the same bits.  */
9598       mask <<= 31 - s;
9599       mask >>= 31 + r - s;
9600     }
9601   else
9602     {
9603       /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
9604          We want only bits s:xxx:0 starting at it 31-(r-1)
9605          so we LSL bit s up to bit 31 i.e. by 31 - s
9606          and then we LSL to bring bit 31 down to 31-(r-1)+s
9607          i.e. by r - (s + 1).  */
9608       value <<= 31 - s;
9609       value >>= r - (s + 1);
9610       /* The mask must include the same bits.  */
9611       mask <<= 31 - s;
9612       mask >>= r - (s + 1);
9613     }
9614
9615   rd = INSTR (4, 0);
9616   value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9617
9618   value2 &= ~mask;
9619   value2 |= value;
9620
9621   aarch64_set_reg_u64
9622     (cpu, rd, NO_SP, (aarch64_get_reg_u32 (cpu, rd, NO_SP) & ~mask) | value);
9623 }
9624
9625 /* 64 bit bitfield move, non-affected bits left as is.
9626    If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>.  */
9627 static void
9628 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9629 {
9630   unsigned rd;
9631   unsigned rn = INSTR (9, 5);
9632   uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9633   uint64_t mask = 0xffffffffffffffffULL;
9634
9635   if (r <= s)
9636     {
9637       /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9638          We want only bits s:xxx:r at the bottom of the word
9639          so we LSL bit s up to bit 63 i.e. by 63 - s
9640          and then we LSR to bring bit 63 down to bit s - r
9641          i.e. by 63 + r - s.  */
9642       value <<= 63 - s;
9643       value >>= 63 + r - s;
9644       /* The mask must include the same bits.  */
9645       mask <<= 63 - s;
9646       mask >>= 63 + r - s;
9647     }
9648   else
9649     {
9650       /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
9651          We want only bits s:xxx:0 starting at it 63-(r-1)
9652          so we LSL bit s up to bit 63 i.e. by 63 - s
9653          and then we LSL to bring bit 63 down to 63-(r-1)+s
9654          i.e. by r - (s + 1).  */
9655       value <<= 63 - s;
9656       value >>= r - (s + 1);
9657       /* The mask must include the same bits.  */
9658       mask <<= 63 - s;
9659       mask >>= r - (s + 1);
9660     }
9661
9662   rd = INSTR (4, 0);
9663   aarch64_set_reg_u64
9664     (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
9665 }
9666
9667 static void
9668 dexBitfieldImmediate (sim_cpu *cpu)
9669 {
9670   /* assert instr[28:23] = 100110
9671      instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9672      instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
9673      instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
9674      instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
9675      instr[15,10] = imms :  0xxxxx for 32 bit, xxxxxx for 64 bit
9676      instr[9,5] = Rn
9677      instr[4,0] = Rd  */
9678
9679   /* 32 bit operations must have N = 0 or else we have an UNALLOC.  */
9680   uint32_t dispatch;
9681   uint32_t imms;
9682   uint32_t size = INSTR (31, 31);
9683   uint32_t N = INSTR (22, 22);
9684   /* 32 bit operations must have immr[5] = 0 and imms[5] = 0.  */
9685   /* or else we have an UNALLOC.  */
9686   uint32_t immr = INSTR (21, 16);
9687
9688   if (~size & N)
9689     HALT_UNALLOC;
9690
9691   if (!size && uimm (immr, 5, 5))
9692     HALT_UNALLOC;
9693
9694   imms = INSTR (15, 10);
9695   if (!size && uimm (imms, 5, 5))
9696     HALT_UNALLOC;
9697
9698   /* Switch on combined size and op.  */
9699   dispatch = INSTR (31, 29);
9700   switch (dispatch)
9701     {
9702     case 0: sbfm32 (cpu, immr, imms); return;
9703     case 1: bfm32 (cpu, immr, imms); return;
9704     case 2: ubfm32 (cpu, immr, imms); return;
9705     case 4: sbfm (cpu, immr, imms); return;
9706     case 5: bfm (cpu, immr, imms); return;
9707     case 6: ubfm (cpu, immr, imms); return;
9708     default: HALT_UNALLOC;
9709     }
9710 }
9711
9712 static void
9713 do_EXTR_32 (sim_cpu *cpu)
9714 {
9715   /* instr[31:21] = 00010011100
9716      instr[20,16] = Rm
9717      instr[15,10] = imms :  0xxxxx for 32 bit
9718      instr[9,5]   = Rn
9719      instr[4,0]   = Rd  */
9720   unsigned rm   = INSTR (20, 16);
9721   unsigned imms = INSTR (15, 10) & 31;
9722   unsigned rn   = INSTR ( 9,  5);
9723   unsigned rd   = INSTR ( 4,  0);
9724   uint64_t val1;
9725   uint64_t val2;
9726
9727   val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
9728   val1 >>= imms;
9729   val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9730   val2 <<= (32 - imms);
9731
9732   aarch64_set_reg_u64 (cpu, rd, NO_SP, val1 | val2);
9733 }
9734
9735 static void
9736 do_EXTR_64 (sim_cpu *cpu)
9737 {
9738   /* instr[31:21] = 10010011100
9739      instr[20,16] = Rm
9740      instr[15,10] = imms
9741      instr[9,5]   = Rn
9742      instr[4,0]   = Rd  */
9743   unsigned rm   = INSTR (20, 16);
9744   unsigned imms = INSTR (15, 10) & 63;
9745   unsigned rn   = INSTR ( 9,  5);
9746   unsigned rd   = INSTR ( 4,  0);
9747   uint64_t val;
9748
9749   val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
9750   val >>= imms;
9751   val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));
9752
9753   aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
9754 }
9755
9756 static void
9757 dexExtractImmediate (sim_cpu *cpu)
9758 {
9759   /* assert instr[28:23] = 100111
9760      instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
9761      instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
9762      instr[22]    = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
9763      instr[21]    = op0 : must be 0 or UNALLOC
9764      instr[20,16] = Rm
9765      instr[15,10] = imms :  0xxxxx for 32 bit, xxxxxx for 64 bit
9766      instr[9,5]   = Rn
9767      instr[4,0]   = Rd  */
9768
9769   /* 32 bit operations must have N = 0 or else we have an UNALLOC.  */
9770   /* 64 bit operations must have N = 1 or else we have an UNALLOC.  */
9771   uint32_t dispatch;
9772   uint32_t size = INSTR (31, 31);
9773   uint32_t N = INSTR (22, 22);
9774   /* 32 bit operations must have imms[5] = 0
9775      or else we have an UNALLOC.  */
9776   uint32_t imms = INSTR (15, 10);
9777
9778   if (size ^ N)
9779     HALT_UNALLOC;
9780
9781   if (!size && uimm (imms, 5, 5))
9782     HALT_UNALLOC;
9783
9784   /* Switch on combined size and op.  */
9785   dispatch = INSTR (31, 29);
9786
9787   if (dispatch == 0)
9788     do_EXTR_32 (cpu);
9789
9790   else if (dispatch == 4)
9791     do_EXTR_64 (cpu);
9792
9793   else if (dispatch == 1)
9794     HALT_NYI;
9795   else
9796     HALT_UNALLOC;
9797 }
9798
9799 static void
9800 dexDPImm (sim_cpu *cpu)
9801 {
9802   /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
9803      assert  group == GROUP_DPIMM_1000 || grpoup == GROUP_DPIMM_1001
9804      bits [25,23] of a DPImm are the secondary dispatch vector.  */
9805   uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
9806
9807   switch (group2)
9808     {
9809     case DPIMM_PCADR_000:
9810     case DPIMM_PCADR_001:
9811       dexPCRelAddressing (cpu);
9812       return;
9813
9814     case DPIMM_ADDSUB_010:
9815     case DPIMM_ADDSUB_011:
9816       dexAddSubtractImmediate (cpu);
9817       return;
9818
9819     case DPIMM_LOG_100:
9820       dexLogicalImmediate (cpu);
9821       return;
9822
9823     case DPIMM_MOV_101:
9824       dexMoveWideImmediate (cpu);
9825       return;
9826
9827     case DPIMM_BITF_110:
9828       dexBitfieldImmediate (cpu);
9829       return;
9830
9831     case DPIMM_EXTR_111:
9832       dexExtractImmediate (cpu);
9833       return;
9834
9835     default:
9836       /* Should never reach here.  */
9837       HALT_NYI;
9838     }
9839 }
9840
9841 static void
9842 dexLoadUnscaledImmediate (sim_cpu *cpu)
9843 {
9844   /* instr[29,24] == 111_00
9845      instr[21] == 0
9846      instr[11,10] == 00
9847      instr[31,30] = size
9848      instr[26] = V
9849      instr[23,22] = opc
9850      instr[20,12] = simm9
9851      instr[9,5] = rn may be SP.  */
9852   /* unsigned rt = INSTR (4, 0);  */
9853   uint32_t V = INSTR (26, 26);
9854   uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
9855   int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
9856
9857   if (!V)
9858     {
9859       /* GReg operations.  */
9860       switch (dispatch)
9861         {
9862         case 0:  sturb (cpu, imm); return;
9863         case 1:  ldurb32 (cpu, imm); return;
9864         case 2:  ldursb64 (cpu, imm); return;
9865         case 3:  ldursb32 (cpu, imm); return;
9866         case 4:  sturh (cpu, imm); return;
9867         case 5:  ldurh32 (cpu, imm); return;
9868         case 6:  ldursh64 (cpu, imm); return;
9869         case 7:  ldursh32 (cpu, imm); return;
9870         case 8:  stur32 (cpu, imm); return;
9871         case 9:  ldur32 (cpu, imm); return;
9872         case 10: ldursw (cpu, imm); return;
9873         case 12: stur64 (cpu, imm); return;
9874         case 13: ldur64 (cpu, imm); return;
9875
9876         case 14:
9877           /* PRFUM NYI.  */
9878           HALT_NYI;
9879
9880         default:
9881         case 11:
9882         case 15:
9883           HALT_UNALLOC;
9884         }
9885     }
9886
9887   /* FReg operations.  */
9888   switch (dispatch)
9889     {
9890     case 2:  fsturq (cpu, imm); return;
9891     case 3:  fldurq (cpu, imm); return;
9892     case 8:  fsturs (cpu, imm); return;
9893     case 9:  fldurs (cpu, imm); return;
9894     case 12: fsturd (cpu, imm); return;
9895     case 13: fldurd (cpu, imm); return;
9896
9897     case 0: /* STUR 8 bit FP.  */
9898     case 1: /* LDUR 8 bit FP.  */
9899     case 4: /* STUR 16 bit FP.  */
9900     case 5: /* LDUR 8 bit FP.  */
9901       HALT_NYI;
9902
9903     default:
9904     case 6:
9905     case 7:
9906     case 10:
9907     case 11:
9908     case 14:
9909     case 15:
9910       HALT_UNALLOC;
9911     }
9912 }
9913
/* N.B. A preliminary note regarding all the ldrs<x>32
   instructions.

   The signed value loaded by these instructions is cast to unsigned
   before being assigned to aarch64_get_reg_u64 (cpu, N), i.e. to the
   64 bit element of the GReg union.  This performs a 32 bit sign extension
   (as required) but avoids 64 bit sign extension, thus ensuring that the
   top half of the register word is zero.  This is what the spec demands
   when a 32 bit load occurs.  */
9923
9924 /* 32 bit load sign-extended byte scaled unsigned 12 bit.  */
9925 static void
9926 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
9927 {
9928   unsigned int rn = INSTR (9, 5);
9929   unsigned int rt = INSTR (4, 0);
9930
9931   /* The target register may not be SP but the source may be
9932      there is no scaling required for a byte load.  */
9933   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
9934   aarch64_set_reg_u64 (cpu, rt, NO_SP,
9935                        (int64_t) aarch64_get_mem_s8 (cpu, address));
9936 }
9937
9938 /* 32 bit load sign-extended byte scaled or unscaled zero-
9939    or sign-extended 32-bit register offset.  */
9940 static void
9941 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
9942 {
9943   unsigned int rm = INSTR (20, 16);
9944   unsigned int rn = INSTR (9, 5);
9945   unsigned int rt = INSTR (4, 0);
9946
9947   /* rn may reference SP, rm and rt must reference ZR.  */
9948
9949   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9950   int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9951                                  extension);
9952
9953   /* There is no scaling required for a byte load.  */
9954   aarch64_set_reg_u64
9955     (cpu, rt, NO_SP, (int64_t) aarch64_get_mem_s8 (cpu, address
9956                                                    + displacement));
9957 }
9958
9959 /* 32 bit load sign-extended byte unscaled signed 9 bit with
9960    pre- or post-writeback.  */
9961 static void
9962 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
9963 {
9964   uint64_t address;
9965   unsigned int rn = INSTR (9, 5);
9966   unsigned int rt = INSTR (4, 0);
9967
9968   if (rn == rt && wb != NoWriteBack)
9969     HALT_UNALLOC;
9970
9971   address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9972
9973   if (wb == Pre)
9974       address += offset;
9975
9976   aarch64_set_reg_u64 (cpu, rt, NO_SP,
9977                        (int64_t) aarch64_get_mem_s8 (cpu, address));
9978
9979   if (wb == Post)
9980     address += offset;
9981
9982   if (wb != NoWriteBack)
9983     aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
9984 }
9985
9986 /* 8 bit store scaled.  */
9987 static void
9988 fstrb_abs (sim_cpu *cpu, uint32_t offset)
9989 {
9990   unsigned st = INSTR (4, 0);
9991   unsigned rn = INSTR (9, 5);
9992
9993   aarch64_set_mem_u8 (cpu,
9994                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
9995                       aarch64_get_vec_u8 (cpu, st, 0));
9996 }
9997
9998 /* 8 bit store scaled or unscaled zero- or
9999    sign-extended 8-bit register offset.  */
10000 static void
10001 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10002 {
10003   unsigned rm = INSTR (20, 16);
10004   unsigned rn = INSTR (9, 5);
10005   unsigned st = INSTR (4, 0);
10006
10007   uint64_t  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10008   int64_t   extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10009                                extension);
10010   uint64_t  displacement = scaling == Scaled ? extended : 0;
10011
10012   aarch64_set_mem_u8
10013     (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
10014 }
10015
10016 /* 16 bit store scaled.  */
10017 static void
10018 fstrh_abs (sim_cpu *cpu, uint32_t offset)
10019 {
10020   unsigned st = INSTR (4, 0);
10021   unsigned rn = INSTR (9, 5);
10022
10023   aarch64_set_mem_u16
10024     (cpu,
10025      aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
10026      aarch64_get_vec_u16 (cpu, st, 0));
10027 }
10028
10029 /* 16 bit store scaled or unscaled zero-
10030    or sign-extended 16-bit register offset.  */
10031 static void
10032 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10033 {
10034   unsigned rm = INSTR (20, 16);
10035   unsigned rn = INSTR (9, 5);
10036   unsigned st = INSTR (4, 0);
10037
10038   uint64_t  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10039   int64_t   extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10040                                extension);
10041   uint64_t  displacement = OPT_SCALE (extended, 16, scaling);
10042
10043   aarch64_set_mem_u16
10044     (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
10045 }
10046
10047 /* 32 bit store scaled unsigned 12 bit.  */
10048 static void
10049 fstrs_abs (sim_cpu *cpu, uint32_t offset)
10050 {
10051   unsigned st = INSTR (4, 0);
10052   unsigned rn = INSTR (9, 5);
10053
10054   aarch64_set_mem_u32
10055     (cpu,
10056      aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
10057      aarch64_get_vec_u32 (cpu, st, 0));
10058 }
10059
10060 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback.  */
10061 static void
10062 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10063 {
10064   unsigned rn = INSTR (9, 5);
10065   unsigned st = INSTR (4, 0);
10066
10067   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10068
10069   if (wb != Post)
10070     address += offset;
10071
10072   aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
10073
10074   if (wb == Post)
10075     address += offset;
10076
10077   if (wb != NoWriteBack)
10078     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10079 }
10080
10081 /* 32 bit store scaled or unscaled zero-
10082    or sign-extended 32-bit register offset.  */
10083 static void
10084 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10085 {
10086   unsigned rm = INSTR (20, 16);
10087   unsigned rn = INSTR (9, 5);
10088   unsigned st = INSTR (4, 0);
10089
10090   uint64_t  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10091   int64_t   extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10092                                extension);
10093   uint64_t  displacement = OPT_SCALE (extended, 32, scaling);
10094
10095   aarch64_set_mem_u32
10096     (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
10097 }
10098
10099 /* 64 bit store scaled unsigned 12 bit.  */
10100 static void
10101 fstrd_abs (sim_cpu *cpu, uint32_t offset)
10102 {
10103   unsigned st = INSTR (4, 0);
10104   unsigned rn = INSTR (9, 5);
10105
10106   aarch64_set_mem_u64
10107     (cpu,
10108      aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
10109      aarch64_get_vec_u64 (cpu, st, 0));
10110 }
10111
10112 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback.  */
10113 static void
10114 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10115 {
10116   unsigned rn = INSTR (9, 5);
10117   unsigned st = INSTR (4, 0);
10118
10119   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10120
10121   if (wb != Post)
10122     address += offset;
10123
10124   aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
10125
10126   if (wb == Post)
10127     address += offset;
10128
10129   if (wb != NoWriteBack)
10130     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10131 }
10132
10133 /* 64 bit store scaled or unscaled zero-
10134    or sign-extended 32-bit register offset.  */
10135 static void
10136 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10137 {
10138   unsigned rm = INSTR (20, 16);
10139   unsigned rn = INSTR (9, 5);
10140   unsigned st = INSTR (4, 0);
10141
10142   uint64_t  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10143   int64_t   extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10144                                extension);
10145   uint64_t  displacement = OPT_SCALE (extended, 64, scaling);
10146
10147   aarch64_set_mem_u64
10148     (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
10149 }
10150
10151 /* 128 bit store scaled unsigned 12 bit.  */
10152 static void
10153 fstrq_abs (sim_cpu *cpu, uint32_t offset)
10154 {
10155   FRegister a;
10156   unsigned st = INSTR (4, 0);
10157   unsigned rn = INSTR (9, 5);
10158   uint64_t addr;
10159
10160   aarch64_get_FP_long_double (cpu, st, & a);
10161
10162   addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
10163   aarch64_set_mem_long_double (cpu, addr, a);
10164 }
10165
10166 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback.  */
10167 static void
10168 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10169 {
10170   FRegister a;
10171   unsigned rn = INSTR (9, 5);
10172   unsigned st = INSTR (4, 0);
10173   uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10174
10175   if (wb != Post)
10176     address += offset;
10177
10178   aarch64_get_FP_long_double (cpu, st, & a);
10179   aarch64_set_mem_long_double (cpu, address, a);
10180
10181   if (wb == Post)
10182     address += offset;
10183
10184   if (wb != NoWriteBack)
10185     aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10186 }
10187
10188 /* 128 bit store scaled or unscaled zero-
10189    or sign-extended 32-bit register offset.  */
10190 static void
10191 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10192 {
10193   unsigned rm = INSTR (20, 16);
10194   unsigned rn = INSTR (9, 5);
10195   unsigned st = INSTR (4, 0);
10196
10197   uint64_t  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10198   int64_t   extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10199                                extension);
10200   uint64_t  displacement = OPT_SCALE (extended, 128, scaling);
10201
10202   FRegister a;
10203
10204   aarch64_get_FP_long_double (cpu, st, & a);
10205   aarch64_set_mem_long_double (cpu, address + displacement, a);
10206 }
10207
10208 static void
10209 dexLoadImmediatePrePost (sim_cpu *cpu)
10210 {
10211   /* instr[31,30] = size
10212      instr[29,27] = 111
10213      instr[26]    = V
10214      instr[25,24] = 00
10215      instr[23,22] = opc
10216      instr[21]    = 0
10217      instr[20,12] = simm9
10218      instr[11]    = wb : 0 ==> Post, 1 ==> Pre
10219      instr[10]    = 0
10220      instr[9,5]   = Rn may be SP.
10221      instr[4,0]   = Rt */
10222
10223   uint32_t  V        = INSTR (26, 26);
10224   uint32_t  dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10225   int32_t   imm      = simm32 (aarch64_get_instr (cpu), 20, 12);
10226   WriteBack wb       = INSTR (11, 11);
10227
10228   if (!V)
10229     {
10230       /* GReg operations.  */
10231       switch (dispatch)
10232         {
10233         case 0:  strb_wb (cpu, imm, wb); return;
10234         case 1:  ldrb32_wb (cpu, imm, wb); return;
10235         case 2:  ldrsb_wb (cpu, imm, wb); return;
10236         case 3:  ldrsb32_wb (cpu, imm, wb); return;
10237         case 4:  strh_wb (cpu, imm, wb); return;
10238         case 5:  ldrh32_wb (cpu, imm, wb); return;
10239         case 6:  ldrsh64_wb (cpu, imm, wb); return;
10240         case 7:  ldrsh32_wb (cpu, imm, wb); return;
10241         case 8:  str32_wb (cpu, imm, wb); return;
10242         case 9:  ldr32_wb (cpu, imm, wb); return;
10243         case 10: ldrsw_wb (cpu, imm, wb); return;
10244         case 12: str_wb (cpu, imm, wb); return;
10245         case 13: ldr_wb (cpu, imm, wb); return;
10246
10247         default:
10248         case 11:
10249         case 14:
10250         case 15:
10251           HALT_UNALLOC;
10252         }
10253     }
10254
10255   /* FReg operations.  */
10256   switch (dispatch)
10257     {
10258     case 2:  fstrq_wb (cpu, imm, wb); return;
10259     case 3:  fldrq_wb (cpu, imm, wb); return;
10260     case 8:  fstrs_wb (cpu, imm, wb); return;
10261     case 9:  fldrs_wb (cpu, imm, wb); return;
10262     case 12: fstrd_wb (cpu, imm, wb); return;
10263     case 13: fldrd_wb (cpu, imm, wb); return;
10264
10265     case 0:       /* STUR 8 bit FP.  */
10266     case 1:       /* LDUR 8 bit FP.  */
10267     case 4:       /* STUR 16 bit FP.  */
10268     case 5:       /* LDUR 8 bit FP.  */
10269       HALT_NYI;
10270
10271     default:
10272     case 6:
10273     case 7:
10274     case 10:
10275     case 11:
10276     case 14:
10277     case 15:
10278       HALT_UNALLOC;
10279     }
10280 }
10281
10282 static void
10283 dexLoadRegisterOffset (sim_cpu *cpu)
10284 {
10285   /* instr[31,30] = size
10286      instr[29,27] = 111
10287      instr[26]    = V
10288      instr[25,24] = 00
10289      instr[23,22] = opc
10290      instr[21]    = 1
10291      instr[20,16] = rm
10292      instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10293                              110 ==> SXTW, 111 ==> SXTX,
10294                              ow ==> RESERVED
10295      instr[12]    = scaled
10296      instr[11,10] = 10
10297      instr[9,5]   = rn
10298      instr[4,0]   = rt.  */
10299
10300   uint32_t  V = INSTR (26, 26);
10301   uint32_t  dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10302   Scaling   scale = INSTR (12, 12);
10303   Extension extensionType = INSTR (15, 13);
10304
10305   /* Check for illegal extension types.  */
10306   if (uimm (extensionType, 1, 1) == 0)
10307     HALT_UNALLOC;
10308
10309   if (extensionType == UXTX || extensionType == SXTX)
10310     extensionType = NoExtension;
10311
10312   if (!V)
10313     {
10314       /* GReg operations.  */
10315       switch (dispatch)
10316         {
10317         case 0:  strb_scale_ext (cpu, scale, extensionType); return;
10318         case 1:  ldrb32_scale_ext (cpu, scale, extensionType); return;
10319         case 2:  ldrsb_scale_ext (cpu, scale, extensionType); return;
10320         case 3:  ldrsb32_scale_ext (cpu, scale, extensionType); return;
10321         case 4:  strh_scale_ext (cpu, scale, extensionType); return;
10322         case 5:  ldrh32_scale_ext (cpu, scale, extensionType); return;
10323         case 6:  ldrsh_scale_ext (cpu, scale, extensionType); return;
10324         case 7:  ldrsh32_scale_ext (cpu, scale, extensionType); return;
10325         case 8:  str32_scale_ext (cpu, scale, extensionType); return;
10326         case 9:  ldr32_scale_ext (cpu, scale, extensionType); return;
10327         case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10328         case 12: str_scale_ext (cpu, scale, extensionType); return;
10329         case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10330         case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10331
10332         default:
10333         case 11:
10334         case 15:
10335           HALT_UNALLOC;
10336         }
10337     }
10338
10339   /* FReg operations.  */
10340   switch (dispatch)
10341     {
10342     case 1: /* LDUR 8 bit FP.  */
10343       HALT_NYI;
10344     case 3:  fldrq_scale_ext (cpu, scale, extensionType); return;
10345     case 5: /* LDUR 8 bit FP.  */
10346       HALT_NYI;
10347     case 9:  fldrs_scale_ext (cpu, scale, extensionType); return;
10348     case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
10349
10350     case 0:  fstrb_scale_ext (cpu, scale, extensionType); return;
10351     case 2:  fstrq_scale_ext (cpu, scale, extensionType); return;
10352     case 4:  fstrh_scale_ext (cpu, scale, extensionType); return;
10353     case 8:  fstrs_scale_ext (cpu, scale, extensionType); return;
10354     case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
10355
10356     default:
10357     case 6:
10358     case 7:
10359     case 10:
10360     case 11:
10361     case 14:
10362     case 15:
10363       HALT_UNALLOC;
10364     }
10365 }
10366
10367 static void
10368 dexLoadUnsignedImmediate (sim_cpu *cpu)
10369 {
10370   /* instr[29,24] == 111_01
10371      instr[31,30] = size
10372      instr[26]    = V
10373      instr[23,22] = opc
10374      instr[21,10] = uimm12 : unsigned immediate offset
10375      instr[9,5]   = rn may be SP.
10376      instr[4,0]   = rt.  */
10377
10378   uint32_t V = INSTR (26,26);
10379   uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10380   uint32_t imm = INSTR (21, 10);
10381
10382   if (!V)
10383     {
10384       /* GReg operations.  */
10385       switch (dispatch)
10386         {
10387         case 0:  strb_abs (cpu, imm); return;
10388         case 1:  ldrb32_abs (cpu, imm); return;
10389         case 2:  ldrsb_abs (cpu, imm); return;
10390         case 3:  ldrsb32_abs (cpu, imm); return;
10391         case 4:  strh_abs (cpu, imm); return;
10392         case 5:  ldrh32_abs (cpu, imm); return;
10393         case 6:  ldrsh_abs (cpu, imm); return;
10394         case 7:  ldrsh32_abs (cpu, imm); return;
10395         case 8:  str32_abs (cpu, imm); return;
10396         case 9:  ldr32_abs (cpu, imm); return;
10397         case 10: ldrsw_abs (cpu, imm); return;
10398         case 12: str_abs (cpu, imm); return;
10399         case 13: ldr_abs (cpu, imm); return;
10400         case 14: prfm_abs (cpu, imm); return;
10401
10402         default:
10403         case 11:
10404         case 15:
10405           HALT_UNALLOC;
10406         }
10407     }
10408
10409   /* FReg operations.  */
10410   switch (dispatch)
10411     {
10412     case 0:  fstrb_abs (cpu, imm); return;
10413     case 4:  fstrh_abs (cpu, imm); return;
10414     case 8:  fstrs_abs (cpu, imm); return;
10415     case 12: fstrd_abs (cpu, imm); return;
10416     case 2:  fstrq_abs (cpu, imm); return;
10417
10418     case 1:  fldrb_abs (cpu, imm); return;
10419     case 5:  fldrh_abs (cpu, imm); return;
10420     case 9:  fldrs_abs (cpu, imm); return;
10421     case 13: fldrd_abs (cpu, imm); return;
10422     case 3:  fldrq_abs (cpu, imm); return;
10423
10424     default:
10425     case 6:
10426     case 7:
10427     case 10:
10428     case 11:
10429     case 14:
10430     case 15:
10431       HALT_UNALLOC;
10432     }
10433 }
10434
10435 static void
10436 dexLoadExclusive (sim_cpu *cpu)
10437 {
10438   /* assert instr[29:24] = 001000;
10439      instr[31,30] = size
10440      instr[23] = 0 if exclusive
10441      instr[22] = L : 1 if load, 0 if store
10442      instr[21] = 1 if pair
10443      instr[20,16] = Rs
10444      instr[15] = o0 : 1 if ordered
10445      instr[14,10] = Rt2
10446      instr[9,5] = Rn
10447      instr[4.0] = Rt.  */
10448
10449   switch (INSTR (22, 21))
10450     {
10451     case 2:   ldxr (cpu); return;
10452     case 0:   stxr (cpu); return;
10453     default:  HALT_NYI;
10454     }
10455 }
10456
10457 static void
10458 dexLoadOther (sim_cpu *cpu)
10459 {
10460   uint32_t dispatch;
10461
10462   /* instr[29,25] = 111_0
10463      instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
10464      instr[21:11,10] is the secondary dispatch.  */
10465   if (INSTR (24, 24))
10466     {
10467       dexLoadUnsignedImmediate (cpu);
10468       return;
10469     }
10470
10471   dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
10472   switch (dispatch)
10473     {
10474     case 0: dexLoadUnscaledImmediate (cpu); return;
10475     case 1: dexLoadImmediatePrePost (cpu); return;
10476     case 3: dexLoadImmediatePrePost (cpu); return;
10477     case 6: dexLoadRegisterOffset (cpu); return;
10478
10479     default:
10480     case 2:
10481     case 4:
10482     case 5:
10483     case 7:
10484       HALT_NYI;
10485     }
10486 }
10487
10488 static void
10489 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10490 {
10491   unsigned rn = INSTR (14, 10);
10492   unsigned rd = INSTR (9, 5);
10493   unsigned rm = INSTR (4, 0);
10494   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10495
10496   if ((rn == rd || rm == rd) && wb != NoWriteBack)
10497     HALT_UNALLOC; /* ??? */
10498
10499   offset <<= 2;
10500
10501   if (wb != Post)
10502     address += offset;
10503
10504   aarch64_set_mem_u32 (cpu, address,
10505                        aarch64_get_reg_u32 (cpu, rm, NO_SP));
10506   aarch64_set_mem_u32 (cpu, address + 4,
10507                        aarch64_get_reg_u32 (cpu, rn, NO_SP));
10508
10509   if (wb == Post)
10510     address += offset;
10511
10512   if (wb != NoWriteBack)
10513     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10514 }
10515
10516 static void
10517 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10518 {
10519   unsigned rn = INSTR (14, 10);
10520   unsigned rd = INSTR (9, 5);
10521   unsigned rm = INSTR (4, 0);
10522   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10523
10524   if ((rn == rd || rm == rd) && wb != NoWriteBack)
10525     HALT_UNALLOC; /* ??? */
10526
10527   offset <<= 3;
10528
10529   if (wb != Post)
10530     address += offset;
10531
10532   aarch64_set_mem_u64 (cpu, address,
10533                        aarch64_get_reg_u64 (cpu, rm, NO_SP));
10534   aarch64_set_mem_u64 (cpu, address + 8,
10535                        aarch64_get_reg_u64 (cpu, rn, NO_SP));
10536
10537   if (wb == Post)
10538     address += offset;
10539
10540   if (wb != NoWriteBack)
10541     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10542 }
10543
10544 static void
10545 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10546 {
10547   unsigned rn = INSTR (14, 10);
10548   unsigned rd = INSTR (9, 5);
10549   unsigned rm = INSTR (4, 0);
10550   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10551
10552   /* Treat this as unalloc to make sure we don't do it.  */
10553   if (rn == rm)
10554     HALT_UNALLOC;
10555
10556   offset <<= 2;
10557
10558   if (wb != Post)
10559     address += offset;
10560
10561   aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
10562   aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
10563
10564   if (wb == Post)
10565     address += offset;
10566
10567   if (wb != NoWriteBack)
10568     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10569 }
10570
10571 static void
10572 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10573 {
10574   unsigned rn = INSTR (14, 10);
10575   unsigned rd = INSTR (9, 5);
10576   unsigned rm = INSTR (4, 0);
10577   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10578
10579   /* Treat this as unalloc to make sure we don't do it.  */
10580   if (rn == rm)
10581     HALT_UNALLOC;
10582
10583   offset <<= 2;
10584
10585   if (wb != Post)
10586     address += offset;
10587
10588   aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
10589   aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
10590
10591   if (wb == Post)
10592     address += offset;
10593
10594   if (wb != NoWriteBack)
10595     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10596 }
10597
10598 static void
10599 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10600 {
10601   unsigned rn = INSTR (14, 10);
10602   unsigned rd = INSTR (9, 5);
10603   unsigned rm = INSTR (4, 0);
10604   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10605
10606   /* Treat this as unalloc to make sure we don't do it.  */
10607   if (rn == rm)
10608     HALT_UNALLOC;
10609
10610   offset <<= 3;
10611
10612   if (wb != Post)
10613     address += offset;
10614
10615   aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
10616   aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
10617
10618   if (wb == Post)
10619     address += offset;
10620
10621   if (wb != NoWriteBack)
10622     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10623 }
10624
10625 static void
10626 dex_load_store_pair_gr (sim_cpu *cpu)
10627 {
10628   /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
10629      instr[29,25] = instruction encoding: 101_0
10630      instr[26]    = V : 1 if fp 0 if gp
10631      instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
10632      instr[22]    = load/store (1=> load)
10633      instr[21,15] = signed, scaled, offset
10634      instr[14,10] = Rn
10635      instr[ 9, 5] = Rd
10636      instr[ 4, 0] = Rm.  */
10637
10638   uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
10639   int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
10640
10641   switch (dispatch)
10642     {
10643     case 2: store_pair_u32 (cpu, offset, Post); return;
10644     case 3: load_pair_u32  (cpu, offset, Post); return;
10645     case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
10646     case 5: load_pair_u32  (cpu, offset, NoWriteBack); return;
10647     case 6: store_pair_u32 (cpu, offset, Pre); return;
10648     case 7: load_pair_u32  (cpu, offset, Pre); return;
10649
10650     case 11: load_pair_s32  (cpu, offset, Post); return;
10651     case 13: load_pair_s32  (cpu, offset, NoWriteBack); return;
10652     case 15: load_pair_s32  (cpu, offset, Pre); return;
10653
10654     case 18: store_pair_u64 (cpu, offset, Post); return;
10655     case 19: load_pair_u64  (cpu, offset, Post); return;
10656     case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
10657     case 21: load_pair_u64  (cpu, offset, NoWriteBack); return;
10658     case 22: store_pair_u64 (cpu, offset, Pre); return;
10659     case 23: load_pair_u64  (cpu, offset, Pre); return;
10660
10661     default:
10662       HALT_UNALLOC;
10663     }
10664 }
10665
10666 static void
10667 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
10668 {
10669   unsigned rn = INSTR (14, 10);
10670   unsigned rd = INSTR (9, 5);
10671   unsigned rm = INSTR (4, 0);
10672   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10673
10674   offset <<= 2;
10675
10676   if (wb != Post)
10677     address += offset;
10678
10679   aarch64_set_mem_u32 (cpu, address,     aarch64_get_vec_u32 (cpu, rm, 0));
10680   aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
10681
10682   if (wb == Post)
10683     address += offset;
10684
10685   if (wb != NoWriteBack)
10686     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10687 }
10688
10689 static void
10690 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10691 {
10692   unsigned rn = INSTR (14, 10);
10693   unsigned rd = INSTR (9, 5);
10694   unsigned rm = INSTR (4, 0);
10695   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10696
10697   offset <<= 3;
10698
10699   if (wb != Post)
10700     address += offset;
10701
10702   aarch64_set_mem_u64 (cpu, address,     aarch64_get_vec_u64 (cpu, rm, 0));
10703   aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
10704
10705   if (wb == Post)
10706     address += offset;
10707
10708   if (wb != NoWriteBack)
10709     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10710 }
10711
10712 static void
10713 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10714 {
10715   FRegister a;
10716   unsigned rn = INSTR (14, 10);
10717   unsigned rd = INSTR (9, 5);
10718   unsigned rm = INSTR (4, 0);
10719   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10720
10721   offset <<= 4;
10722
10723   if (wb != Post)
10724     address += offset;
10725
10726   aarch64_get_FP_long_double (cpu, rm, & a);
10727   aarch64_set_mem_long_double (cpu, address, a);
10728   aarch64_get_FP_long_double (cpu, rn, & a);
10729   aarch64_set_mem_long_double (cpu, address + 16, a);
10730
10731   if (wb == Post)
10732     address += offset;
10733
10734   if (wb != NoWriteBack)
10735     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10736 }
10737
10738 static void
10739 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
10740 {
10741   unsigned rn = INSTR (14, 10);
10742   unsigned rd = INSTR (9, 5);
10743   unsigned rm = INSTR (4, 0);
10744   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10745
10746   if (rm == rn)
10747     HALT_UNALLOC;
10748
10749   offset <<= 2;
10750
10751   if (wb != Post)
10752     address += offset;
10753
10754   aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
10755   aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
10756
10757   if (wb == Post)
10758     address += offset;
10759
10760   if (wb != NoWriteBack)
10761     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10762 }
10763
10764 static void
10765 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10766 {
10767   unsigned rn = INSTR (14, 10);
10768   unsigned rd = INSTR (9, 5);
10769   unsigned rm = INSTR (4, 0);
10770   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10771
10772   if (rm == rn)
10773     HALT_UNALLOC;
10774
10775   offset <<= 3;
10776
10777   if (wb != Post)
10778     address += offset;
10779
10780   aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
10781   aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
10782
10783   if (wb == Post)
10784     address += offset;
10785
10786   if (wb != NoWriteBack)
10787     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10788 }
10789
10790 static void
10791 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10792 {
10793   FRegister a;
10794   unsigned rn = INSTR (14, 10);
10795   unsigned rd = INSTR (9, 5);
10796   unsigned rm = INSTR (4, 0);
10797   uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10798
10799   if (rm == rn)
10800     HALT_UNALLOC;
10801
10802   offset <<= 4;
10803
10804   if (wb != Post)
10805     address += offset;
10806
10807   aarch64_get_mem_long_double (cpu, address, & a);
10808   aarch64_set_FP_long_double (cpu, rm, a);
10809   aarch64_get_mem_long_double (cpu, address + 16, & a);
10810   aarch64_set_FP_long_double (cpu, rn, a);
10811
10812   if (wb == Post)
10813     address += offset;
10814
10815   if (wb != NoWriteBack)
10816     aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10817 }
10818
10819 static void
10820 dex_load_store_pair_fp (sim_cpu *cpu)
10821 {
10822   /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
10823      instr[29,25] = instruction encoding
10824      instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
10825      instr[22]    = load/store (1=> load)
10826      instr[21,15] = signed, scaled, offset
10827      instr[14,10] = Rn
10828      instr[ 9, 5] = Rd
10829      instr[ 4, 0] = Rm  */
10830
10831   uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
10832   int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
10833
10834   switch (dispatch)
10835     {
10836     case 2: store_pair_float (cpu, offset, Post); return;
10837     case 3: load_pair_float  (cpu, offset, Post); return;
10838     case 4: store_pair_float (cpu, offset, NoWriteBack); return;
10839     case 5: load_pair_float  (cpu, offset, NoWriteBack); return;
10840     case 6: store_pair_float (cpu, offset, Pre); return;
10841     case 7: load_pair_float  (cpu, offset, Pre); return;
10842
10843     case 10: store_pair_double (cpu, offset, Post); return;
10844     case 11: load_pair_double  (cpu, offset, Post); return;
10845     case 12: store_pair_double (cpu, offset, NoWriteBack); return;
10846     case 13: load_pair_double  (cpu, offset, NoWriteBack); return;
10847     case 14: store_pair_double (cpu, offset, Pre); return;
10848     case 15: load_pair_double  (cpu, offset, Pre); return;
10849
10850     case 18: store_pair_long_double (cpu, offset, Post); return;
10851     case 19: load_pair_long_double  (cpu, offset, Post); return;
10852     case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
10853     case 21: load_pair_long_double  (cpu, offset, NoWriteBack); return;
10854     case 22: store_pair_long_double (cpu, offset, Pre); return;
10855     case 23: load_pair_long_double  (cpu, offset, Pre); return;
10856
10857     default:
10858       HALT_UNALLOC;
10859     }
10860 }
10861
/* Return the number of the vector register O places after V.  There
   are 32 vector registers (V0 to V31) and a register list wraps from
   V31 back to V0, so the sum is reduced modulo 32.  */
static inline unsigned
vec_reg (unsigned v, unsigned o)
{
  return (v + o) & 0x1F;
}
10867
10868 /* Load multiple N-element structures to N consecutive registers.  */
10869 static void
10870 vec_load (sim_cpu *cpu, uint64_t address, unsigned N)
10871 {
10872   int      all  = INSTR (30, 30);
10873   unsigned size = INSTR (11, 10);
10874   unsigned vd   = INSTR (4, 0);
10875   unsigned i;
10876
10877   switch (size)
10878     {
10879     case 0: /* 8-bit operations.  */
10880       if (all)
10881         for (i = 0; i < (16 * N); i++)
10882           aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15,
10883                               aarch64_get_mem_u8 (cpu, address + i));
10884       else
10885         for (i = 0; i < (8 * N); i++)
10886           aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7,
10887                               aarch64_get_mem_u8 (cpu, address + i));
10888       return;
10889
10890     case 1: /* 16-bit operations.  */
10891       if (all)
10892         for (i = 0; i < (8 * N); i++)
10893           aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7,
10894                                aarch64_get_mem_u16 (cpu, address + i * 2));
10895       else
10896         for (i = 0; i < (4 * N); i++)
10897           aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3,
10898                                aarch64_get_mem_u16 (cpu, address + i * 2));
10899       return;
10900
10901     case 2: /* 32-bit operations.  */
10902       if (all)
10903         for (i = 0; i < (4 * N); i++)
10904           aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3,
10905                                aarch64_get_mem_u32 (cpu, address + i * 4));
10906       else
10907         for (i = 0; i < (2 * N); i++)
10908           aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1,
10909                                aarch64_get_mem_u32 (cpu, address + i * 4));
10910       return;
10911
10912     case 3: /* 64-bit operations.  */
10913       if (all)
10914         for (i = 0; i < (2 * N); i++)
10915           aarch64_set_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1,
10916                                aarch64_get_mem_u64 (cpu, address + i * 8));
10917       else
10918         for (i = 0; i < N; i++)
10919           aarch64_set_vec_u64 (cpu, vec_reg (vd, i), 0,
10920                                aarch64_get_mem_u64 (cpu, address + i * 8));
10921       return;
10922     }
10923 }
10924
10925 /* LD4: load multiple 4-element to four consecutive registers.  */
10926 static void
10927 LD4 (sim_cpu *cpu, uint64_t address)
10928 {
10929   vec_load (cpu, address, 4);
10930 }
10931
10932 /* LD3: load multiple 3-element structures to three consecutive registers.  */
10933 static void
10934 LD3 (sim_cpu *cpu, uint64_t address)
10935 {
10936   vec_load (cpu, address, 3);
10937 }
10938
10939 /* LD2: load multiple 2-element structures to two consecutive registers.  */
10940 static void
10941 LD2 (sim_cpu *cpu, uint64_t address)
10942 {
10943   vec_load (cpu, address, 2);
10944 }
10945
10946 /* Load multiple 1-element structures into one register.  */
10947 static void
10948 LD1_1 (sim_cpu *cpu, uint64_t address)
10949 {
10950   int      all  = INSTR (30, 30);
10951   unsigned size = INSTR (11, 10);
10952   unsigned vd   = INSTR (4, 0);
10953   unsigned i;
10954
10955   switch (size)
10956     {
10957     case 0:
10958       /* LD1 {Vd.16b}, addr, #16 */
10959       /* LD1 {Vd.8b}, addr, #8 */
10960       for (i = 0; i < (all ? 16 : 8); i++)
10961         aarch64_set_vec_u8 (cpu, vd, i,
10962                             aarch64_get_mem_u8 (cpu, address + i));
10963       return;
10964
10965     case 1:
10966       /* LD1 {Vd.8h}, addr, #16 */
10967       /* LD1 {Vd.4h}, addr, #8 */
10968       for (i = 0; i < (all ? 8 : 4); i++)
10969         aarch64_set_vec_u16 (cpu, vd, i,
10970                              aarch64_get_mem_u16 (cpu, address + i * 2));
10971       return;
10972
10973     case 2:
10974       /* LD1 {Vd.4s}, addr, #16 */
10975       /* LD1 {Vd.2s}, addr, #8 */
10976       for (i = 0; i < (all ? 4 : 2); i++)
10977         aarch64_set_vec_u32 (cpu, vd, i,
10978                              aarch64_get_mem_u32 (cpu, address + i * 4));
10979       return;
10980
10981     case 3:
10982       /* LD1 {Vd.2d}, addr, #16 */
10983       /* LD1 {Vd.1d}, addr, #8 */
10984       for (i = 0; i < (all ? 2 : 1); i++)
10985         aarch64_set_vec_u64 (cpu, vd, i,
10986                              aarch64_get_mem_u64 (cpu, address + i * 8));
10987       return;
10988     }
10989 }
10990
10991 /* Load multiple 1-element structures into two registers.  */
10992 static void
10993 LD1_2 (sim_cpu *cpu, uint64_t address)
10994 {
10995   /* FIXME: This algorithm is *exactly* the same as the LD2 version.
10996      So why have two different instructions ?  There must be something
10997      wrong somewhere.  */
10998   vec_load (cpu, address, 2);
10999 }
11000
11001 /* Load multiple 1-element structures into three registers.  */
11002 static void
11003 LD1_3 (sim_cpu *cpu, uint64_t address)
11004 {
11005   /* FIXME: This algorithm is *exactly* the same as the LD3 version.
11006      So why have two different instructions ?  There must be something
11007      wrong somewhere.  */
11008   vec_load (cpu, address, 3);
11009 }
11010
11011 /* Load multiple 1-element structures into four registers.  */
11012 static void
11013 LD1_4 (sim_cpu *cpu, uint64_t address)
11014 {
11015   /* FIXME: This algorithm is *exactly* the same as the LD4 version.
11016      So why have two different instructions ?  There must be something
11017      wrong somewhere.  */
11018   vec_load (cpu, address, 4);
11019 }
11020
11021 /* Store multiple N-element structures to N consecutive registers.  */
11022 static void
11023 vec_store (sim_cpu *cpu, uint64_t address, unsigned N)
11024 {
11025   int      all  = INSTR (30, 30);
11026   unsigned size = INSTR (11, 10);
11027   unsigned vd   = INSTR (4, 0);
11028   unsigned i;
11029
11030   switch (size)
11031     {
11032     case 0: /* 8-bit operations.  */
11033       if (all)
11034         for (i = 0; i < (16 * N); i++)
11035           aarch64_set_mem_u8
11036             (cpu, address + i,
11037              aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15));
11038       else
11039         for (i = 0; i < (8 * N); i++)
11040           aarch64_set_mem_u8
11041             (cpu, address + i,
11042              aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7));
11043       return;
11044
11045     case 1: /* 16-bit operations.  */
11046       if (all)
11047         for (i = 0; i < (8 * N); i++)
11048           aarch64_set_mem_u16
11049             (cpu, address + i * 2,
11050              aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7));
11051       else
11052         for (i = 0; i < (4 * N); i++)
11053           aarch64_set_mem_u16
11054             (cpu, address + i * 2,
11055              aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3));
11056       return;
11057
11058     case 2: /* 32-bit operations.  */
11059       if (all)
11060         for (i = 0; i < (4 * N); i++)
11061           aarch64_set_mem_u32
11062             (cpu, address + i * 4,
11063              aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3));
11064       else
11065         for (i = 0; i < (2 * N); i++)
11066           aarch64_set_mem_u32
11067             (cpu, address + i * 4,
11068              aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1));
11069       return;
11070
11071     case 3: /* 64-bit operations.  */
11072       if (all)
11073         for (i = 0; i < (2 * N); i++)
11074           aarch64_set_mem_u64
11075             (cpu, address + i * 8,
11076              aarch64_get_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1));
11077       else
11078         for (i = 0; i < N; i++)
11079           aarch64_set_mem_u64
11080             (cpu, address + i * 8,
11081              aarch64_get_vec_u64 (cpu, vec_reg (vd, i), 0));
11082       return;
11083     }
11084 }
11085
11086 /* Store multiple 4-element structure to four consecutive registers.  */
11087 static void
11088 ST4 (sim_cpu *cpu, uint64_t address)
11089 {
11090   vec_store (cpu, address, 4);
11091 }
11092
11093 /* Store multiple 3-element structures to three consecutive registers.  */
11094 static void
11095 ST3 (sim_cpu *cpu, uint64_t address)
11096 {
11097   vec_store (cpu, address, 3);
11098 }
11099
11100 /* Store multiple 2-element structures to two consecutive registers.  */
11101 static void
11102 ST2 (sim_cpu *cpu, uint64_t address)
11103 {
11104   vec_store (cpu, address, 2);
11105 }
11106
11107 /* Store multiple 1-element structures into one register.  */
11108 static void
11109 ST1_1 (sim_cpu *cpu, uint64_t address)
11110 {
11111   int      all  = INSTR (30, 30);
11112   unsigned size = INSTR (11, 10);
11113   unsigned vd   = INSTR (4, 0);
11114   unsigned i;
11115
11116   switch (size)
11117     {
11118     case 0:
11119       for (i = 0; i < (all ? 16 : 8); i++)
11120         aarch64_set_mem_u8 (cpu, address + i,
11121                             aarch64_get_vec_u8 (cpu, vd, i));
11122       return;
11123
11124     case 1:
11125       for (i = 0; i < (all ? 8 : 4); i++)
11126         aarch64_set_mem_u16 (cpu, address + i * 2,
11127                              aarch64_get_vec_u16 (cpu, vd, i));
11128       return;
11129
11130     case 2:
11131       for (i = 0; i < (all ? 4 : 2); i++)
11132         aarch64_set_mem_u32 (cpu, address + i * 4,
11133                              aarch64_get_vec_u32 (cpu, vd, i));
11134       return;
11135
11136     case 3:
11137       for (i = 0; i < (all ? 2 : 1); i++)
11138         aarch64_set_mem_u64 (cpu, address + i * 8,
11139                              aarch64_get_vec_u64 (cpu, vd, i));
11140       return;
11141     }
11142 }
11143
11144 /* Store multiple 1-element structures into two registers.  */
11145 static void
11146 ST1_2 (sim_cpu *cpu, uint64_t address)
11147 {
11148   /* FIXME: This algorithm is *exactly* the same as the ST2 version.
11149      So why have two different instructions ?  There must be
11150      something wrong somewhere.  */
11151   vec_store (cpu, address, 2);
11152 }
11153
11154 /* Store multiple 1-element structures into three registers.  */
11155 static void
11156 ST1_3 (sim_cpu *cpu, uint64_t address)
11157 {
11158   /* FIXME: This algorithm is *exactly* the same as the ST3 version.
11159      So why have two different instructions ?  There must be
11160      something wrong somewhere.  */
11161   vec_store (cpu, address, 3);
11162 }
11163
11164 /* Store multiple 1-element structures into four registers.  */
11165 static void
11166 ST1_4 (sim_cpu *cpu, uint64_t address)
11167 {
11168   /* FIXME: This algorithm is *exactly* the same as the ST4 version.
11169      So why have two different instructions ?  There must be
11170      something wrong somewhere.  */
11171   vec_store (cpu, address, 4);
11172 }
11173
11174 static void
11175 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
11176 {
11177   /* instr[31]    = 0
11178      instr[30]    = element selector 0=>half, 1=>all elements
11179      instr[29,24] = 00 1101
11180      instr[23]    = 0=>simple, 1=>post
11181      instr[22]    = 1
11182      instr[21]    = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11183      instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11184                       11111 (immediate post inc)
11185      instr[15,14] = 11
11186      instr[13]    = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11187      instr[12]    = 0
11188      instr[11,10] = element size 00=> byte(b), 01=> half(h),
11189                                  10=> word(s), 11=> double(d)
11190      instr[9,5]   = address
11191      instr[4,0]   = Vd  */
11192
11193   unsigned full = INSTR (30, 30);
11194   unsigned vd = INSTR (4, 0);
11195   unsigned size = INSTR (11, 10);
11196   int i;
11197
11198   NYI_assert (29, 24, 0x0D);
11199   NYI_assert (22, 22, 1);
11200   NYI_assert (15, 14, 3);
11201   NYI_assert (12, 12, 0);
11202
11203   switch ((INSTR (13, 13) << 1) | INSTR (21, 21))
11204     {
11205     case 0: /* LD1R.  */
11206       switch (size)
11207         {
11208         case 0:
11209           {
11210             uint8_t val = aarch64_get_mem_u8 (cpu, address);
11211             for (i = 0; i < (full ? 16 : 8); i++)
11212               aarch64_set_vec_u8 (cpu, vd, i, val);
11213             break;
11214           }
11215
11216         case 1:
11217           {
11218             uint16_t val = aarch64_get_mem_u16 (cpu, address);
11219             for (i = 0; i < (full ? 8 : 4); i++)
11220               aarch64_set_vec_u16 (cpu, vd, i, val);
11221             break;
11222           }
11223
11224         case 2:
11225           {
11226             uint32_t val = aarch64_get_mem_u32 (cpu, address);
11227             for (i = 0; i < (full ? 4 : 2); i++)
11228               aarch64_set_vec_u32 (cpu, vd, i, val);
11229             break;
11230           }
11231
11232         case 3:
11233           {
11234             uint64_t val = aarch64_get_mem_u64 (cpu, address);
11235             for (i = 0; i < (full ? 2 : 1); i++)
11236               aarch64_set_vec_u64 (cpu, vd, i, val);
11237             break;
11238           }
11239
11240         default:
11241           HALT_UNALLOC;
11242         }
11243       break;
11244
11245     case 1: /* LD2R.  */
11246       switch (size)
11247         {
11248         case 0:
11249           {
11250             uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11251             uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11252
11253             for (i = 0; i < (full ? 16 : 8); i++)
11254               {
11255                 aarch64_set_vec_u8 (cpu, vd, 0, val1);
11256                 aarch64_set_vec_u8 (cpu, vd + 1, 0, val2);
11257               }
11258             break;
11259           }
11260
11261         case 1:
11262           {
11263             uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
11264             uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11265
11266             for (i = 0; i < (full ? 8 : 4); i++)
11267               {
11268                 aarch64_set_vec_u16 (cpu, vd, 0, val1);
11269                 aarch64_set_vec_u16 (cpu, vd + 1, 0, val2);
11270               }
11271             break;
11272           }
11273
11274         case 2:
11275           {
11276             uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11277             uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11278
11279             for (i = 0; i < (full ? 4 : 2); i++)
11280               {
11281                 aarch64_set_vec_u32 (cpu, vd, 0, val1);
11282                 aarch64_set_vec_u32 (cpu, vd + 1, 0, val2);
11283               }
11284             break;
11285           }
11286
11287         case 3:
11288           {
11289             uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11290             uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11291
11292             for (i = 0; i < (full ? 2 : 1); i++)
11293               {
11294                 aarch64_set_vec_u64 (cpu, vd, 0, val1);
11295                 aarch64_set_vec_u64 (cpu, vd + 1, 0, val2);
11296               }
11297             break;
11298           }
11299
11300         default:
11301           HALT_UNALLOC;
11302         }
11303       break;
11304
11305     case 2: /* LD3R.  */
11306       switch (size)
11307         {
11308         case 0:
11309           {
11310             uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11311             uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11312             uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
11313
11314             for (i = 0; i < (full ? 16 : 8); i++)
11315               {
11316                 aarch64_set_vec_u8 (cpu, vd, 0, val1);
11317                 aarch64_set_vec_u8 (cpu, vd + 1, 0, val2);
11318                 aarch64_set_vec_u8 (cpu, vd + 2, 0, val3);
11319               }
11320           }
11321           break;
11322
11323         case 1:
11324           {
11325             uint32_t val1 = aarch64_get_mem_u16 (cpu, address);
11326             uint32_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11327             uint32_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
11328
11329             for (i = 0; i < (full ? 8 : 4); i++)
11330               {
11331                 aarch64_set_vec_u16 (cpu, vd, 0, val1);
11332                 aarch64_set_vec_u16 (cpu, vd + 1, 0, val2);
11333                 aarch64_set_vec_u16 (cpu, vd + 2, 0, val3);
11334               }
11335           }
11336           break;
11337
11338         case 2:
11339           {
11340             uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11341             uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11342             uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
11343
11344             for (i = 0; i < (full ? 4 : 2); i++)
11345               {
11346                 aarch64_set_vec_u32 (cpu, vd, 0, val1);
11347                 aarch64_set_vec_u32 (cpu, vd + 1, 0, val2);
11348                 aarch64_set_vec_u32 (cpu, vd + 2, 0, val3);
11349               }
11350           }
11351           break;
11352
11353         case 3:
11354           {
11355             uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11356             uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11357             uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
11358
11359             for (i = 0; i < (full ? 2 : 1); i++)
11360               {
11361                 aarch64_set_vec_u64 (cpu, vd, 0, val1);
11362                 aarch64_set_vec_u64 (cpu, vd + 1, 0, val2);
11363                 aarch64_set_vec_u64 (cpu, vd + 2, 0, val3);
11364               }
11365           }
11366           break;
11367
11368         default:
11369           HALT_UNALLOC;
11370         }
11371       break;
11372
11373     case 3: /* LD4R.  */
11374       switch (size)
11375         {
11376         case 0:
11377           {
11378             uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11379             uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11380             uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
11381             uint8_t val4 = aarch64_get_mem_u8 (cpu, address + 3);
11382
11383             for (i = 0; i < (full ? 16 : 8); i++)
11384               {
11385                 aarch64_set_vec_u8 (cpu, vd, 0, val1);
11386                 aarch64_set_vec_u8 (cpu, vd + 1, 0, val2);
11387                 aarch64_set_vec_u8 (cpu, vd + 2, 0, val3);
11388                 aarch64_set_vec_u8 (cpu, vd + 3, 0, val4);
11389               }
11390           }
11391           break;
11392
11393         case 1:
11394           {
11395             uint32_t val1 = aarch64_get_mem_u16 (cpu, address);
11396             uint32_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11397             uint32_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
11398             uint32_t val4 = aarch64_get_mem_u16 (cpu, address + 6);
11399
11400             for (i = 0; i < (full ? 8 : 4); i++)
11401               {
11402                 aarch64_set_vec_u16 (cpu, vd, 0, val1);
11403                 aarch64_set_vec_u16 (cpu, vd + 1, 0, val2);
11404                 aarch64_set_vec_u16 (cpu, vd + 2, 0, val3);
11405                 aarch64_set_vec_u16 (cpu, vd + 3, 0, val4);
11406               }
11407           }
11408           break;
11409
11410         case 2:
11411           {
11412             uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11413             uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11414             uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
11415             uint32_t val4 = aarch64_get_mem_u32 (cpu, address + 12);
11416
11417             for (i = 0; i < (full ? 4 : 2); i++)
11418               {
11419                 aarch64_set_vec_u32 (cpu, vd, 0, val1);
11420                 aarch64_set_vec_u32 (cpu, vd + 1, 0, val2);
11421                 aarch64_set_vec_u32 (cpu, vd + 2, 0, val3);
11422                 aarch64_set_vec_u32 (cpu, vd + 3, 0, val4);
11423               }
11424           }
11425           break;
11426
11427         case 3:
11428           {
11429             uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11430             uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11431             uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
11432             uint64_t val4 = aarch64_get_mem_u64 (cpu, address + 24);
11433
11434             for (i = 0; i < (full ? 2 : 1); i++)
11435               {
11436                 aarch64_set_vec_u64 (cpu, vd, 0, val1);
11437                 aarch64_set_vec_u64 (cpu, vd + 1, 0, val2);
11438                 aarch64_set_vec_u64 (cpu, vd + 2, 0, val3);
11439                 aarch64_set_vec_u64 (cpu, vd + 3, 0, val4);
11440               }
11441           }
11442           break;
11443
11444         default:
11445           HALT_UNALLOC;
11446         }
11447       break;
11448
11449     default:
11450       HALT_UNALLOC;
11451     }
11452 }
11453
11454 static void
11455 do_vec_load_store (sim_cpu *cpu)
11456 {
11457   /* {LD|ST}<N>   {Vd..Vd+N}, vaddr
11458
11459      instr[31]    = 0
11460      instr[30]    = element selector 0=>half, 1=>all elements
11461      instr[29,25] = 00110
11462      instr[24]    = ?
11463      instr[23]    = 0=>simple, 1=>post
11464      instr[22]    = 0=>store, 1=>load
11465      instr[21]    = 0 (LDn) / small(0)-large(1) selector (LDnR)
11466      instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
11467                     11111 (immediate post inc)
11468      instr[15,12] = elements and destinations.  eg for load:
11469                      0000=>LD4 => load multiple 4-element to
11470                      four consecutive registers
11471                      0100=>LD3 => load multiple 3-element to
11472                      three consecutive registers
11473                      1000=>LD2 => load multiple 2-element to
11474                      two consecutive registers
11475                      0010=>LD1 => load multiple 1-element to
11476                      four consecutive registers
11477                      0110=>LD1 => load multiple 1-element to
11478                      three consecutive registers
11479                      1010=>LD1 => load multiple 1-element to
11480                      two consecutive registers
11481                      0111=>LD1 => load multiple 1-element to
11482                      one register
11483                      1100=>LDR1,LDR2
11484                      1110=>LDR3,LDR4
11485      instr[11,10] = element size 00=> byte(b), 01=> half(h),
11486                                  10=> word(s), 11=> double(d)
11487      instr[9,5]   = Vn, can be SP
11488      instr[4,0]   = Vd  */
11489
11490   int post;
11491   int load;
11492   unsigned vn;
11493   uint64_t address;
11494   int type;
11495
11496   if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
11497     HALT_NYI;
11498
11499   type = INSTR (15, 12);
11500   if (type != 0xE && type != 0xE && INSTR (21, 21) != 0)
11501     HALT_NYI;
11502
11503   post = INSTR (23, 23);
11504   load = INSTR (22, 22);
11505   vn = INSTR (9, 5);
11506   address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
11507
11508   if (post)
11509     {
11510       unsigned vm = INSTR (20, 16);
11511
11512       if (vm == R31)
11513         {
11514           unsigned sizeof_operation;
11515
11516           switch (type)
11517             {
11518             case 0: sizeof_operation = 32; break;
11519             case 4: sizeof_operation = 24; break;
11520             case 8: sizeof_operation = 16; break;
11521
11522             case 0xC:
11523               sizeof_operation = INSTR (21, 21) ? 2 : 1;
11524               sizeof_operation <<= INSTR (11, 10);
11525               break;
11526
11527             case 0xE:
11528               sizeof_operation = INSTR (21, 21) ? 8 : 4;
11529               sizeof_operation <<= INSTR (11, 10);
11530               break;
11531
11532             case 7:
11533               /* One register, immediate offset variant.  */
11534               sizeof_operation = 8;
11535               break;
11536
11537             case 10:
11538               /* Two registers, immediate offset variant.  */
11539               sizeof_operation = 16;
11540               break;
11541
11542             case 6:
11543               /* Three registers, immediate offset variant.  */
11544               sizeof_operation = 24;
11545               break;
11546
11547             case 2:
11548               /* Four registers, immediate offset variant.  */
11549               sizeof_operation = 32;
11550               break;
11551
11552             default:
11553               HALT_UNALLOC;
11554             }
11555
11556           if (INSTR (30, 30))
11557             sizeof_operation *= 2;
11558
11559           aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
11560         }
11561       else
11562         aarch64_set_reg_u64 (cpu, vn, SP_OK,
11563                              address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
11564     }
11565   else
11566     {
11567       NYI_assert (20, 16, 0);
11568     }
11569
11570   if (load)
11571     {
11572       switch (type)
11573         {
11574         case 0:  LD4 (cpu, address); return;
11575         case 4:  LD3 (cpu, address); return;
11576         case 8:  LD2 (cpu, address); return;
11577         case 2:  LD1_4 (cpu, address); return;
11578         case 6:  LD1_3 (cpu, address); return;
11579         case 10: LD1_2 (cpu, address); return;
11580         case 7:  LD1_1 (cpu, address); return;
11581
11582         case 0xE:
11583         case 0xC: do_vec_LDnR (cpu, address); return;
11584
11585         default:
11586           HALT_NYI;
11587         }
11588     }
11589
11590   /* Stores.  */
11591   switch (type)
11592     {
11593     case 0:  ST4 (cpu, address); return;
11594     case 4:  ST3 (cpu, address); return;
11595     case 8:  ST2 (cpu, address); return;
11596     case 2:  ST1_4 (cpu, address); return;
11597     case 6:  ST1_3 (cpu, address); return;
11598     case 10: ST1_2 (cpu, address); return;
11599     case 7:  ST1_1 (cpu, address); return;
11600     default:
11601       HALT_NYI;
11602     }
11603 }
11604
11605 static void
11606 dexLdSt (sim_cpu *cpu)
11607 {
11608   /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
11609      assert  group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
11610              group == GROUP_LDST_1100 || group == GROUP_LDST_1110
11611      bits [29,28:26] of a LS are the secondary dispatch vector.  */
11612   uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
11613
11614   switch (group2)
11615     {
11616     case LS_EXCL_000:
11617       dexLoadExclusive (cpu); return;
11618
11619     case LS_LIT_010:
11620     case LS_LIT_011:
11621       dexLoadLiteral (cpu); return;
11622
11623     case LS_OTHER_110:
11624     case LS_OTHER_111:
11625       dexLoadOther (cpu); return;
11626
11627     case LS_ADVSIMD_001:
11628       do_vec_load_store (cpu); return;
11629
11630     case LS_PAIR_100:
11631       dex_load_store_pair_gr (cpu); return;
11632
11633     case LS_PAIR_101:
11634       dex_load_store_pair_fp (cpu); return;
11635
11636     default:
11637       /* Should never reach here.  */
11638       HALT_NYI;
11639     }
11640 }
11641
11642 /* Specific decode and execute for group Data Processing Register.  */
11643
11644 static void
11645 dexLogicalShiftedRegister (sim_cpu *cpu)
11646 {
11647   /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
11648      instr[30,29] = op
11649      instr[28:24] = 01010
11650      instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
11651      instr[21]    = N
11652      instr[20,16] = Rm
11653      instr[15,10] = count : must be 0xxxxx for 32 bit
11654      instr[9,5]   = Rn
11655      instr[4,0]   = Rd  */
11656
11657   uint32_t size      = INSTR (31, 31);
11658   Shift    shiftType = INSTR (23, 22);
11659   uint32_t count     = INSTR (15, 10);
11660
11661   /* 32 bit operations must have count[5] = 0.
11662      or else we have an UNALLOC.  */
11663   if (size == 0 && uimm (count, 5, 5))
11664     HALT_UNALLOC;
11665
11666   /* Dispatch on size:op:N.  */
11667   switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
11668     {
11669     case 0: and32_shift  (cpu, shiftType, count); return;
11670     case 1: bic32_shift  (cpu, shiftType, count); return;
11671     case 2: orr32_shift  (cpu, shiftType, count); return;
11672     case 3: orn32_shift  (cpu, shiftType, count); return;
11673     case 4: eor32_shift  (cpu, shiftType, count); return;
11674     case 5: eon32_shift  (cpu, shiftType, count); return;
11675     case 6: ands32_shift (cpu, shiftType, count); return;
11676     case 7: bics32_shift (cpu, shiftType, count); return;
11677     case 8: and64_shift  (cpu, shiftType, count); return;
11678     case 9: bic64_shift  (cpu, shiftType, count); return;
11679     case 10:orr64_shift  (cpu, shiftType, count); return;
11680     case 11:orn64_shift  (cpu, shiftType, count); return;
11681     case 12:eor64_shift  (cpu, shiftType, count); return;
11682     case 13:eon64_shift  (cpu, shiftType, count); return;
11683     case 14:ands64_shift (cpu, shiftType, count); return;
11684     case 15:bics64_shift (cpu, shiftType, count); return;
11685     }
11686 }
11687
11688 /* 32 bit conditional select.  */
11689 static void
11690 csel32 (sim_cpu *cpu, CondCode cc)
11691 {
11692   unsigned rm = INSTR (20, 16);
11693   unsigned rn = INSTR (9, 5);
11694   unsigned rd = INSTR (4, 0);
11695
11696   aarch64_set_reg_u64 (cpu, rd, NO_SP,
11697                        testConditionCode (cpu, cc)
11698                        ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11699                        : aarch64_get_reg_u32 (cpu, rm, NO_SP));
11700 }
11701
11702 /* 64 bit conditional select.  */
11703 static void
11704 csel64 (sim_cpu *cpu, CondCode cc)
11705 {
11706   unsigned rm = INSTR (20, 16);
11707   unsigned rn = INSTR (9, 5);
11708   unsigned rd = INSTR (4, 0);
11709
11710   aarch64_set_reg_u64 (cpu, rd, NO_SP,
11711                        testConditionCode (cpu, cc)
11712                        ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11713                        : aarch64_get_reg_u64 (cpu, rm, NO_SP));
11714 }
11715
11716 /* 32 bit conditional increment.  */
11717 static void
11718 csinc32 (sim_cpu *cpu, CondCode cc)
11719 {
11720   unsigned rm = INSTR (20, 16);
11721   unsigned rn = INSTR (9, 5);
11722   unsigned rd = INSTR (4, 0);
11723
11724   aarch64_set_reg_u64 (cpu, rd, NO_SP,
11725                        testConditionCode (cpu, cc)
11726                        ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11727                        : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
11728 }
11729
11730 /* 64 bit conditional increment.  */
11731 static void
11732 csinc64 (sim_cpu *cpu, CondCode cc)
11733 {
11734   unsigned rm = INSTR (20, 16);
11735   unsigned rn = INSTR (9, 5);
11736   unsigned rd = INSTR (4, 0);
11737
11738   aarch64_set_reg_u64 (cpu, rd, NO_SP,
11739                        testConditionCode (cpu, cc)
11740                        ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11741                        : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
11742 }
11743
11744 /* 32 bit conditional invert.  */
11745 static void
11746 csinv32 (sim_cpu *cpu, CondCode cc)
11747 {
11748   unsigned rm = INSTR (20, 16);
11749   unsigned rn = INSTR (9, 5);
11750   unsigned rd = INSTR (4, 0);
11751
11752   aarch64_set_reg_u64 (cpu, rd, NO_SP,
11753                        testConditionCode (cpu, cc)
11754                        ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11755                        : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
11756 }
11757
11758 /* 64 bit conditional invert.  */
11759 static void
11760 csinv64 (sim_cpu *cpu, CondCode cc)
11761 {
11762   unsigned rm = INSTR (20, 16);
11763   unsigned rn = INSTR (9, 5);
11764   unsigned rd = INSTR (4, 0);
11765
11766   aarch64_set_reg_u64 (cpu, rd, NO_SP,
11767                        testConditionCode (cpu, cc)
11768                        ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11769                        : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
11770 }
11771
11772 /* 32 bit conditional negate.  */
11773 static void
11774 csneg32 (sim_cpu *cpu, CondCode cc)
11775 {
11776   unsigned rm = INSTR (20, 16);
11777   unsigned rn = INSTR (9, 5);
11778   unsigned rd = INSTR (4, 0);
11779
11780   aarch64_set_reg_u64 (cpu, rd, NO_SP,
11781                        testConditionCode (cpu, cc)
11782                        ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11783                        : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
11784 }
11785
11786 /* 64 bit conditional negate.  */
11787 static void
11788 csneg64 (sim_cpu *cpu, CondCode cc)
11789 {
11790   unsigned rm = INSTR (20, 16);
11791   unsigned rn = INSTR (9, 5);
11792   unsigned rd = INSTR (4, 0);
11793
11794   aarch64_set_reg_u64 (cpu, rd, NO_SP,
11795                        testConditionCode (cpu, cc)
11796                        ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11797                        : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
11798 }
11799
11800 static void
11801 dexCondSelect (sim_cpu *cpu)
11802 {
11803   /* instr[28,21] = 11011011
11804      instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
11805      instr[30:11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
11806                             100 ==> CSINV, 101 ==> CSNEG,
11807                             _1_ ==> UNALLOC
11808      instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
11809      instr[15,12] = cond
11810      instr[29] = S : 0 ==> ok, 1 ==> UNALLOC  */
11811
11812   CondCode cc = INSTR (15, 12);
11813   uint32_t S = INSTR (29, 29);
11814   uint32_t op2 = INSTR (11, 10);
11815
11816   if (S == 1)
11817     HALT_UNALLOC;
11818
11819   if (op2 & 0x2)
11820     HALT_UNALLOC;
11821
11822   switch ((INSTR (31, 30) << 1) | op2)
11823     {
11824     case 0: csel32  (cpu, cc); return;
11825     case 1: csinc32 (cpu, cc); return;
11826     case 2: csinv32 (cpu, cc); return;
11827     case 3: csneg32 (cpu, cc); return;
11828     case 4: csel64  (cpu, cc); return;
11829     case 5: csinc64 (cpu, cc); return;
11830     case 6: csinv64 (cpu, cc); return;
11831     case 7: csneg64 (cpu, cc); return;
11832     }
11833 }
11834
11835 /* Some helpers for counting leading 1 or 0 bits.  */
11836
/* Counts the number of leading bits of VALUE which are the same as
   its most significant bit (that bit included).  The result is in
   the range 1 to 32.

   Only unsigned arithmetic is used, so the result does not depend on
   how the compiler converts out-of-range constants to a signed type
   or shifts negative values.  */
static uint32_t
leading32 (uint32_t value)
{
  /* Reduce "leading ones" to "leading zeros": complementing a value
     whose top bit is set keeps the length of the leading run and
     guarantees that bit 31 of BITS is clear.  */
  uint32_t bits = (value >> 31) ? ~value : value;
  uint32_t count = 0;
  unsigned step;

  /* Every bit matched the top bit.  */
  if (bits == 0)
    return 32;

  /* Binary search for the highest set bit: whenever the top STEP bits
     are all clear, count them and shift them out.  */
  for (step = 16; step != 0; step >>= 1)
    if ((bits >> (32 - step)) == 0)
      {
        count += step;
        bits <<= step;
      }

  return count;
}
11880
/* Counts the number of leading bits of VALUE which are the same as
   its most significant bit (that bit included).  The result is in
   the range 1 to 64.

   Only unsigned arithmetic is used, so the result does not depend on
   how the compiler converts out-of-range constants to a signed type
   or shifts negative values.  */
static uint64_t
leading64 (uint64_t value)
{
  /* Reduce "leading ones" to "leading zeros": complementing a value
     whose top bit is set keeps the length of the leading run and
     guarantees that bit 63 of BITS is clear.  */
  uint64_t bits = (value >> 63) ? ~value : value;
  uint64_t count = 0;
  unsigned step;

  /* Every bit matched the top bit.  */
  if (bits == 0)
    return 64;

  /* Binary search for the highest set bit: whenever the top STEP bits
     are all clear, count them and shift them out.  */
  for (step = 32; step != 0; step >>= 1)
    if ((bits >> (64 - step)) == 0)
      {
        count += step;
        bits <<= step;
      }

  return count;
}
11924
11925 /* Bit operations.  */
11926 /* N.B register args may not be SP.  */
11927
11928 /* 32 bit count leading sign bits.  */
11929 static void
11930 cls32 (sim_cpu *cpu)
11931 {
11932   unsigned rn = INSTR (9, 5);
11933   unsigned rd = INSTR (4, 0);
11934
11935   /* N.B. the result needs to exclude the leading bit.  */
11936   aarch64_set_reg_u64
11937     (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
11938 }
11939
11940 /* 64 bit count leading sign bits.  */
11941 static void
11942 cls64 (sim_cpu *cpu)
11943 {
11944   unsigned rn = INSTR (9, 5);
11945   unsigned rd = INSTR (4, 0);
11946
11947   /* N.B. the result needs to exclude the leading bit.  */
11948   aarch64_set_reg_u64
11949     (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
11950 }
11951
11952 /* 32 bit count leading zero bits.  */
11953 static void
11954 clz32 (sim_cpu *cpu)
11955 {
11956   unsigned rn = INSTR (9, 5);
11957   unsigned rd = INSTR (4, 0);
11958   uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
11959
11960   /* if the sign (top) bit is set then the count is 0.  */
11961   if (pick32 (value, 31, 31))
11962     aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
11963   else
11964     aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
11965 }
11966
11967 /* 64 bit count leading zero bits.  */
11968 static void
11969 clz64 (sim_cpu *cpu)
11970 {
11971   unsigned rn = INSTR (9, 5);
11972   unsigned rd = INSTR (4, 0);
11973   uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
11974
11975   /* if the sign (top) bit is set then the count is 0.  */
11976   if (pick64 (value, 63, 63))
11977     aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
11978   else
11979     aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
11980 }
11981
11982 /* 32 bit reverse bits.  */
11983 static void
11984 rbit32 (sim_cpu *cpu)
11985 {
11986   unsigned rn = INSTR (9, 5);
11987   unsigned rd = INSTR (4, 0);
11988   uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
11989   uint32_t result = 0;
11990   int i;
11991
11992   for (i = 0; i < 32; i++)
11993     {
11994       result <<= 1;
11995       result |= (value & 1);
11996       value >>= 1;
11997     }
11998   aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
11999 }
12000
12001 /* 64 bit reverse bits.  */
12002 static void
12003 rbit64 (sim_cpu *cpu)
12004 {
12005   unsigned rn = INSTR (9, 5);
12006   unsigned rd = INSTR (4, 0);
12007   uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12008   uint64_t result = 0;
12009   int i;
12010
12011   for (i = 0; i < 64; i++)
12012     {
12013       result <<= 1;
12014       result |= (value & 1UL);
12015       value >>= 1;
12016     }
12017   aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12018 }
12019
12020 /* 32 bit reverse bytes.  */
12021 static void
12022 rev32 (sim_cpu *cpu)
12023 {
12024   unsigned rn = INSTR (9, 5);
12025   unsigned rd = INSTR (4, 0);
12026   uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12027   uint32_t result = 0;
12028   int i;
12029
12030   for (i = 0; i < 4; i++)
12031     {
12032       result <<= 8;
12033       result |= (value & 0xff);
12034       value >>= 8;
12035     }
12036   aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12037 }
12038
12039 /* 64 bit reverse bytes.  */
12040 static void
12041 rev64 (sim_cpu *cpu)
12042 {
12043   unsigned rn = INSTR (9, 5);
12044   unsigned rd = INSTR (4, 0);
12045   uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12046   uint64_t result = 0;
12047   int i;
12048
12049   for (i = 0; i < 8; i++)
12050     {
12051       result <<= 8;
12052       result |= (value & 0xffULL);
12053       value >>= 8;
12054     }
12055   aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12056 }
12057
12058 /* 32 bit reverse shorts.  */
12059 /* N.B.this reverses the order of the bytes in each half word.  */
12060 static void
12061 revh32 (sim_cpu *cpu)
12062 {
12063   unsigned rn = INSTR (9, 5);
12064   unsigned rd = INSTR (4, 0);
12065   uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12066   uint32_t result = 0;
12067   int i;
12068
12069   for (i = 0; i < 2; i++)
12070     {
12071       result <<= 8;
12072       result |= (value & 0x00ff00ff);
12073       value >>= 8;
12074     }
12075   aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12076 }
12077
12078 /* 64 bit reverse shorts.  */
12079 /* N.B.this reverses the order of the bytes in each half word.  */
12080 static void
12081 revh64 (sim_cpu *cpu)
12082 {
12083   unsigned rn = INSTR (9, 5);
12084   unsigned rd = INSTR (4, 0);
12085   uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12086   uint64_t result = 0;
12087   int i;
12088
12089   for (i = 0; i < 2; i++)
12090     {
12091       result <<= 8;
12092       result |= (value & 0x00ff00ff00ff00ffULL);
12093       value >>= 8;
12094     }
12095   aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12096 }
12097
12098 static void
12099 dexDataProc1Source (sim_cpu *cpu)
12100 {
12101   /* instr[30]    = 1
12102      instr[28,21] = 111010110
12103      instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
12104      instr[29]    = S : 0 ==> ok, 1 ==> UNALLOC
12105      instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12106      instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
12107                              000010 ==> REV, 000011 ==> UNALLOC
12108                              000100 ==> CLZ, 000101 ==> CLS
12109                              ow ==> UNALLOC
12110      instr[9,5]   = rn : may not be SP
12111      instr[4,0]   = rd : may not be SP.  */
12112
12113   uint32_t S = INSTR (29, 29);
12114   uint32_t opcode2 = INSTR (20, 16);
12115   uint32_t opcode = INSTR (15, 10);
12116   uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12117
12118   if (S == 1)
12119     HALT_UNALLOC;
12120
12121   if (opcode2 != 0)
12122     HALT_UNALLOC;
12123
12124   if (opcode & 0x38)
12125     HALT_UNALLOC;
12126
12127   switch (dispatch)
12128     {
12129     case 0: rbit32 (cpu); return;
12130     case 1: revh32 (cpu); return;
12131     case 2: rev32 (cpu); return;
12132     case 4: clz32 (cpu); return;
12133     case 5: cls32 (cpu); return;
12134     case 8: rbit64 (cpu); return;
12135     case 9: revh64 (cpu); return;
12136     case 10:rev32 (cpu); return;
12137     case 11:rev64 (cpu); return;
12138     case 12:clz64 (cpu); return;
12139     case 13:cls64 (cpu); return;
12140     default: HALT_UNALLOC;
12141     }
12142 }
12143
12144 /* Variable shift.
12145    Shifts by count supplied in register.
12146    N.B register args may not be SP.
12147    These all use the shifted auxiliary function for
12148    simplicity and clarity.  Writing the actual shift
12149    inline would avoid a branch and so be faster but
12150    would also necessitate getting signs right.  */
12151
12152 /* 32 bit arithmetic shift right.  */
12153 static void
12154 asrv32 (sim_cpu *cpu)
12155 {
12156   unsigned rm = INSTR (20, 16);
12157   unsigned rn = INSTR (9, 5);
12158   unsigned rd = INSTR (4, 0);
12159
12160   aarch64_set_reg_u64
12161     (cpu, rd, NO_SP,
12162      shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12163                 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12164 }
12165
12166 /* 64 bit arithmetic shift right.  */
12167 static void
12168 asrv64 (sim_cpu *cpu)
12169 {
12170   unsigned rm = INSTR (20, 16);
12171   unsigned rn = INSTR (9, 5);
12172   unsigned rd = INSTR (4, 0);
12173
12174   aarch64_set_reg_u64
12175     (cpu, rd, NO_SP,
12176      shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12177                 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12178 }
12179
12180 /* 32 bit logical shift left.  */
12181 static void
12182 lslv32 (sim_cpu *cpu)
12183 {
12184   unsigned rm = INSTR (20, 16);
12185   unsigned rn = INSTR (9, 5);
12186   unsigned rd = INSTR (4, 0);
12187
12188   aarch64_set_reg_u64
12189     (cpu, rd, NO_SP,
12190      shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12191                 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12192 }
12193
12194 /* 64 bit arithmetic shift left.  */
12195 static void
12196 lslv64 (sim_cpu *cpu)
12197 {
12198   unsigned rm = INSTR (20, 16);
12199   unsigned rn = INSTR (9, 5);
12200   unsigned rd = INSTR (4, 0);
12201
12202   aarch64_set_reg_u64
12203     (cpu, rd, NO_SP,
12204      shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12205                 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12206 }
12207
12208 /* 32 bit logical shift right.  */
12209 static void
12210 lsrv32 (sim_cpu *cpu)
12211 {
12212   unsigned rm = INSTR (20, 16);
12213   unsigned rn = INSTR (9, 5);
12214   unsigned rd = INSTR (4, 0);
12215
12216   aarch64_set_reg_u64
12217     (cpu, rd, NO_SP,
12218      shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12219                 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12220 }
12221
12222 /* 64 bit logical shift right.  */
12223 static void
12224 lsrv64 (sim_cpu *cpu)
12225 {
12226   unsigned rm = INSTR (20, 16);
12227   unsigned rn = INSTR (9, 5);
12228   unsigned rd = INSTR (4, 0);
12229
12230   aarch64_set_reg_u64
12231     (cpu, rd, NO_SP,
12232      shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12233                 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12234 }
12235
12236 /* 32 bit rotate right.  */
12237 static void
12238 rorv32 (sim_cpu *cpu)
12239 {
12240   unsigned rm = INSTR (20, 16);
12241   unsigned rn = INSTR (9, 5);
12242   unsigned rd = INSTR (4, 0);
12243
12244   aarch64_set_reg_u64
12245     (cpu, rd, NO_SP,
12246      shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12247                 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12248 }
12249
12250 /* 64 bit rotate right.  */
12251 static void
12252 rorv64 (sim_cpu *cpu)
12253 {
12254   unsigned rm = INSTR (20, 16);
12255   unsigned rn = INSTR (9, 5);
12256   unsigned rd = INSTR (4, 0);
12257
12258   aarch64_set_reg_u64
12259     (cpu, rd, NO_SP,
12260      shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12261                 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12262 }
12263
12264
12265 /* divide.  */
12266
12267 /* 32 bit signed divide.  */
12268 static void
12269 cpuiv32 (sim_cpu *cpu)
12270 {
12271   unsigned rm = INSTR (20, 16);
12272   unsigned rn = INSTR (9, 5);
12273   unsigned rd = INSTR (4, 0);
12274   /* N.B. the pseudo-code does the divide using 64 bit data.  */
12275   /* TODO : check that this rounds towards zero as required.  */
12276   int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12277   int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12278
12279   aarch64_set_reg_s64 (cpu, rd, NO_SP,
12280                        divisor ? ((int32_t) (dividend / divisor)) : 0);
12281 }
12282
12283 /* 64 bit signed divide.  */
12284 static void
12285 cpuiv64 (sim_cpu *cpu)
12286 {
12287   unsigned rm = INSTR (20, 16);
12288   unsigned rn = INSTR (9, 5);
12289   unsigned rd = INSTR (4, 0);
12290
12291   /* TODO : check that this rounds towards zero as required.  */
12292   int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12293
12294   aarch64_set_reg_s64
12295     (cpu, rd, NO_SP,
12296      divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0);
12297 }
12298
12299 /* 32 bit unsigned divide.  */
12300 static void
12301 udiv32 (sim_cpu *cpu)
12302 {
12303   unsigned rm = INSTR (20, 16);
12304   unsigned rn = INSTR (9, 5);
12305   unsigned rd = INSTR (4, 0);
12306
12307   /* N.B. the pseudo-code does the divide using 64 bit data.  */
12308   uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12309   uint64_t divisor  = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12310
12311   aarch64_set_reg_u64 (cpu, rd, NO_SP,
12312                        divisor ? (uint32_t) (dividend / divisor) : 0);
12313 }
12314
12315 /* 64 bit unsigned divide.  */
12316 static void
12317 udiv64 (sim_cpu *cpu)
12318 {
12319   unsigned rm = INSTR (20, 16);
12320   unsigned rn = INSTR (9, 5);
12321   unsigned rd = INSTR (4, 0);
12322
12323   /* TODO : check that this rounds towards zero as required.  */
12324   uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12325
12326   aarch64_set_reg_u64
12327     (cpu, rd, NO_SP,
12328      divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12329 }
12330
12331 static void
12332 dexDataProc2Source (sim_cpu *cpu)
12333 {
12334   /* assert instr[30] == 0
12335      instr[28,21] == 11010110
12336      instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12337      instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12338      instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> CPUIV,
12339                              001000 ==> LSLV, 001001 ==> LSRV
12340                              001010 ==> ASRV, 001011 ==> RORV
12341                              ow ==> UNALLOC.  */
12342
12343   uint32_t dispatch;
12344   uint32_t S = INSTR (29, 29);
12345   uint32_t opcode = INSTR (15, 10);
12346
12347   if (S == 1)
12348     HALT_UNALLOC;
12349
12350   if (opcode & 0x34)
12351     HALT_UNALLOC;
12352
12353   dispatch = (  (INSTR (31, 31) << 3)
12354               | (uimm (opcode, 3, 3) << 2)
12355               |  uimm (opcode, 1, 0));
12356   switch (dispatch)
12357     {
12358     case 2:  udiv32 (cpu); return;
12359     case 3:  cpuiv32 (cpu); return;
12360     case 4:  lslv32 (cpu); return;
12361     case 5:  lsrv32 (cpu); return;
12362     case 6:  asrv32 (cpu); return;
12363     case 7:  rorv32 (cpu); return;
12364     case 10: udiv64 (cpu); return;
12365     case 11: cpuiv64 (cpu); return;
12366     case 12: lslv64 (cpu); return;
12367     case 13: lsrv64 (cpu); return;
12368     case 14: asrv64 (cpu); return;
12369     case 15: rorv64 (cpu); return;
12370     default: HALT_UNALLOC;
12371     }
12372 }
12373
12374
12375 /* Multiply.  */
12376
12377 /* 32 bit multiply and add.  */
12378 static void
12379 madd32 (sim_cpu *cpu)
12380 {
12381   unsigned rm = INSTR (20, 16);
12382   unsigned ra = INSTR (14, 10);
12383   unsigned rn = INSTR (9, 5);
12384   unsigned rd = INSTR (4, 0);
12385
12386   aarch64_set_reg_u64 (cpu, rd, NO_SP,
12387                        aarch64_get_reg_u32 (cpu, ra, NO_SP)
12388                        + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12389                        * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12390 }
12391
12392 /* 64 bit multiply and add.  */
12393 static void
12394 madd64 (sim_cpu *cpu)
12395 {
12396   unsigned rm = INSTR (20, 16);
12397   unsigned ra = INSTR (14, 10);
12398   unsigned rn = INSTR (9, 5);
12399   unsigned rd = INSTR (4, 0);
12400
12401   aarch64_set_reg_u64 (cpu, rd, NO_SP,
12402                        aarch64_get_reg_u64 (cpu, ra, NO_SP)
12403                        + aarch64_get_reg_u64 (cpu, rn, NO_SP)
12404                        * aarch64_get_reg_u64 (cpu, rm, NO_SP));
12405 }
12406
12407 /* 32 bit multiply and sub.  */
12408 static void
12409 msub32 (sim_cpu *cpu)
12410 {
12411   unsigned rm = INSTR (20, 16);
12412   unsigned ra = INSTR (14, 10);
12413   unsigned rn = INSTR (9, 5);
12414   unsigned rd = INSTR (4, 0);
12415
12416   aarch64_set_reg_u64 (cpu, rd, NO_SP,
12417                        aarch64_get_reg_u32 (cpu, ra, NO_SP)
12418                        - aarch64_get_reg_u32 (cpu, rn, NO_SP)
12419                        * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12420 }
12421
12422 /* 64 bit multiply and sub.  */
12423 static void
12424 msub64 (sim_cpu *cpu)
12425 {
12426   unsigned rm = INSTR (20, 16);
12427   unsigned ra = INSTR (14, 10);
12428   unsigned rn = INSTR (9, 5);
12429   unsigned rd = INSTR (4, 0);
12430
12431   aarch64_set_reg_u64 (cpu, rd, NO_SP,
12432                        aarch64_get_reg_u64 (cpu, ra, NO_SP)
12433                        - aarch64_get_reg_u64 (cpu, rn, NO_SP)
12434                        * aarch64_get_reg_u64 (cpu, rm, NO_SP));
12435 }
12436
12437 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit.  */
12438 static void
12439 smaddl (sim_cpu *cpu)
12440 {
12441   unsigned rm = INSTR (20, 16);
12442   unsigned ra = INSTR (14, 10);
12443   unsigned rn = INSTR (9, 5);
12444   unsigned rd = INSTR (4, 0);
12445
12446   /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12447      obtain a 64 bit product.  */
12448   aarch64_set_reg_s64
12449     (cpu, rd, NO_SP,
12450      aarch64_get_reg_s64 (cpu, ra, NO_SP)
12451      + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12452      * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12453 }
12454
12455 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit.  */
12456 static void
12457 smsubl (sim_cpu *cpu)
12458 {
12459   unsigned rm = INSTR (20, 16);
12460   unsigned ra = INSTR (14, 10);
12461   unsigned rn = INSTR (9, 5);
12462   unsigned rd = INSTR (4, 0);
12463
12464   /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12465      obtain a 64 bit product.  */
12466   aarch64_set_reg_s64
12467     (cpu, rd, NO_SP,
12468      aarch64_get_reg_s64 (cpu, ra, NO_SP)
12469      - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12470      * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12471 }
12472
12473 /* Integer Multiply/Divide.  */
12474
12475 /* First some macros and a helper function.  */
12476 /* Macros to test or access elements of 64 bit words.  */
12477
/* Mask used to access lo 32 bits of 64 bit unsigned int.  */
#define LOW_WORD_MASK ((1ULL << 32) - 1)
/* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int.  */
#define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
/* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int.  */
#define highWordToU64(_value_u64) ((_value_u64) >> 32)

/* Offset of sign bit in 64 bit signed integer.  */
#define SIGN_SHIFT_U64 63
/* The sign bit itself -- also identifies the minimum negative int value.
   Built from 1ULL so that the shift is valid even on hosts where
   unsigned long is only 32 bits wide.  */
#define SIGN_BIT_U64 (1ULL << SIGN_SHIFT_U64)
/* Return true if a 64 bit signed int presented as an unsigned int is the
   most negative value.  */
#define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
/* Return true (non-zero) if a 64 bit signed int presented as an unsigned
   int has its sign bit set.  */
#define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
/* Return -1L if a 64 bit signed int presented as an unsigned int has its
   sign bit set and 1L if it does not.  */
#define signOfU64(_value_u64) \
  (1L - 2L * (long) (((_value_u64) >> SIGN_SHIFT_U64) & 1))
/* Clear the sign bit of a 64 bit signed int presented as an unsigned int.
   N.B. this modifies its argument, which must be an lvalue.  */
#define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
12500
/* Multiply two unsigned 64 bit ints and return the hi 64 bits of the
   128 bit product.

   VALUE1 and VALUE2 are the factors.  The product is assembled from
   the four 32 x 32 bit partial products in such a way that no
   intermediate sum can overflow 64 bits.  */

static uint64_t
mul64hi (uint64_t value1, uint64_t value2)
{
  const uint64_t low_word = 0xffffffffULL;
  uint64_t value1_lo = value1 & low_word;
  uint64_t value1_hi = value1 >> 32;
  uint64_t value2_lo = value2 & low_word;
  uint64_t value2_hi = value2 >> 32;

  /* Cross-multiply.  Each partial product is below 2^64.  */
  uint64_t xproductlo = value1_lo * value2_lo;     /* Weight 2^0.  */
  uint64_t xproductmid1 = value1_lo * value2_hi;   /* Weight 2^32.  */
  uint64_t xproductmid2 = value1_hi * value2_lo;   /* Weight 2^32.  */
  uint64_t xproducthi = value1_hi * value2_hi;     /* Weight 2^64.  */

  /* Sum everything which lands in bits [63,32] of the product: the top
     half of the lowest cross-product plus the bottom halves of the two
     middle ones.  This is three values below 2^32, so it cannot
     overflow, and its own top half is the carry into bit 64.  */
  uint64_t middle = (xproductlo >> 32)
                    + (xproductmid1 & low_word)
                    + (xproductmid2 & low_word);

  /* The high word is the top cross-product, the top halves of the two
     middle cross-products and the carry out of the middle column.  The
     true result is below 2^64 so this sum cannot overflow either.  */
  return xproducthi
         + (xproductmid1 >> 32)
         + (xproductmid2 >> 32)
         + (middle >> 32);
}
12547
12548 /* Signed multiply high, source, source2 :
12549    64 bit, dest <-- high 64-bit of result.  */
12550 static void
12551 smulh (sim_cpu *cpu)
12552 {
12553   uint64_t uresult;
12554   int64_t  result;
12555   unsigned rm = INSTR (20, 16);
12556   unsigned rn = INSTR (9, 5);
12557   unsigned rd = INSTR (4, 0);
12558   GReg     ra = INSTR (14, 10);
12559   int64_t  value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12560   int64_t  value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12561   uint64_t uvalue1;
12562   uint64_t uvalue2;
12563   int64_t  signum = 1;
12564
12565   if (ra != R31)
12566     HALT_UNALLOC;
12567
12568   /* Convert to unsigned and use the unsigned mul64hi routine
12569      the fix the sign up afterwards.  */
12570   if (value1 < 0)
12571     {
12572       signum *= -1L;
12573       uvalue1 = -value1;
12574     }
12575   else
12576     {
12577       uvalue1 = value1;
12578     }
12579
12580   if (value2 < 0)
12581     {
12582       signum *= -1L;
12583       uvalue2 = -value2;
12584     }
12585   else
12586     {
12587       uvalue2 = value2;
12588     }
12589
12590   uresult = mul64hi (uvalue1, uvalue2);
12591   result = uresult;
12592   result *= signum;
12593
12594   aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
12595 }
12596
12597 /* Unsigned multiply add long -- source, source2 :
12598    32 bit, source3 : 64 bit.  */
12599 static void
12600 umaddl (sim_cpu *cpu)
12601 {
12602   unsigned rm = INSTR (20, 16);
12603   unsigned ra = INSTR (14, 10);
12604   unsigned rn = INSTR (9, 5);
12605   unsigned rd = INSTR (4, 0);
12606
12607   /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12608      obtain a 64 bit product.  */
12609   aarch64_set_reg_u64
12610     (cpu, rd, NO_SP,
12611      aarch64_get_reg_u64 (cpu, ra, NO_SP)
12612      + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
12613      * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
12614 }
12615
12616 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit.  */
12617 static void
12618 umsubl (sim_cpu *cpu)
12619 {
12620   unsigned rm = INSTR (20, 16);
12621   unsigned ra = INSTR (14, 10);
12622   unsigned rn = INSTR (9, 5);
12623   unsigned rd = INSTR (4, 0);
12624
12625   /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12626      obtain a 64 bit product.  */
12627   aarch64_set_reg_u64
12628     (cpu, rd, NO_SP,
12629      aarch64_get_reg_u64 (cpu, ra, NO_SP)
12630      - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
12631      * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
12632 }
12633
12634 /* Unsigned multiply high, source, source2 :
12635    64 bit, dest <-- high 64-bit of result.  */
12636 static void
12637 umulh (sim_cpu *cpu)
12638 {
12639   unsigned rm = INSTR (20, 16);
12640   unsigned rn = INSTR (9, 5);
12641   unsigned rd = INSTR (4, 0);
12642   GReg     ra = INSTR (14, 10);
12643
12644   if (ra != R31)
12645     HALT_UNALLOC;
12646
12647   aarch64_set_reg_u64 (cpu, rd, NO_SP,
12648                        mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
12649                                 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12650 }
12651
12652 static void
12653 dexDataProc3Source (sim_cpu *cpu)
12654 {
12655   /* assert instr[28,24] == 11011.  */
12656   /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
12657      instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
12658      instr[23,21] = op31 : 111 ==> UNALLOC, o2 ==> ok
12659      instr[15] = o0 : 0/1 ==> ok
12660      instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB,     (32/64 bit)
12661                               0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
12662                               0100 ==> SMULH,                   (64 bit only)
12663                               1010 ==> UMADDL, 1011 ==> UNSUBL, (64 bit only)
12664                               1100 ==> UMULH                    (64 bit only)
12665                               ow ==> UNALLOC.  */
12666
12667   uint32_t dispatch;
12668   uint32_t size = INSTR (31, 31);
12669   uint32_t op54 = INSTR (30, 29);
12670   uint32_t op31 = INSTR (23, 21);
12671   uint32_t o0 = INSTR (15, 15);
12672
12673   if (op54 != 0)
12674     HALT_UNALLOC;
12675
12676   if (size == 0)
12677     {
12678       if (op31 != 0)
12679         HALT_UNALLOC;
12680
12681       if (o0 == 0)
12682         madd32 (cpu);
12683       else
12684         msub32 (cpu);
12685       return;
12686     }
12687
12688   dispatch = (op31 << 1) | o0;
12689
12690   switch (dispatch)
12691     {
12692     case 0:  madd64 (cpu); return;
12693     case 1:  msub64 (cpu); return;
12694     case 2:  smaddl (cpu); return;
12695     case 3:  smsubl (cpu); return;
12696     case 4:  smulh (cpu); return;
12697     case 10: umaddl (cpu); return;
12698     case 11: umsubl (cpu); return;
12699     case 12: umulh (cpu); return;
12700     default: HALT_UNALLOC;
12701     }
12702 }
12703
12704 static void
12705 dexDPReg (sim_cpu *cpu)
12706 {
12707   /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
12708      assert  group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
12709      bits [28:24:21] of a DPReg are the secondary dispatch vector.  */
12710   uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
12711
12712   switch (group2)
12713     {
12714     case DPREG_LOG_000:
12715     case DPREG_LOG_001:
12716       dexLogicalShiftedRegister (cpu); return;
12717
12718     case DPREG_ADDSHF_010:
12719       dexAddSubtractShiftedRegister (cpu); return;
12720
12721     case DPREG_ADDEXT_011:
12722       dexAddSubtractExtendedRegister (cpu); return;
12723
12724     case DPREG_ADDCOND_100:
12725       {
12726         /* This set bundles a variety of different operations.  */
12727         /* Check for.  */
12728         /* 1) add/sub w carry.  */
12729         uint32_t mask1 = 0x1FE00000U;
12730         uint32_t val1  = 0x1A000000U;
12731         /* 2) cond compare register/immediate.  */
12732         uint32_t mask2 = 0x1FE00000U;
12733         uint32_t val2  = 0x1A400000U;
12734         /* 3) cond select.  */
12735         uint32_t mask3 = 0x1FE00000U;
12736         uint32_t val3  = 0x1A800000U;
12737         /* 4) data proc 1/2 source.  */
12738         uint32_t mask4 = 0x1FE00000U;
12739         uint32_t val4  = 0x1AC00000U;
12740
12741         if ((aarch64_get_instr (cpu) & mask1) == val1)
12742           dexAddSubtractWithCarry (cpu);
12743
12744         else if ((aarch64_get_instr (cpu) & mask2) == val2)
12745           CondCompare (cpu);
12746
12747         else if ((aarch64_get_instr (cpu) & mask3) == val3)
12748           dexCondSelect (cpu);
12749
12750         else if ((aarch64_get_instr (cpu) & mask4) == val4)
12751           {
12752             /* Bit 30 is clear for data proc 2 source
12753                and set for data proc 1 source.  */
12754             if (aarch64_get_instr (cpu)  & (1U << 30))
12755               dexDataProc1Source (cpu);
12756             else
12757               dexDataProc2Source (cpu);
12758           }
12759
12760         else
12761           /* Should not reach here.  */
12762           HALT_NYI;
12763
12764         return;
12765       }
12766
12767     case DPREG_3SRC_110:
12768       dexDataProc3Source (cpu); return;
12769
12770     case DPREG_UNALLOC_101:
12771       HALT_UNALLOC;
12772
12773     case DPREG_3SRC_111:
12774       dexDataProc3Source (cpu); return;
12775
12776     default:
12777       /* Should never reach here.  */
12778       HALT_NYI;
12779     }
12780 }
12781
12782 /* Unconditional Branch immediate.
12783    Offset is a PC-relative byte offset in the range +/- 128MiB.
12784    The offset is assumed to be raw from the decode i.e. the
12785    simulator is expected to scale them from word offsets to byte.  */
12786
12787 /* Unconditional branch.  */
12788 static void
12789 buc (sim_cpu *cpu, int32_t offset)
12790 {
12791   aarch64_set_next_PC_by_offset (cpu, offset);
12792 }
12793
12794 static unsigned stack_depth = 0;
12795
12796 /* Unconditional branch and link -- writes return PC to LR.  */
12797 static void
12798 bl (sim_cpu *cpu, int32_t offset)
12799 {
12800   aarch64_save_LR (cpu);
12801   aarch64_set_next_PC_by_offset (cpu, offset);
12802
12803   if (TRACE_BRANCH_P (cpu))
12804     {
12805       ++ stack_depth;
12806       TRACE_BRANCH (cpu,
12807                     " %*scall %" PRIx64 " [%s]"
12808                     " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
12809                     stack_depth, " ", aarch64_get_next_PC (cpu),
12810                     aarch64_get_func (aarch64_get_next_PC (cpu)),
12811                     aarch64_get_reg_u64 (cpu, 0, NO_SP),
12812                     aarch64_get_reg_u64 (cpu, 1, NO_SP),
12813                     aarch64_get_reg_u64 (cpu, 2, NO_SP)
12814                     );
12815     }
12816 }
12817
12818 /* Unconditional Branch register.
12819    Branch/return address is in source register.  */
12820
12821 /* Unconditional branch.  */
12822 static void
12823 br (sim_cpu *cpu)
12824 {
12825   unsigned rn = INSTR (9, 5);
12826   aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
12827 }
12828
12829 /* Unconditional branch and link -- writes return PC to LR.  */
12830 static void
12831 blr (sim_cpu *cpu)
12832 {
12833   unsigned rn = INSTR (9, 5);
12834
12835   /* The pseudo code in the spec says we update LR before fetching.
12836      the value from the rn.  */
12837   aarch64_save_LR (cpu);
12838   aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
12839
12840   if (TRACE_BRANCH_P (cpu))
12841     {
12842       ++ stack_depth;
12843       TRACE_BRANCH (cpu,
12844                     " %*scall %" PRIx64 " [%s]"
12845                     " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
12846                     stack_depth, " ", aarch64_get_next_PC (cpu),
12847                     aarch64_get_func (aarch64_get_next_PC (cpu)),
12848                     aarch64_get_reg_u64 (cpu, 0, NO_SP),
12849                     aarch64_get_reg_u64 (cpu, 1, NO_SP),
12850                     aarch64_get_reg_u64 (cpu, 2, NO_SP)
12851                     );
12852     }
12853 }
12854
12855 /* Return -- assembler will default source to LR this is functionally
12856    equivalent to br but, presumably, unlike br it side effects the
12857    branch predictor.  */
12858 static void
12859 ret (sim_cpu *cpu)
12860 {
12861   unsigned rn = INSTR (9, 5);
12862   aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
12863
12864   if (TRACE_BRANCH_P (cpu))
12865     {
12866       TRACE_BRANCH (cpu,
12867                     " %*sreturn [result: %" PRIx64 "]",
12868                     stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
12869       -- stack_depth;
12870     }
12871 }
12872
12873 /* NOP -- we implement this and call it from the decode in case we
12874    want to intercept it later.  */
12875
12876 static void
12877 nop (sim_cpu *cpu)
12878 {
12879 }
12880
12881 /* Data synchronization barrier.  */
12882
12883 static void
12884 dsb (sim_cpu *cpu)
12885 {
12886 }
12887
12888 /* Data memory barrier.  */
12889
12890 static void
12891 dmb (sim_cpu *cpu)
12892 {
12893 }
12894
12895 /* Instruction synchronization barrier.  */
12896
12897 static void
12898 isb (sim_cpu *cpu)
12899 {
12900 }
12901
12902 static void
12903 dexBranchImmediate (sim_cpu *cpu)
12904 {
12905   /* assert instr[30,26] == 00101
12906      instr[31] ==> 0 == B, 1 == BL
12907      instr[25,0] == imm26 branch offset counted in words.  */
12908
12909   uint32_t top = INSTR (31, 31);
12910   /* We have a 26 byte signed word offset which we need to pass to the
12911      execute routine as a signed byte offset.  */
12912   int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
12913
12914   if (top)
12915     bl (cpu, offset);
12916   else
12917     buc (cpu, offset);
12918 }
12919
12920 /* Control Flow.  */
12921
12922 /* Conditional branch
12923
12924    Offset is a PC-relative byte offset in the range +/- 1MiB pos is
12925    a bit position in the range 0 .. 63
12926
12927    cc is a CondCode enum value as pulled out of the decode
12928
12929    N.B. any offset register (source) can only be Xn or Wn.  */
12930
12931 static void
12932 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
12933 {
12934   /* the test returns TRUE if CC is met.  */
12935   if (testConditionCode (cpu, cc))
12936     aarch64_set_next_PC_by_offset (cpu, offset);
12937 }
12938
12939 /* 32 bit branch on register non-zero.  */
12940 static void
12941 cbnz32 (sim_cpu *cpu, int32_t offset)
12942 {
12943   unsigned rt = INSTR (4, 0);
12944
12945   if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
12946     aarch64_set_next_PC_by_offset (cpu, offset);
12947 }
12948
12949 /* 64 bit branch on register zero.  */
12950 static void
12951 cbnz (sim_cpu *cpu, int32_t offset)
12952 {
12953   unsigned rt = INSTR (4, 0);
12954
12955   if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
12956     aarch64_set_next_PC_by_offset (cpu, offset);
12957 }
12958
12959 /* 32 bit branch on register non-zero.  */
12960 static void
12961 cbz32 (sim_cpu *cpu, int32_t offset)
12962 {
12963   unsigned rt = INSTR (4, 0);
12964
12965   if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
12966     aarch64_set_next_PC_by_offset (cpu, offset);
12967 }
12968
12969 /* 64 bit branch on register zero.  */
12970 static void
12971 cbz (sim_cpu *cpu, int32_t offset)
12972 {
12973   unsigned rt = INSTR (4, 0);
12974
12975   if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
12976     aarch64_set_next_PC_by_offset (cpu, offset);
12977 }
12978
12979 /* Branch on register bit test non-zero -- one size fits all.  */
12980 static void
12981 tbnz (sim_cpu *cpu, uint32_t  pos, int32_t offset)
12982 {
12983   unsigned rt = INSTR (4, 0);
12984
12985   if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (1 << pos))
12986     aarch64_set_next_PC_by_offset (cpu, offset);
12987 }
12988
12989 /* branch on register bit test zero -- one size fits all.  */
12990 static void
12991 tbz (sim_cpu *cpu, uint32_t  pos, int32_t offset)
12992 {
12993   unsigned rt = INSTR (4, 0);
12994
12995   if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (1 << pos)))
12996     aarch64_set_next_PC_by_offset (cpu, offset);
12997 }
12998
12999 static void
13000 dexCompareBranchImmediate (sim_cpu *cpu)
13001 {
13002   /* instr[30,25] = 01 1010
13003      instr[31]    = size : 0 ==> 32, 1 ==> 64
13004      instr[24]    = op : 0 ==> CBZ, 1 ==> CBNZ
13005      instr[23,5]  = simm19 branch offset counted in words
13006      instr[4,0]   = rt  */
13007
13008   uint32_t size = INSTR (31, 31);
13009   uint32_t op   = INSTR (24, 24);
13010   int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13011
13012   if (size == 0)
13013     {
13014       if (op == 0)
13015         cbz32 (cpu, offset);
13016       else
13017         cbnz32 (cpu, offset);
13018     }
13019   else
13020     {
13021       if (op == 0)
13022         cbz (cpu, offset);
13023       else
13024         cbnz (cpu, offset);
13025     }
13026 }
13027
13028 static void
13029 dexTestBranchImmediate (sim_cpu *cpu)
13030 {
13031   /* instr[31]    = b5 : bit 5 of test bit idx
13032      instr[30,25] = 01 1011
13033      instr[24]    = op : 0 ==> TBZ, 1 == TBNZ
13034      instr[23,19] = b40 : bits 4 to 0 of test bit idx
13035      instr[18,5]  = simm14 : signed offset counted in words
13036      instr[4,0]   = uimm5  */
13037
13038   uint32_t pos = ((INSTR (31, 31) << 4) | INSTR (23, 19));
13039   int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13040
13041   NYI_assert (30, 25, 0x1b);
13042
13043   if (INSTR (24, 24) == 0)
13044     tbz (cpu, pos, offset);
13045   else
13046     tbnz (cpu, pos, offset);
13047 }
13048
13049 static void
13050 dexCondBranchImmediate (sim_cpu *cpu)
13051 {
13052   /* instr[31,25] = 010 1010
13053      instr[24]    = op1; op => 00 ==> B.cond
13054      instr[23,5]  = simm19 : signed offset counted in words
13055      instr[4]     = op0
13056      instr[3,0]   = cond  */
13057
13058   int32_t offset;
13059   uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13060
13061   NYI_assert (31, 25, 0x2a);
13062
13063   if (op != 0)
13064     HALT_UNALLOC;
13065
13066   offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13067
13068   bcc (cpu, offset, INSTR (3, 0));
13069 }
13070
13071 static void
13072 dexBranchRegister (sim_cpu *cpu)
13073 {
13074   /* instr[31,25] = 110 1011
13075      instr[24,21] = op : 0 ==> BR, 1 => BLR, 2 => RET, 3 => ERET, 4 => DRPS
13076      instr[20,16] = op2 : must be 11111
13077      instr[15,10] = op3 : must be 000000
13078      instr[4,0]   = op2 : must be 11111.  */
13079
13080   uint32_t op = INSTR (24, 21);
13081   uint32_t op2 = INSTR (20, 16);
13082   uint32_t op3 = INSTR (15, 10);
13083   uint32_t op4 = INSTR (4, 0);
13084
13085   NYI_assert (31, 25, 0x6b);
13086
13087   if (op2 != 0x1F || op3 != 0 || op4 != 0)
13088     HALT_UNALLOC;
13089
13090   if (op == 0)
13091     br (cpu);
13092
13093   else if (op == 1)
13094     blr (cpu);
13095
13096   else if (op == 2)
13097     ret (cpu);
13098
13099   else
13100     {
13101       /* ERET and DRPS accept 0b11111 for rn = instr [4,0].  */
13102       /* anything else is unallocated.  */
13103       uint32_t rn = INSTR (4, 0);
13104
13105       if (rn != 0x1f)
13106         HALT_UNALLOC;
13107
13108       if (op == 4 || op == 5)
13109         HALT_NYI;
13110
13111       HALT_UNALLOC;
13112     }
13113 }
13114
/* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
   but this may not be available.  So instead we define the values we need
   here.

   These are the reason codes that handle_halt reads from the low 32
   bits of register 0 when it sees HLT #0xf000.
   NOTE(review): the values presumably mirror libgloss' svc.h -- confirm
   before changing any of them.  */
#define AngelSVC_Reason_Open            0x01
#define AngelSVC_Reason_Close           0x02
#define AngelSVC_Reason_Write           0x05
#define AngelSVC_Reason_Read            0x06
#define AngelSVC_Reason_IsTTY           0x09
#define AngelSVC_Reason_Seek            0x0A
#define AngelSVC_Reason_FLen            0x0C
#define AngelSVC_Reason_Remove          0x0E
#define AngelSVC_Reason_Rename          0x0F
#define AngelSVC_Reason_Clock           0x10
#define AngelSVC_Reason_Time            0x11
#define AngelSVC_Reason_System          0x12
#define AngelSVC_Reason_Errno           0x13
#define AngelSVC_Reason_GetCmdLine      0x15
#define AngelSVC_Reason_HeapInfo        0x16
#define AngelSVC_Reason_ReportException 0x18
#define AngelSVC_Reason_Elapsed         0x30

13135
13136
/* Handle a HLT instruction whose 16 bit immediate is VAL.

   Any immediate other than 0xf000 stops the simulation with SIGTRAP.
   HLT #0xf000 is treated as an Angel SVC request: the reason code is
   taken from the low 32 bits of register 0, register 1 carries the
   argument (usually the address of a parameter block in simulated
   memory), and the result is written back to register 0.

   NOTE(review): the code relies on sim_engine_halt not returning --
   confirm against the common simulator sources.  */
static void
handle_halt (sim_cpu *cpu, uint32_t val)
{
  /* Value handed back in register 0; stays 0 unless a case sets it.  */
  uint64_t result = 0;

  if (val != 0xf000)
    {
      TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGTRAP);
    }

  /* We have encountered an Angel SVC call.  See if we can process it.  */
  switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
    {
    case AngelSVC_Reason_HeapInfo:
      {
        /* Get the values.  */
        uint64_t stack_top = aarch64_get_stack_start (cpu);
        uint64_t heap_base = aarch64_get_heap_start (cpu);

        /* Get the pointer.  Register 1 holds the address of a word
           which in turn holds the address of the block to fill.  */
        uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
        ptr = aarch64_get_mem_u64 (cpu, ptr);

        /* Fill in the memory block.  */
        /* Start addr of heap.  */
        aarch64_set_mem_u64 (cpu, ptr +  0, heap_base);
        /* End addr of heap.  */
        aarch64_set_mem_u64 (cpu, ptr +  8, stack_top);
        /* Lowest stack addr.  */
        aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
        /* Initial stack addr.  */
        aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);

        TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
      }
      break;

    case AngelSVC_Reason_Open:
      {
        /* Get the pointer  */
        /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);.  */
        /* FIXME: For now we just assume that we will only be asked
           to open the standard file descriptors.  */
        /* The parameter block is ignored: each call simply hands out
           the next integer from a counter that persists across calls.  */
        static int fd = 0;
        result = fd ++;

        TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
      }
      break;

    case AngelSVC_Reason_Close:
      {
        /* Nothing is actually closed; the handle is only traced.  */
        uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
        TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
        result = 0;
      }
      break;

    case AngelSVC_Reason_Errno:
      /* Always reports 0.  */
      result = 0;
      TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
      break;

    case AngelSVC_Reason_Clock:
      /* Scale the host's clock () value to hundredths of a second,
         choosing the form of the division from the relative size of
         CLOCKS_PER_SEC.  */
      result =
#ifdef CLOCKS_PER_SEC
        (CLOCKS_PER_SEC >= 100)
        ? (clock () / (CLOCKS_PER_SEC / 100))
        : ((clock () * 100) / CLOCKS_PER_SEC)
#else
        /* Presume unix... clock() returns microseconds.  */
        (clock () / 10000)
#endif
        ;
        TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
      break;

    case AngelSVC_Reason_GetCmdLine:
      {
        /* Get the pointer.  Register 1 holds the address of a word
           which in turn holds the address of the buffer.  */
        uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
        ptr = aarch64_get_mem_u64 (cpu, ptr);

        /* FIXME: No command line for now.  */
        aarch64_set_mem_u64 (cpu, ptr, 0);
        TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
      }
      break;

    case AngelSVC_Reason_IsTTY:
      /* Every handle is reported as a terminal.  */
      result = 1;
        TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
      break;

    case AngelSVC_Reason_Write:
      {
        /* Get the pointer  */
        uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
        /* Get the write control block.  */
        uint64_t fd  = aarch64_get_mem_u64 (cpu, ptr);
        uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
        uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);

        TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
                       PRIx64 " on descriptor %" PRIx64,
                       len, buf, fd);

        /* Writes longer than 1280 bytes are treated as an error and
           stop the simulation.  */
        if (len > 1280)
          {
            TRACE_SYSCALL (cpu,
                           " AngelSVC: Write: Suspiciously long write: %ld",
                           (long) len);
            sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                             sim_stopped, SIM_SIGBUS);
          }
        else if (fd == 1)
          {
            /* Descriptor 1 goes to the host's stdout.  */
            printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
          }
        else if (fd == 2)
          {
            /* Descriptor 2 goes through the simulator's error output.  */
            TRACE (cpu, 0, "\n");
            sim_io_eprintf (CPU_STATE (cpu), "%.*s",
                            (int) len, aarch64_get_mem_ptr (cpu, buf));
            TRACE (cpu, 0, "\n");
          }
        else
          {
            /* Only descriptors 1 and 2 are supported.  */
            TRACE_SYSCALL (cpu,
                           " AngelSVC: Write: Unexpected file handle: %d",
                           (int) fd);
            sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                             sim_stopped, SIM_SIGABRT);
          }
      }
      break;

    case AngelSVC_Reason_ReportException:
      {
        /* Get the pointer  */
        uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
        /*ptr = aarch64_get_mem_u64 (cpu, ptr);.  */
        uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
        uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);

        TRACE_SYSCALL (cpu,
                       "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
                       type, state);

        /* Type 0x20026 ends the simulation as a normal exit with
           STATE as the exit status; any other type stops it with
           SIGINT.
           NOTE(review): 0x20026 is presumably the Angel
           "application exit" code -- confirm.  */
        if (type == 0x20026)
          sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                           sim_exited, state);
        else
          sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                           sim_stopped, SIM_SIGINT);
      }
      break;

    /* The remaining known reasons are not implemented and are handled
       exactly like an unknown reason code.  */
    case AngelSVC_Reason_Read:
    case AngelSVC_Reason_FLen:
    case AngelSVC_Reason_Seek:
    case AngelSVC_Reason_Remove:
    case AngelSVC_Reason_Time:
    case AngelSVC_Reason_System:
    case AngelSVC_Reason_Rename:
    case AngelSVC_Reason_Elapsed:
    default:
      TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
                     aarch64_get_reg_u32 (cpu, 0, NO_SP));
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGTRAP);
    }

  /* Hand the result back to the simulated program in register 0.  */
  aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
}
13314
13315 static void
13316 dexExcpnGen (sim_cpu *cpu)
13317 {
13318   /* instr[31:24] = 11010100
13319      instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13320                           010 ==> HLT,       101 ==> DBG GEN EXCPN
13321      instr[20,5]  = imm16
13322      instr[4,2]   = opc2 000 ==> OK, ow ==> UNALLOC
13323      instr[1,0]   = LL : discriminates opc  */
13324
13325   uint32_t opc = INSTR (23, 21);
13326   uint32_t imm16 = INSTR (20, 5);
13327   uint32_t opc2 = INSTR (4, 2);
13328   uint32_t LL;
13329
13330   NYI_assert (31, 24, 0xd4);
13331
13332   if (opc2 != 0)
13333     HALT_UNALLOC;
13334
13335   LL = INSTR (1, 0);
13336
13337   /* We only implement HLT and BRK for now.  */
13338   if (opc == 1 && LL == 0)
13339     {
13340       TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13341       sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13342                        sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13343     }
13344
13345   if (opc == 2 && LL == 0)
13346     handle_halt (cpu, imm16);
13347
13348   else if (opc == 0 || opc == 5)
13349     HALT_NYI;
13350
13351   else
13352     HALT_UNALLOC;
13353 }
13354
13355 /* Stub for accessing system registers.  */
13356
13357 static uint64_t
13358 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13359             unsigned crm, unsigned op2)
13360 {
13361   if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13362     /* DCZID_EL0 - the Data Cache Zero ID register.
13363        We do not support DC ZVA at the moment, so
13364        we return a value with the disable bit set.
13365        We implement support for the DCZID register since
13366        it is used by the C library's memset function.  */
13367     return ((uint64_t) 1) << 4;
13368
13369   if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13370     /* Cache Type Register.  */
13371     return 0x80008000UL;
13372
13373   if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13374     /* TPIDR_EL0 - thread pointer id.  */
13375     return aarch64_get_thread_id (cpu);
13376
13377   if (op1 == 3 && crm == 4 && op2 == 0)
13378     return aarch64_get_FPCR (cpu);
13379
13380   if (op1 == 3 && crm == 4 && op2 == 1)
13381     return aarch64_get_FPSR (cpu);
13382
13383   else if (op1 == 3 && crm == 2 && op2 == 0)
13384     return aarch64_get_CPSR (cpu);
13385
13386   HALT_NYI;
13387 }
13388
13389 static void
13390 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13391             unsigned crm, unsigned op2, uint64_t val)
13392 {
13393   if (op1 == 3 && crm == 4 && op2 == 0)
13394     aarch64_set_FPCR (cpu, val);
13395
13396   else if (op1 == 3 && crm == 4 && op2 == 1)
13397     aarch64_set_FPSR (cpu, val);
13398
13399   else if (op1 == 3 && crm == 2 && op2 == 0)
13400     aarch64_set_CPSR (cpu, val);
13401
13402   else
13403     HALT_NYI;
13404 }
13405
13406 static void
13407 do_mrs (sim_cpu *cpu)
13408 {
13409   /* instr[31:20] = 1101 0101 0001 1
13410      instr[19]    = op0
13411      instr[18,16] = op1
13412      instr[15,12] = CRn
13413      instr[11,8]  = CRm
13414      instr[7,5]   = op2
13415      instr[4,0]   = Rt  */
13416   unsigned sys_op0 = INSTR (19, 19) + 2;
13417   unsigned sys_op1 = INSTR (18, 16);
13418   unsigned sys_crn = INSTR (15, 12);
13419   unsigned sys_crm = INSTR (11, 8);
13420   unsigned sys_op2 = INSTR (7, 5);
13421   unsigned rt = INSTR (4, 0);
13422
13423   aarch64_set_reg_u64 (cpu, rt, NO_SP,
13424                        system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
13425 }
13426
13427 static void
13428 do_MSR_immediate (sim_cpu *cpu)
13429 {
13430   /* instr[31:19] = 1101 0101 0000 0
13431      instr[18,16] = op1
13432      instr[15,12] = 0100
13433      instr[11,8]  = CRm
13434      instr[7,5]   = op2
13435      instr[4,0]   = 1 1111  */
13436
13437   unsigned op1 = INSTR (18, 16);
13438   /*unsigned crm = INSTR (11, 8);*/
13439   unsigned op2 = INSTR (7, 5);
13440
13441   NYI_assert (31, 19, 0x1AA0);
13442   NYI_assert (15, 12, 0x4);
13443   NYI_assert (4,  0,  0x1F);
13444
13445   if (op1 == 0)
13446     {
13447       if (op2 == 5)
13448         HALT_NYI; /* set SPSel.  */
13449       else
13450         HALT_UNALLOC;
13451     }
13452   else if (op1 == 3)
13453     {
13454       if (op2 == 6)
13455         HALT_NYI; /* set DAIFset.  */
13456       else if (op2 == 7)
13457         HALT_NYI; /* set DAIFclr.  */
13458       else
13459         HALT_UNALLOC;
13460     }
13461   else
13462     HALT_UNALLOC;
13463 }
13464
13465 static void
13466 do_MSR_reg (sim_cpu *cpu)
13467 {
13468   /* instr[31:20] = 1101 0101 0001
13469      instr[19]    = op0
13470      instr[18,16] = op1
13471      instr[15,12] = CRn
13472      instr[11,8]  = CRm
13473      instr[7,5]   = op2
13474      instr[4,0]   = Rt  */
13475
13476   unsigned sys_op0 = INSTR (19, 19) + 2;
13477   unsigned sys_op1 = INSTR (18, 16);
13478   unsigned sys_crn = INSTR (15, 12);
13479   unsigned sys_crm = INSTR (11, 8);
13480   unsigned sys_op2 = INSTR (7, 5);
13481   unsigned rt = INSTR (4, 0);
13482
13483   NYI_assert (31, 20, 0xD51);
13484
13485   system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
13486               aarch64_get_reg_u64 (cpu, rt, NO_SP));
13487 }
13488
13489 static void
13490 do_SYS (sim_cpu *cpu)
13491 {
13492   /* instr[31,19] = 1101 0101 0000 1
13493      instr[18,16] = op1
13494      instr[15,12] = CRn
13495      instr[11,8]  = CRm
13496      instr[7,5]   = op2
13497      instr[4,0]   = Rt  */
13498   NYI_assert (31, 19, 0x1AA1);
13499
13500   /* FIXME: For now we just silently accept system ops.  */
13501 }
13502
/* Decode a system instruction.  Dispatches on instr[21,12]
   (L:op0:op1:CRn) to the hint, barrier, MRS, SYS and MSR handlers;
   anything not recognised halts as not-yet-implemented or
   unallocated.  */
static void
dexSystem (sim_cpu *cpu)
{
  /* instr[31:22] = 1101 01010 0
     instr[21]    = L
     instr[20,19] = op0
     instr[18,16] = op1
     instr[15,12] = CRn
     instr[11,8]  = CRm
     instr[7,5]   = op2
     instr[4,0]   = uimm5  */

  /* We are interested in HINT, DSB, DMB and ISB

     Hint #0 encodes NOOP (this is the only hint we care about)
     L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111,
     CRm != 0000 OR (CRm == 0000 AND (op2 == 000 OR op2 > 101))

     DSB, DMB, ISB are data synchronization barrier, data memory
     barrier and instruction synchronization barrier, respectively,
     where

     L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
     op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
     CRm<3:2> ==> domain, CRm<1:0> ==> types,
     domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
              10 ==> InnerShareable, 11 ==> FullSystem
     types :  01 ==> Reads, 10 ==> Writes,
              11 ==> All, 00 ==> All (domain == FullSystem).  */

  unsigned rt = INSTR (4, 0);

  NYI_assert (31, 22, 0x354);

  switch (INSTR (21, 12))
    {
    /* L == 0, op0 == 00, op1 == 011, CRn == 0010 : hints.  */
    case 0x032:
      if (rt == 0x1F)
        {
          /* NOP has CRm != 0000 OR.  */
          /*         (CRm == 0000 AND (op2 == 000 OR op2 > 101)).  */
          uint32_t crm = INSTR (11, 8);
          uint32_t op2 = INSTR (7, 5);

          if (crm != 0 || (op2 == 0 || op2 > 5))
            {
              /* Actually call nop method so we can reimplement it later.  */
              nop (cpu);
              return;
            }
        }
      /* NOTE(review): there is no return after this halt; the code
         would fall into the next case if HALT_NYI ever returned.
         Presumably it does not -- confirm.  */
      HALT_NYI;

    /* L == 0, op0 == 00, op1 == 011, CRn == 0011 : barriers.  */
    case 0x033:
      {
        uint32_t op2 =  INSTR (7, 5);

        switch (op2)
          {
          case 2: HALT_NYI;
          case 4: dsb (cpu); return;
          case 5: dmb (cpu); return;
          case 6: isb (cpu); return;
          default: HALT_UNALLOC;
        }
      }

    /* L == 1, op0 == 11, op1 == 011, CRn == 0, 4 or 13 : MRS.  */
    case 0x3B0:
    case 0x3B4:
    case 0x3BD:
      do_mrs (cpu);
      return;

    /* L == 0, op0 == 01, op1 == 011, CRn == 0111.  */
    case 0x0B7:
      do_SYS (cpu); /* DC is an alias of SYS.  */
      return;

    default:
      /* L == 0 with the top bit of op0 set selects MSR (register);
         L == 0, op0 == 0 (top bit) and CRn == 0100 selects MSR
         (immediate).  */
      if (INSTR (21, 20) == 0x1)
        do_MSR_reg (cpu);
      else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
        do_MSR_immediate (cpu);
      else
        HALT_NYI;
      return;
    }
}
13589
13590 static void
13591 dexBr (sim_cpu *cpu)
13592 {
13593   /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13594      assert  group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
13595      bits [31,29] of a BrExSys are the secondary dispatch vector.  */
13596   uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
13597
13598   switch (group2)
13599     {
13600     case BR_IMM_000:
13601       return dexBranchImmediate (cpu);
13602
13603     case BR_IMMCMP_001:
13604       /* Compare has bit 25 clear while test has it set.  */
13605       if (!INSTR (25, 25))
13606         dexCompareBranchImmediate (cpu);
13607       else
13608         dexTestBranchImmediate (cpu);
13609       return;
13610
13611     case BR_IMMCOND_010:
13612       /* This is a conditional branch if bit 25 is clear otherwise
13613          unallocated.  */
13614       if (!INSTR (25, 25))
13615         dexCondBranchImmediate (cpu);
13616       else
13617         HALT_UNALLOC;
13618       return;
13619
13620     case BR_UNALLOC_011:
13621       HALT_UNALLOC;
13622
13623     case BR_IMM_100:
13624       dexBranchImmediate (cpu);
13625       return;
13626
13627     case BR_IMMCMP_101:
13628       /* Compare has bit 25 clear while test has it set.  */
13629       if (!INSTR (25, 25))
13630         dexCompareBranchImmediate (cpu);
13631       else
13632         dexTestBranchImmediate (cpu);
13633       return;
13634
13635     case BR_REG_110:
13636       /* Unconditional branch reg has bit 25 set.  */
13637       if (INSTR (25, 25))
13638         dexBranchRegister (cpu);
13639
13640       /* This includes both Excpn Gen, System and unalloc operations.
13641          We need to decode the Excpn Gen operation BRK so we can plant
13642          debugger entry points.
13643          Excpn Gen operations have instr [24] = 0.
13644          we need to decode at least one of the System operations NOP
13645          which is an alias for HINT #0.
13646          System operations have instr [24,22] = 100.  */
13647       else if (INSTR (24, 24) == 0)
13648         dexExcpnGen (cpu);
13649
13650       else if (INSTR (24, 22) == 4)
13651         dexSystem (cpu);
13652
13653       else
13654         HALT_UNALLOC;
13655
13656       return;
13657
13658     case BR_UNALLOC_111:
13659       HALT_UNALLOC;
13660
13661     default:
13662       /* Should never reach here.  */
13663       HALT_NYI;
13664     }
13665 }
13666
13667 static void
13668 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
13669 {
13670   /* We need to check if gdb wants an in here.  */
13671   /* checkBreak (cpu);.  */
13672
13673   uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
13674
13675   switch (group)
13676     {
13677     case GROUP_PSEUDO_0000:   dexPseudo (cpu); break;
13678     case GROUP_LDST_0100:     dexLdSt (cpu); break;
13679     case GROUP_DPREG_0101:    dexDPReg (cpu); break;
13680     case GROUP_LDST_0110:     dexLdSt (cpu); break;
13681     case GROUP_ADVSIMD_0111:  dexAdvSIMD0 (cpu); break;
13682     case GROUP_DPIMM_1000:    dexDPImm (cpu); break;
13683     case GROUP_DPIMM_1001:    dexDPImm (cpu); break;
13684     case GROUP_BREXSYS_1010:  dexBr (cpu); break;
13685     case GROUP_BREXSYS_1011:  dexBr (cpu); break;
13686     case GROUP_LDST_1100:     dexLdSt (cpu); break;
13687     case GROUP_DPREG_1101:    dexDPReg (cpu); break;
13688     case GROUP_LDST_1110:     dexLdSt (cpu); break;
13689     case GROUP_ADVSIMD_1111:  dexAdvSIMD1 (cpu); break;
13690
13691     case GROUP_UNALLOC_0001:
13692     case GROUP_UNALLOC_0010:
13693     case GROUP_UNALLOC_0011:
13694       HALT_UNALLOC;
13695
13696     default:
13697       /* Should never reach here.  */
13698       HALT_NYI;
13699     }
13700 }
13701
13702 static bfd_boolean
13703 aarch64_step (sim_cpu *cpu)
13704 {
13705   uint64_t pc = aarch64_get_PC (cpu);
13706
13707   if (pc == TOP_LEVEL_RETURN_PC)
13708     return FALSE;
13709
13710   aarch64_set_next_PC (cpu, pc + 4);
13711   aarch64_get_instr (cpu) = aarch64_get_mem_u32 (cpu, pc);
13712
13713   TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
13714               aarch64_get_instr (cpu));
13715   TRACE_DISASM (cpu, pc);
13716
13717   aarch64_decode_and_execute (cpu, pc);
13718
13719   return TRUE;
13720 }
13721
13722 void
13723 aarch64_run (SIM_DESC sd)
13724 {
13725   sim_cpu *cpu = STATE_CPU (sd, 0);
13726
13727   while (aarch64_step (cpu))
13728     aarch64_update_PC (cpu);
13729
13730   sim_engine_halt (sd, NULL, NULL, aarch64_get_PC (cpu),
13731                    sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13732 }
13733
13734 void
13735 aarch64_init (sim_cpu *cpu, uint64_t pc)
13736 {
13737   uint64_t sp = aarch64_get_stack_start (cpu);
13738
13739   /* Install SP, FP and PC and set LR to -20
13740      so we can detect a top-level return.  */
13741   aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
13742   aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
13743   aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
13744   aarch64_set_next_PC (cpu, pc);
13745   aarch64_update_PC (cpu);
13746   aarch64_init_LIT_table ();
13747 }