gdb/gdbserver/linux-aarch64-low.c

   1 /* GNU/Linux/AArch64 specific low level interface, for the remote server for
   2    GDB.
   3
   4    Copyright (C) 2009-2015 Free Software Foundation, Inc.
   5    Contributed by ARM Ltd.
   6
   7    This file is part of GDB.
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 3 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  21
  22 #include "server.h"
  23 #include "linux-low.h"
  24 #include "nat/aarch64-linux.h"
  25 #include "nat/aarch64-linux-hw-point.h"
  26 #include "arch/aarch64-insn.h"
  27 #include "linux-aarch32-low.h"
  28 #include "elf/common.h"
  29 #include "ax.h"
  30 #include "tracepoint.h"
  31
  32 #include <signal.h>
  33 #include <sys/user.h>
  34 #include "nat/gdb_ptrace.h"
  35 #include <asm/ptrace.h>
  36 #include <inttypes.h>
  37 #include <endian.h>
  38 #include <sys/uio.h>
  39
  40 #include "gdb_proc_service.h"
  41
  42 /* Defined in auto-generated files.  */
  43 void init_registers_aarch64 (void);
  44 extern const struct target_desc *tdesc_aarch64;
  45
  46 #ifdef HAVE_SYS_REG_H
  47 #include <sys/reg.h>
  48 #endif
  49
/* Register numbers used by this file when indexing the AArch64
   regcache: the X registers start at 0, followed by SP, PC and CPSR,
   then the V registers, with FPSR and FPCR placed right after the last
   V register.  */
#define AARCH64_X_REGS_NUM 31
#define AARCH64_V_REGS_NUM 32
#define AARCH64_X0_REGNO    0
#define AARCH64_SP_REGNO   31
#define AARCH64_PC_REGNO   32
#define AARCH64_CPSR_REGNO 33
#define AARCH64_V0_REGNO   34
#define AARCH64_FPSR_REGNO (AARCH64_V0_REGNO + AARCH64_V_REGS_NUM)
#define AARCH64_FPCR_REGNO (AARCH64_V0_REGNO + AARCH64_V_REGS_NUM + 1)

/* One past the highest register number defined above (FPCR).  */
#define AARCH64_NUM_REGS (AARCH64_V0_REGNO + AARCH64_V_REGS_NUM + 2)
  61
  62 /* Per-process arch-specific data we want to keep.  */
  63
struct arch_process_info
{
  /* Hardware breakpoint/watchpoint data.
     The reason for them to be per-process rather than per-thread is
     due to the lack of information in the gdbserver environment;
     gdbserver is not told whether a requested hardware
     breakpoint/watchpoint is thread specific or not, so it has to set
     each hw bp/wp for every thread in the current process.  The
     higher level bp/wp management in gdb will resume a thread if a hw
     bp/wp trap is not expected for it.  Since the hw bp/wp setting is
     the same for each thread, it is reasonable for the data to live
     here.  */
  struct aarch64_debug_reg_state debug_reg_state;
};
  78
  79 /* Return true if the size of register 0 is 8 byte.  */
  80
  81 static int
  82 is_64bit_tdesc (void)
  83 {
  84   struct regcache *regcache = get_thread_regcache (current_thread, 0);
  85
  86   return register_size (regcache->tdesc, 0) == 8;
  87 }
  88
  89 /* Implementation of linux_target_ops method "cannot_store_register".  */
  90
  91 static int
  92 aarch64_cannot_store_register (int regno)
  93 {
  94   return regno >= AARCH64_NUM_REGS;
  95 }
  96
  97 /* Implementation of linux_target_ops method "cannot_fetch_register".  */
  98
  99 static int
 100 aarch64_cannot_fetch_register (int regno)
 101 {
 102   return regno >= AARCH64_NUM_REGS;
 103 }
 104
 105 static void
 106 aarch64_fill_gregset (struct regcache *regcache, void *buf)
 107 {
 108   struct user_pt_regs *regset = buf;
 109   int i;
 110
 111   for (i = 0; i < AARCH64_X_REGS_NUM; i++)
 112     collect_register (regcache, AARCH64_X0_REGNO + i, &regset->regs[i]);
 113   collect_register (regcache, AARCH64_SP_REGNO, &regset->sp);
 114   collect_register (regcache, AARCH64_PC_REGNO, &regset->pc);
 115   collect_register (regcache, AARCH64_CPSR_REGNO, &regset->pstate);
 116 }
 117
 118 static void
 119 aarch64_store_gregset (struct regcache *regcache, const void *buf)
 120 {
 121   const struct user_pt_regs *regset = buf;
 122   int i;
 123
 124   for (i = 0; i < AARCH64_X_REGS_NUM; i++)
 125     supply_register (regcache, AARCH64_X0_REGNO + i, &regset->regs[i]);
 126   supply_register (regcache, AARCH64_SP_REGNO, &regset->sp);
 127   supply_register (regcache, AARCH64_PC_REGNO, &regset->pc);
 128   supply_register (regcache, AARCH64_CPSR_REGNO, &regset->pstate);
 129 }
 130
 131 static void
 132 aarch64_fill_fpregset (struct regcache *regcache, void *buf)
 133 {
 134   struct user_fpsimd_state *regset = buf;
 135   int i;
 136
 137   for (i = 0; i < AARCH64_V_REGS_NUM; i++)
 138     collect_register (regcache, AARCH64_V0_REGNO + i, &regset->vregs[i]);
 139   collect_register (regcache, AARCH64_FPSR_REGNO, &regset->fpsr);
 140   collect_register (regcache, AARCH64_FPCR_REGNO, &regset->fpcr);
 141 }
 142
 143 static void
 144 aarch64_store_fpregset (struct regcache *regcache, const void *buf)
 145 {
 146   const struct user_fpsimd_state *regset = buf;
 147   int i;
 148
 149   for (i = 0; i < AARCH64_V_REGS_NUM; i++)
 150     supply_register (regcache, AARCH64_V0_REGNO + i, &regset->vregs[i]);
 151   supply_register (regcache, AARCH64_FPSR_REGNO, &regset->fpsr);
 152   supply_register (regcache, AARCH64_FPCR_REGNO, &regset->fpcr);
 153 }
 154
 155 /* Enable miscellaneous debugging output.  The name is historical - it
 156    was originally used to debug LinuxThreads support.  */
 157 extern int debug_threads;
 158
 159 /* Implementation of linux_target_ops method "get_pc".  */
 160
 161 static CORE_ADDR
 162 aarch64_get_pc (struct regcache *regcache)
 163 {
 164   if (register_size (regcache->tdesc, 0) == 8)
 165     {
 166       unsigned long pc;
 167
 168       collect_register_by_name (regcache, "pc", &pc);
 169       if (debug_threads)
 170         debug_printf ("stop pc is %08lx\n", pc);
 171       return pc;
 172     }
 173   else
 174     {
 175       unsigned int pc;
 176
 177       collect_register_by_name (regcache, "pc", &pc);
 178       if (debug_threads)
 179         debug_printf ("stop pc is %04x\n", pc);
 180       return pc;
 181     }
 182 }
 183
 184 /* Implementation of linux_target_ops method "set_pc".  */
 185
 186 static void
 187 aarch64_set_pc (struct regcache *regcache, CORE_ADDR pc)
 188 {
 189   if (register_size (regcache->tdesc, 0) == 8)
 190     {
 191       unsigned long newpc = pc;
 192       supply_register_by_name (regcache, "pc", &newpc);
 193     }
 194   else
 195     {
 196       unsigned int newpc = pc;
 197       supply_register_by_name (regcache, "pc", &newpc);
 198     }
 199 }
 200
/* Size in bytes of the breakpoint instruction below.  */
#define aarch64_breakpoint_len 4

/* AArch64 BRK software debug mode instruction.
   This instruction needs to match gdb/aarch64-tdep.c
   (aarch64_default_breakpoint).  The bytes are listed in memory order;
   read as a little-endian 32-bit word they form 0xd4200000.  */
static const gdb_byte aarch64_breakpoint[] = {0x00, 0x00, 0x20, 0xd4};
 207
 208 /* Implementation of linux_target_ops method "breakpoint_at".  */
 209
 210 static int
 211 aarch64_breakpoint_at (CORE_ADDR where)
 212 {
 213   gdb_byte insn[aarch64_breakpoint_len];
 214
 215   (*the_target->read_memory) (where, (unsigned char *) &insn,
 216                               aarch64_breakpoint_len);
 217   if (memcmp (insn, aarch64_breakpoint, aarch64_breakpoint_len) == 0)
 218     return 1;
 219
 220   return 0;
 221 }
 222
 223 static void
 224 aarch64_init_debug_reg_state (struct aarch64_debug_reg_state *state)
 225 {
 226   int i;
 227
 228   for (i = 0; i < AARCH64_HBP_MAX_NUM; ++i)
 229     {
 230       state->dr_addr_bp[i] = 0;
 231       state->dr_ctrl_bp[i] = 0;
 232       state->dr_ref_count_bp[i] = 0;
 233     }
 234
 235   for (i = 0; i < AARCH64_HWP_MAX_NUM; ++i)
 236     {
 237       state->dr_addr_wp[i] = 0;
 238       state->dr_ctrl_wp[i] = 0;
 239       state->dr_ref_count_wp[i] = 0;
 240     }
 241 }
 242
 243 /* Return the pointer to the debug register state structure in the
 244    current process' arch-specific data area.  */
 245
 246 struct aarch64_debug_reg_state *
 247 aarch64_get_debug_reg_state (pid_t pid)
 248 {
 249   struct process_info *proc = find_process_pid (pid);
 250
 251   return &proc->priv->arch_private->debug_reg_state;
 252 }
 253
 254 /* Implementation of linux_target_ops method "supports_z_point_type".  */
 255
 256 static int
 257 aarch64_supports_z_point_type (char z_type)
 258 {
 259   switch (z_type)
 260     {
 261     case Z_PACKET_SW_BP:
 262       {
 263         if (!extended_protocol && is_64bit_tdesc ())
 264           {
 265             /* Only enable Z0 packet in non-multi-arch debugging.  If
 266                extended protocol is used, don't enable Z0 packet because
 267                GDBserver may attach to 32-bit process.  */
 268             return 1;
 269           }
 270         else
 271           {
 272             /* Disable Z0 packet so that GDBserver doesn't have to handle
 273                different breakpoint instructions (aarch64, arm, thumb etc)
 274                in multi-arch debugging.  */
 275             return 0;
 276           }
 277       }
 278     case Z_PACKET_HW_BP:
 279     case Z_PACKET_WRITE_WP:
 280     case Z_PACKET_READ_WP:
 281     case Z_PACKET_ACCESS_WP:
 282       return 1;
 283     default:
 284       return 0;
 285     }
 286 }
 287
 288 /* Implementation of linux_target_ops method "insert_point".
 289
 290    It actually only records the info of the to-be-inserted bp/wp;
 291    the actual insertion will happen when threads are resumed.  */
 292
 293 static int
 294 aarch64_insert_point (enum raw_bkpt_type type, CORE_ADDR addr,
 295                       int len, struct raw_breakpoint *bp)
 296 {
 297   int ret;
 298   enum target_hw_bp_type targ_type;
 299   struct aarch64_debug_reg_state *state
 300     = aarch64_get_debug_reg_state (pid_of (current_thread));
 301
 302   if (show_debug_regs)
 303     fprintf (stderr, "insert_point on entry (addr=0x%08lx, len=%d)\n",
 304              (unsigned long) addr, len);
 305
 306   /* Determine the type from the raw breakpoint type.  */
 307   targ_type = raw_bkpt_type_to_target_hw_bp_type (type);
 308
 309   if (targ_type != hw_execute)
 310     {
 311       if (aarch64_linux_region_ok_for_watchpoint (addr, len))
 312         ret = aarch64_handle_watchpoint (targ_type, addr, len,
 313                                          1 /* is_insert */, state);
 314       else
 315         ret = -1;
 316     }
 317   else
 318     ret =
 319       aarch64_handle_breakpoint (targ_type, addr, len, 1 /* is_insert */,
 320                                  state);
 321
 322   if (show_debug_regs)
 323     aarch64_show_debug_reg_state (state, "insert_point", addr, len,
 324                                   targ_type);
 325
 326   return ret;
 327 }
 328
 329 /* Implementation of linux_target_ops method "remove_point".
 330
 331    It actually only records the info of the to-be-removed bp/wp,
 332    the actual removal will be done when threads are resumed.  */
 333
 334 static int
 335 aarch64_remove_point (enum raw_bkpt_type type, CORE_ADDR addr,
 336                       int len, struct raw_breakpoint *bp)
 337 {
 338   int ret;
 339   enum target_hw_bp_type targ_type;
 340   struct aarch64_debug_reg_state *state
 341     = aarch64_get_debug_reg_state (pid_of (current_thread));
 342
 343   if (show_debug_regs)
 344     fprintf (stderr, "remove_point on entry (addr=0x%08lx, len=%d)\n",
 345              (unsigned long) addr, len);
 346
 347   /* Determine the type from the raw breakpoint type.  */
 348   targ_type = raw_bkpt_type_to_target_hw_bp_type (type);
 349
 350   /* Set up state pointers.  */
 351   if (targ_type != hw_execute)
 352     ret =
 353       aarch64_handle_watchpoint (targ_type, addr, len, 0 /* is_insert */,
 354                                  state);
 355   else
 356     ret =
 357       aarch64_handle_breakpoint (targ_type, addr, len, 0 /* is_insert */,
 358                                  state);
 359
 360   if (show_debug_regs)
 361     aarch64_show_debug_reg_state (state, "remove_point", addr, len,
 362                                   targ_type);
 363
 364   return ret;
 365 }
 366
 367 /* Implementation of linux_target_ops method "stopped_data_address".  */
 368
 369 static CORE_ADDR
 370 aarch64_stopped_data_address (void)
 371 {
 372   siginfo_t siginfo;
 373   int pid, i;
 374   struct aarch64_debug_reg_state *state;
 375
 376   pid = lwpid_of (current_thread);
 377
 378   /* Get the siginfo.  */
 379   if (ptrace (PTRACE_GETSIGINFO, pid, NULL, &siginfo) != 0)
 380     return (CORE_ADDR) 0;
 381
 382   /* Need to be a hardware breakpoint/watchpoint trap.  */
 383   if (siginfo.si_signo != SIGTRAP
 384       || (siginfo.si_code & 0xffff) != 0x0004 /* TRAP_HWBKPT */)
 385     return (CORE_ADDR) 0;
 386
 387   /* Check if the address matches any watched address.  */
 388   state = aarch64_get_debug_reg_state (pid_of (current_thread));
 389   for (i = aarch64_num_wp_regs - 1; i >= 0; --i)
 390     {
 391       const unsigned int len = aarch64_watchpoint_length (state->dr_ctrl_wp[i]);
 392       const CORE_ADDR addr_trap = (CORE_ADDR) siginfo.si_addr;
 393       const CORE_ADDR addr_watch = state->dr_addr_wp[i];
 394       if (state->dr_ref_count_wp[i]
 395           && DR_CONTROL_ENABLED (state->dr_ctrl_wp[i])
 396           && addr_trap >= addr_watch
 397           && addr_trap < addr_watch + len)
 398         return addr_trap;
 399     }
 400
 401   return (CORE_ADDR) 0;
 402 }
 403
 404 /* Implementation of linux_target_ops method "stopped_by_watchpoint".  */
 405
 406 static int
 407 aarch64_stopped_by_watchpoint (void)
 408 {
 409   if (aarch64_stopped_data_address () != 0)
 410     return 1;
 411   else
 412     return 0;
 413 }
 414
 415 /* Fetch the thread-local storage pointer for libthread_db.  */
 416
 417 ps_err_e
 418 ps_get_thread_area (const struct ps_prochandle *ph,
 419                     lwpid_t lwpid, int idx, void **base)
 420 {
 421   return aarch64_ps_get_thread_area (ph, lwpid, idx, base,
 422                                      is_64bit_tdesc ());
 423 }
 424
 425 /* Implementation of linux_target_ops method "siginfo_fixup".  */
 426
 427 static int
 428 aarch64_linux_siginfo_fixup (siginfo_t *native, void *inf, int direction)
 429 {
 430   /* Is the inferior 32-bit?  If so, then fixup the siginfo object.  */
 431   if (!is_64bit_tdesc ())
 432     {
 433       if (direction == 0)
 434         aarch64_compat_siginfo_from_siginfo ((struct compat_siginfo *) inf,
 435                                              native);
 436       else
 437         aarch64_siginfo_from_compat_siginfo (native,
 438                                              (struct compat_siginfo *) inf);
 439
 440       return 1;
 441     }
 442
 443   return 0;
 444 }
 445
 446 /* Implementation of linux_target_ops method "linux_new_process".  */
 447
 448 static struct arch_process_info *
 449 aarch64_linux_new_process (void)
 450 {
 451   struct arch_process_info *info = XCNEW (struct arch_process_info);
 452
 453   aarch64_init_debug_reg_state (&info->debug_reg_state);
 454
 455   return info;
 456 }
 457
 458 /* Implementation of linux_target_ops method "linux_new_fork".  */
 459
 460 static void
 461 aarch64_linux_new_fork (struct process_info *parent,
 462                         struct process_info *child)
 463 {
 464   /* These are allocated by linux_add_process.  */
 465   gdb_assert (parent->priv != NULL
 466               && parent->priv->arch_private != NULL);
 467   gdb_assert (child->priv != NULL
 468               && child->priv->arch_private != NULL);
 469
 470   /* Linux kernel before 2.6.33 commit
 471      72f674d203cd230426437cdcf7dd6f681dad8b0d
 472      will inherit hardware debug registers from parent
 473      on fork/vfork/clone.  Newer Linux kernels create such tasks with
 474      zeroed debug registers.
 475
 476      GDB core assumes the child inherits the watchpoints/hw
 477      breakpoints of the parent, and will remove them all from the
 478      forked off process.  Copy the debug registers mirrors into the
 479      new process so that all breakpoints and watchpoints can be
 480      removed together.  The debug registers mirror will become zeroed
 481      in the end before detaching the forked off process, thus making
 482      this compatible with older Linux kernels too.  */
 483
 484   *child->priv->arch_private = *parent->priv->arch_private;
 485 }
 486
 487 /* Return the right target description according to the ELF file of
 488    current thread.  */
 489
 490 static const struct target_desc *
 491 aarch64_linux_read_description (void)
 492 {
 493   unsigned int machine;
 494   int is_elf64;
 495   int tid;
 496
 497   tid = lwpid_of (current_thread);
 498
 499   is_elf64 = linux_pid_exe_is_elf_64_file (tid, &machine);
 500
 501   if (is_elf64)
 502     return tdesc_aarch64;
 503   else
 504     return tdesc_arm_with_neon;
 505 }
 506
 507 /* Implementation of linux_target_ops method "arch_setup".  */
 508
 509 static void
 510 aarch64_arch_setup (void)
 511 {
 512   current_process ()->tdesc = aarch64_linux_read_description ();
 513
 514   aarch64_linux_get_debug_reg_capacity (lwpid_of (current_thread));
 515 }
 516
/* Register sets transferred with PTRACE_GETREGSET/PTRACE_SETREGSET:
   the general registers (NT_PRSTATUS) and the FP/SIMD registers
   (NT_FPREGSET).  The last entry, with size -1, ends the list.  */
static struct regset_info aarch64_regsets[] =
{
  { PTRACE_GETREGSET, PTRACE_SETREGSET, NT_PRSTATUS,
    sizeof (struct user_pt_regs), GENERAL_REGS,
    aarch64_fill_gregset, aarch64_store_gregset },
  { PTRACE_GETREGSET, PTRACE_SETREGSET, NT_FPREGSET,
    sizeof (struct user_fpsimd_state), FP_REGS,
    aarch64_fill_fpregset, aarch64_store_fpregset
  },
  { 0, 0, 0, -1, -1, NULL, NULL }
};

/* Wrapper around the regset list above.  NOTE(review): num_regsets
   starts at 0 and is presumably filled in by initialization code not
   shown here.  */
static struct regsets_info aarch64_regsets_info =
  {
    aarch64_regsets, /* regsets */
    0, /* num_regsets */
    NULL, /* disabled_regsets */
  };

/* Register access description for the 64-bit case; only regsets are
   provided (no regset bitmap, no usrregs).  */
static struct regs_info regs_info_aarch64 =
  {
    NULL, /* regset_bitmap */
    NULL, /* usrregs */
    &aarch64_regsets_info,
  };
 542
 543 /* Implementation of linux_target_ops method "regs_info".  */
 544
 545 static const struct regs_info *
 546 aarch64_regs_info (void)
 547 {
 548   if (is_64bit_tdesc ())
 549     return &regs_info_aarch64;
 550   else
 551     return &regs_info_aarch32;
 552 }
 553
 554 /* Implementation of linux_target_ops method "supports_tracepoints".  */
 555
 556 static int
 557 aarch64_supports_tracepoints (void)
 558 {
 559   if (current_thread == NULL)
 560     return 1;
 561   else
 562     {
 563       /* We don't support tracepoints on aarch32 now.  */
 564       return is_64bit_tdesc ();
 565     }
 566 }
 567
 568 /* Implementation of linux_target_ops method "get_thread_area".  */
 569
 570 static int
 571 aarch64_get_thread_area (int lwpid, CORE_ADDR *addrp)
 572 {
 573   struct iovec iovec;
 574   uint64_t reg;
 575
 576   iovec.iov_base = &reg;
 577   iovec.iov_len = sizeof (reg);
 578
 579   if (ptrace (PTRACE_GETREGSET, lwpid, NT_ARM_TLS, &iovec) != 0)
 580     return -1;
 581
 582   *addrp = reg;
 583
 584   return 0;
 585 }
 586
 587 /* Extract a signed value from a bit field within an instruction
 588    encoding.
 589
 590    INSN is the instruction opcode.
 591
 592    WIDTH specifies the width of the bit field to extract (in bits).
 593
 594    OFFSET specifies the least significant bit of the field where bits
 595    are numbered zero counting from least to most significant.  */
 596
 597 static int32_t
 598 extract_signed_bitfield (uint32_t insn, unsigned width, unsigned offset)
 599 {
 600   unsigned shift_l = sizeof (int32_t) * 8 - (offset + width);
 601   unsigned shift_r = sizeof (int32_t) * 8 - width;
 602
 603   return ((int32_t) insn << shift_l) >> shift_r;
 604 }
 605
 606 /* Decode an opcode if it represents an LDR or LDRSW instruction taking a
 607    literal offset from the current PC.
 608
 609    ADDR specifies the address of the opcode.
 610    INSN specifies the opcode to test.
 611    IS_W is set if the instruction is LDRSW.
 612    IS64 receives size field from the decoded instruction.
 613    RT receives the 'rt' field from the decoded instruction.
 614    OFFSET receives the 'imm' field from the decoded instruction.
 615
 616    Return 1 if the opcodes matches and is decoded, otherwise 0.  */
 617
 618 int
 619 aarch64_decode_ldr_literal (CORE_ADDR addr, uint32_t insn, int *is_w,
 620                             int *is64, unsigned *rt, int32_t *offset)
 621 {
 622   /* LDR    0T01 1000 iiii iiii iiii iiii iiir rrrr */
 623   /* LDRSW  1001 1000 iiii iiii iiii iiii iiir rrrr */
 624   if ((insn & 0x3f000000) == 0x18000000)
 625     {
 626       *is_w = (insn >> 31) & 0x1;
 627
 628       if (*is_w)
 629         {
 630           /* LDRSW always takes a 64-bit destination registers.  */
 631           *is64 = 1;
 632         }
 633       else
 634         *is64 = (insn >> 30) & 0x1;
 635
 636       *rt = (insn >> 0) & 0x1f;
 637       *offset = extract_signed_bitfield (insn, 19, 5) << 2;
 638
 639       if (aarch64_debug)
 640         debug_printf ("decode: %s 0x%x %s %s%u, #?\n",
 641                       core_addr_to_string_nz (addr), insn,
 642                       *is_w ? "ldrsw" : "ldr",
 643                       *is64 ? "x" : "w", *rt);
 644
 645       return 1;
 646     }
 647
 648   return 0;
 649 }
 650
 651 /* List of opcodes that we need for building the jump pad and relocating
 652    an instruction.  */
 653
/* Each enumerator holds the fixed bits of an instruction; the comment
   above it shows the full 32-bit pattern, where letters mark variable
   fields that are ORed in when the instruction is emitted.  Several
   values are built by ORing extra fixed bits onto a related opcode.  */
enum aarch64_opcodes
{
  /* B              0001 01ii iiii iiii iiii iiii iiii iiii */
  /* BL             1001 01ii iiii iiii iiii iiii iiii iiii */
  /* B.COND         0101 0100 iiii iiii iiii iiii iii0 cccc */
  /* CBZ            s011 0100 iiii iiii iiii iiii iiir rrrr */
  /* CBNZ           s011 0101 iiii iiii iiii iiii iiir rrrr */
  /* TBZ            b011 0110 bbbb biii iiii iiii iiir rrrr */
  /* TBNZ           b011 0111 bbbb biii iiii iiii iiir rrrr */
  B               = 0x14000000,
  BL              = 0x80000000 | B,
  BCOND           = 0x40000000 | B,
  CBZ             = 0x20000000 | B,
  CBNZ            = 0x21000000 | B,
  TBZ             = 0x36000000 | B,
  TBNZ            = 0x37000000 | B,
  /* BLR            1101 0110 0011 1111 0000 00rr rrr0 0000 */
  BLR             = 0xd63f0000,
  /* RET            1101 0110 0101 1111 0000 00rr rrr0 0000 */
  RET             = 0xd65f0000,
  /* STP            s010 100o o0ii iiii irrr rrrr rrrr rrrr */
  /* LDP            s010 100o o1ii iiii irrr rrrr rrrr rrrr */
  /* STP (SIMD&VFP) ss10 110o o0ii iiii irrr rrrr rrrr rrrr */
  /* LDP (SIMD&VFP) ss10 110o o1ii iiii irrr rrrr rrrr rrrr */
  STP             = 0x28000000,
  LDP             = 0x28400000,
  STP_SIMD_VFP    = 0x04000000 | STP,
  LDP_SIMD_VFP    = 0x04000000 | LDP,
  /* STR            ss11 100o 00xi iiii iiii xxrr rrrr rrrr */
  /* LDR            ss11 100o 01xi iiii iiii xxrr rrrr rrrr */
  /* LDRSW          1011 100o 10xi iiii iiii xxrr rrrr rrrr */
  STR             = 0x38000000,
  LDR             = 0x00400000 | STR,
  LDRSW           = 0x80800000 | STR,
  /* LDAXR          ss00 1000 0101 1111 1111 11rr rrrr rrrr */
  LDAXR           = 0x085ffc00,
  /* STXR           ss00 1000 000r rrrr 0111 11rr rrrr rrrr */
  STXR            = 0x08007c00,
  /* STLR           ss00 1000 1001 1111 1111 11rr rrrr rrrr */
  STLR            = 0x089ffc00,
  /* MOV            s101 0010 1xxi iiii iiii iiii iiir rrrr */
  /* MOVK           s111 0010 1xxi iiii iiii iiii iiir rrrr */
  MOV             = 0x52800000,
  MOVK            = 0x20000000 | MOV,
  /* ADD            s00o ooo1 xxxx xxxx xxxx xxxx xxxx xxxx */
  /* SUB            s10o ooo1 xxxx xxxx xxxx xxxx xxxx xxxx */
  /* SUBS           s11o ooo1 xxxx xxxx xxxx xxxx xxxx xxxx */
  ADD             = 0x01000000,
  SUB             = 0x40000000 | ADD,
  SUBS            = 0x20000000 | SUB,
  /* AND            s000 1010 xx0x xxxx xxxx xxxx xxxx xxxx */
  /* ORR            s010 1010 xx0x xxxx xxxx xxxx xxxx xxxx */
  /* ORN            s010 1010 xx1x xxxx xxxx xxxx xxxx xxxx */
  /* EOR            s100 1010 xx0x xxxx xxxx xxxx xxxx xxxx */
  AND             = 0x0a000000,
  ORR             = 0x20000000 | AND,
  ORN             = 0x00200000 | ORR,
  EOR             = 0x40000000 | AND,
  /* LSLV           s001 1010 110r rrrr 0010 00rr rrrr rrrr */
  /* LSRV           s001 1010 110r rrrr 0010 01rr rrrr rrrr */
  /* ASRV           s001 1010 110r rrrr 0010 10rr rrrr rrrr */
  LSLV             = 0x1ac02000,
  LSRV             = 0x00000400 | LSLV,
  ASRV             = 0x00000800 | LSLV,
  /* SBFM           s001 0011 0nii iiii iiii iirr rrrr rrrr */
  SBFM            = 0x13000000,
  /* UBFM           s101 0011 0nii iiii iiii iirr rrrr rrrr */
  UBFM            = 0x40000000 | SBFM,
  /* CSINC          s001 1010 100r rrrr cccc 01rr rrrr rrrr */
  /* NOTE(review): unlike the other entries, this literal already has
     bit 31 (the `s' position in the pattern above) set; confirm that
     is intended.  */
  CSINC           = 0x9a800400,
  /* MUL            s001 1011 000r rrrr 0111 11rr rrrr rrrr */
  MUL             = 0x1b007c00,
  /* MSR (register) 1101 0101 0001 oooo oooo oooo ooor rrrr */
  /* MRS            1101 0101 0011 oooo oooo oooo ooor rrrr */
  MSR             = 0xd5100000,
  MRS             = 0x00200000 | MSR,
  /* HINT           1101 0101 0000 0011 0010 oooo ooo1 1111 */
  /* SEVL, WFE and NOP are HINT with a different value in the `o'
     field, which starts at bit 5.  */
  HINT            = 0xd503201f,
  SEVL            = (5 << 5) | HINT,
  WFE             = (2 << 5) | HINT,
  NOP             = (0 << 5) | HINT,
};
 736
 737 /* List of condition codes that we need.  */
 738
/* Values for the 4-bit condition field of a conditional instruction,
   e.g. the COND argument of emit_bcond.  Only the codes this file
   needs are listed.  */
enum aarch64_condition_codes
{
  EQ = 0x0,
  NE = 0x1,
  LO = 0x3,
  GE = 0xa,
  LT = 0xb,
  GT = 0xc,
  LE = 0xd,
};
 749
 750 /* Representation of a general purpose register of the form xN or wN.
 751
 752    This type is used by emitting functions that take registers as operands.  */
 753
struct aarch64_register
{
  /* Register number, as placed in an instruction's register field.  */
  unsigned num;
  /* Non-zero for the 64-bit form (xN), zero for the 32-bit form
     (wN).  */
  int is64;
};
 759
 760 /* Representation of an operand.  At this time, it only supports register
 761    and immediate types.  */
 762
struct aarch64_operand
{
  /* Type of the operand.  It selects which member of the union below
     is valid.  */
  enum
    {
      OPERAND_IMMEDIATE,
      OPERAND_REGISTER,
    } type;
  /* Value of the operand according to the type: IMM for
     OPERAND_IMMEDIATE, REG for OPERAND_REGISTER.  */
  union
    {
      uint32_t imm;
      struct aarch64_register reg;
    };
};
 778
 779 /* List of registers that we are currently using, we can add more here as
 780    we need to use them.  */
 781
 782 /* General purpose scratch registers (64 bit).  */
/* Each initializer below is { register number, is64 }.  */
static const struct aarch64_register x0 = { 0, 1 };
static const struct aarch64_register x1 = { 1, 1 };
static const struct aarch64_register x2 = { 2, 1 };
static const struct aarch64_register x3 = { 3, 1 };
static const struct aarch64_register x4 = { 4, 1 };

/* General purpose scratch registers (32 bit).  */
static const struct aarch64_register w0 = { 0, 0 };
static const struct aarch64_register w2 = { 2, 0 };

/* Intra-procedure scratch registers.  */
static const struct aarch64_register ip0 = { 16, 1 };

/* Special purpose registers.  Note that sp and xzr deliberately carry
   the same register number (31); presumably the instruction being
   emitted determines which of the two is meant.  */
static const struct aarch64_register fp = { 29, 1 };
static const struct aarch64_register lr = { 30, 1 };
static const struct aarch64_register sp = { 31, 1 };
static const struct aarch64_register xzr = { 31, 1 };
 801
 802 /* Dynamically allocate a new register.  If we know the register
 803    statically, we should make it a global as above instead of using this
 804    helper function.  */
 805
 806 static struct aarch64_register
 807 aarch64_register (unsigned num, int is64)
 808 {
 809   return (struct aarch64_register) { num, is64 };
 810 }
 811
 812 /* Helper function to create a register operand, for instructions with
 813    different types of operands.
 814
 815    For example:
 816    p += emit_mov (p, x0, register_operand (x1));  */
 817
 818 static struct aarch64_operand
 819 register_operand (struct aarch64_register reg)
 820 {
 821   struct aarch64_operand operand;
 822
 823   operand.type = OPERAND_REGISTER;
 824   operand.reg = reg;
 825
 826   return operand;
 827 }
 828
 829 /* Helper function to create an immediate operand, for instructions with
 830    different types of operands.
 831
 832    For example:
 833    p += emit_mov (p, x0, immediate_operand (12));  */
 834
 835 static struct aarch64_operand
 836 immediate_operand (uint32_t imm)
 837 {
 838   struct aarch64_operand operand;
 839
 840   operand.type = OPERAND_IMMEDIATE;
 841   operand.imm = imm;
 842
 843   return operand;
 844 }
 845
 846 /* Representation of a memory operand, used for load and store
 847    instructions.
 848
 849    The types correspond to the following variants:
 850
 851    MEMORY_OPERAND_OFFSET:    LDR rt, [rn, #offset]
 852    MEMORY_OPERAND_PREINDEX:  LDR rt, [rn, #index]!
 853    MEMORY_OPERAND_POSTINDEX: LDR rt, [rn], #index  */
 854
struct aarch64_memory_operand
{
  /* Type of the operand: plain offset, pre-index or post-index
     addressing.  */
  enum
    {
      MEMORY_OPERAND_OFFSET,
      MEMORY_OPERAND_PREINDEX,
      MEMORY_OPERAND_POSTINDEX,
    } type;
  /* Index from the base register.  It is signed, so it may be
     negative.  */
  int32_t index;
};
 867
 868 /* Helper function to create an offset memory operand.
 869
 870    For example:
 871    p += emit_ldr (p, x0, sp, offset_memory_operand (16));  */
 872
 873 static struct aarch64_memory_operand
 874 offset_memory_operand (int32_t offset)
 875 {
 876   return (struct aarch64_memory_operand) { MEMORY_OPERAND_OFFSET, offset };
 877 }
 878
 879 /* Helper function to create a pre-index memory operand.
 880
 881    For example:
 882    p += emit_ldr (p, x0, sp, preindex_memory_operand (16));  */
 883
 884 static struct aarch64_memory_operand
 885 preindex_memory_operand (int32_t index)
 886 {
 887   return (struct aarch64_memory_operand) { MEMORY_OPERAND_PREINDEX, index };
 888 }
 889
 890 /* Helper function to create a post-index memory operand.
 891
 892    For example:
 893    p += emit_ldr (p, x0, sp, postindex_memory_operand (16));  */
 894
 895 static struct aarch64_memory_operand
 896 postindex_memory_operand (int32_t index)
 897 {
 898   return (struct aarch64_memory_operand) { MEMORY_OPERAND_POSTINDEX, index };
 899 }
 900
 901 /* System control registers.  These special registers can be written and
 902    read with the MRS and MSR instructions.
 903
 904    - NZCV: Condition flags.  GDB refers to this register under the CPSR
 905            name.
 906    - FPSR: Floating-point status register.
 907    - FPCR: Floating-point control registers.
 908    - TPIDR_EL0: Software thread ID register.  */
 909
/* Each value packs the five fields named in the column header below:
   op0 at bit 14, op1 at bit 11, crn at bit 7, crm at bit 3 and op2 at
   bit 0.  */
enum aarch64_system_control_registers
{
  /*          op0           op1           crn          crm          op2  */
  NZCV =      (0x1 << 14) | (0x3 << 11) | (0x4 << 7) | (0x2 << 3) | 0x0,
  FPSR =      (0x1 << 14) | (0x3 << 11) | (0x4 << 7) | (0x4 << 3) | 0x1,
  FPCR =      (0x1 << 14) | (0x3 << 11) | (0x4 << 7) | (0x4 << 3) | 0x0,
  TPIDR_EL0 = (0x1 << 14) | (0x3 << 11) | (0xd << 7) | (0x0 << 3) | 0x2
};
 918
 919 /* Helper macro to mask and shift a value into a bitfield.  */
 920
 921 #define ENCODE(val, size, offset) \
 922   ((uint32_t) ((val & ((1ULL << size) - 1)) << offset))
 923
 924 /* Write a 32-bit unsigned integer INSN info *BUF.  Return the number of
 925    instructions written (aka. 1).  */
 926
 927 static int
 928 emit_insn (uint32_t *buf, uint32_t insn)
 929 {
 930   *buf = insn;
 931   return 1;
 932 }
 933
 934 /* Write a B or BL instruction into *BUF.
 935
 936      B  #offset
 937      BL #offset
 938
 939    IS_BL specifies if the link register should be updated.
 940    OFFSET is the immediate offset from the current PC.  It is
 941    byte-addressed but should be 4 bytes aligned.  It has a limited range of
 942    +/- 128MB (26 bits << 2).  */
 943
 944 static int
 945 emit_b (uint32_t *buf, int is_bl, int32_t offset)
 946 {
 947   uint32_t imm26 = ENCODE (offset >> 2, 26, 0);
 948
 949   if (is_bl)
 950     return emit_insn (buf, BL | imm26);
 951   else
 952     return emit_insn (buf, B | imm26);
 953 }
 954
 955 /* Write a BCOND instruction into *BUF.
 956
 957      B.COND #offset
 958
 959    COND specifies the condition field.
 960    OFFSET is the immediate offset from the current PC.  It is
 961    byte-addressed but should be 4 bytes aligned.  It has a limited range of
 962    +/- 1MB (19 bits << 2).  */
 963
 964 static int
 965 emit_bcond (uint32_t *buf, unsigned cond, int32_t offset)
 966 {
 967   return emit_insn (buf, BCOND | ENCODE (offset >> 2, 19, 5)
 968                     | ENCODE (cond, 4, 0));
 969 }
 970
 971 /* Write a CBZ or CBNZ instruction into *BUF.
 972
 973      CBZ  rt, #offset
 974      CBNZ rt, #offset
 975
 976    IS_CBNZ distinguishes between CBZ and CBNZ instructions.
 977    RN is the register to test.
 978    OFFSET is the immediate offset from the current PC.  It is
 979    byte-addressed but should be 4 bytes aligned.  It has a limited range of
 980    +/- 1MB (19 bits << 2).  */
 981
 982 static int
 983 emit_cb (uint32_t *buf, int is_cbnz, struct aarch64_register rt,
 984          int32_t offset)
 985 {
 986   uint32_t imm19 = ENCODE (offset >> 2, 19, 5);
 987   uint32_t sf = ENCODE (rt.is64, 1, 31);
 988
 989   if (is_cbnz)
 990     return emit_insn (buf, CBNZ | sf | imm19 | ENCODE (rt.num, 5, 0));
 991   else
 992     return emit_insn (buf, CBZ | sf | imm19 | ENCODE (rt.num, 5, 0));
 993 }
 994
 995 /* Write a TBZ or TBNZ instruction into *BUF.
 996
 997      TBZ  rt, #bit, #offset
 998      TBNZ rt, #bit, #offset
 999
1000    IS_TBNZ distinguishes between TBZ and TBNZ instructions.
1001    RT is the register to test.
1002    BIT is the index of the bit to test in register RT.
1003    OFFSET is the immediate offset from the current PC.  It is
1004    byte-addressed but should be 4 bytes aligned.  It has a limited range of
1005    +/- 32KB (14 bits << 2).  */
1006
1007 static int
1008 emit_tb (uint32_t *buf, int is_tbnz, unsigned bit,
1009          struct aarch64_register rt, int32_t offset)
1010 {
1011   uint32_t imm14 = ENCODE (offset >> 2, 14, 5);
1012   uint32_t b40 = ENCODE (bit, 5, 19);
1013   uint32_t b5 = ENCODE (bit >> 5, 1, 31);
1014
1015   if (is_tbnz)
1016     return emit_insn (buf, TBNZ | b5 | b40 | imm14 | ENCODE (rt.num, 5, 0));
1017   else
1018     return emit_insn (buf, TBZ | b5 | b40 | imm14 | ENCODE (rt.num, 5, 0));
1019 }
1020
1021 /* Write a BLR instruction into *BUF.
1022
1023      BLR rn
1024
1025    RN is the register to branch to.  */
1026
1027 static int
1028 emit_blr (uint32_t *buf, struct aarch64_register rn)
1029 {
1030   return emit_insn (buf, BLR | ENCODE (rn.num, 5, 5));
1031 }
1032
1033 /* Write a RET instruction into *BUF.
1034
1035      RET xn
1036
1037    RN is the register to branch to.  */
1038
1039 static int
1040 emit_ret (uint32_t *buf, struct aarch64_register rn)
1041 {
1042   return emit_insn (buf, RET | ENCODE (rn.num, 5, 5));
1043 }
1044
1045 static int
1046 emit_load_store_pair (uint32_t *buf, enum aarch64_opcodes opcode,
1047                       struct aarch64_register rt,
1048                       struct aarch64_register rt2,
1049                       struct aarch64_register rn,
1050                       struct aarch64_memory_operand operand)
1051 {
1052   uint32_t opc;
1053   uint32_t pre_index;
1054   uint32_t write_back;
1055
1056   if (rt.is64)
1057     opc = ENCODE (2, 2, 30);
1058   else
1059     opc = ENCODE (0, 2, 30);
1060
1061   switch (operand.type)
1062     {
1063     case MEMORY_OPERAND_OFFSET:
1064       {
1065         pre_index = ENCODE (1, 1, 24);
1066         write_back = ENCODE (0, 1, 23);
1067         break;
1068       }
1069     case MEMORY_OPERAND_POSTINDEX:
1070       {
1071         pre_index = ENCODE (0, 1, 24);
1072         write_back = ENCODE (1, 1, 23);
1073         break;
1074       }
1075     case MEMORY_OPERAND_PREINDEX:
1076       {
1077         pre_index = ENCODE (1, 1, 24);
1078         write_back = ENCODE (1, 1, 23);
1079         break;
1080       }
1081     default:
1082       return 0;
1083     }
1084
1085   return emit_insn (buf, opcode | opc | pre_index | write_back
1086                     | ENCODE (operand.index >> 3, 7, 15) | ENCODE (rt2.num, 5, 10)
1087                     | ENCODE (rn.num, 5, 5) | ENCODE (rt.num, 5, 0));
1088 }
1089
1090 /* Write a STP instruction into *BUF.
1091
1092      STP rt, rt2, [rn, #offset]
1093      STP rt, rt2, [rn, #index]!
1094      STP rt, rt2, [rn], #index
1095
1096    RT and RT2 are the registers to store.
1097    RN is the base address register.
1098    OFFSET is the immediate to add to the base address.  It is limited to a
1099    -512 .. 504 range (7 bits << 3).  */
1100
1101 static int
1102 emit_stp (uint32_t *buf, struct aarch64_register rt,
1103           struct aarch64_register rt2, struct aarch64_register rn,
1104           struct aarch64_memory_operand operand)
1105 {
1106   return emit_load_store_pair (buf, STP, rt, rt2, rn, operand);
1107 }
1108
1109 /* Write a LDP instruction into *BUF.
1110
1111      LDP rt, rt2, [rn, #offset]
1112      LDP rt, rt2, [rn, #index]!
1113      LDP rt, rt2, [rn], #index
1114
1115    RT and RT2 are the registers to store.
1116    RN is the base address register.
1117    OFFSET is the immediate to add to the base address.  It is limited to a
1118    -512 .. 504 range (7 bits << 3).  */
1119
1120 static int
1121 emit_ldp (uint32_t *buf, struct aarch64_register rt,
1122           struct aarch64_register rt2, struct aarch64_register rn,
1123           struct aarch64_memory_operand operand)
1124 {
1125   return emit_load_store_pair (buf, LDP, rt, rt2, rn, operand);
1126 }
1127
1128 /* Write a LDP (SIMD&VFP) instruction using Q registers into *BUF.
1129
1130      LDP qt, qt2, [rn, #offset]
1131
1132    RT and RT2 are the Q registers to store.
1133    RN is the base address register.
1134    OFFSET is the immediate to add to the base address.  It is limited to
1135    -1024 .. 1008 range (7 bits << 4).  */
1136
1137 static int
1138 emit_ldp_q_offset (uint32_t *buf, unsigned rt, unsigned rt2,
1139                    struct aarch64_register rn, int32_t offset)
1140 {
1141   uint32_t opc = ENCODE (2, 2, 30);
1142   uint32_t pre_index = ENCODE (1, 1, 24);
1143
1144   return emit_insn (buf, LDP_SIMD_VFP | opc | pre_index
1145                     | ENCODE (offset >> 4, 7, 15) | ENCODE (rt2, 5, 10)
1146                     | ENCODE (rn.num, 5, 5) | ENCODE (rt, 5, 0));
1147 }
1148
1149 /* Write a STP (SIMD&VFP) instruction using Q registers into *BUF.
1150
1151      STP qt, qt2, [rn, #offset]
1152
1153    RT and RT2 are the Q registers to store.
1154    RN is the base address register.
1155    OFFSET is the immediate to add to the base address.  It is limited to
1156    -1024 .. 1008 range (7 bits << 4).  */
1157
1158 static int
1159 emit_stp_q_offset (uint32_t *buf, unsigned rt, unsigned rt2,
1160                    struct aarch64_register rn, int32_t offset)
1161 {
1162   uint32_t opc = ENCODE (2, 2, 30);
1163   uint32_t pre_index = ENCODE (1, 1, 24);
1164
1165   return emit_insn (buf, STP_SIMD_VFP | opc | pre_index
1166                     | ENCODE (offset >> 4, 7, 15) | ENCODE (rt2, 5, 10)
1167                     | ENCODE (rn.num, 5, 5) | ENCODE (rt, 5, 0));
1168 }
1169
1170 /* Helper function emitting a load or store instruction.  */
1171
1172 static int
1173 emit_load_store (uint32_t *buf, uint32_t size, enum aarch64_opcodes opcode,
1174                  struct aarch64_register rt, struct aarch64_register rn,
1175                  struct aarch64_memory_operand operand)
1176 {
1177   uint32_t op;
1178
1179   switch (operand.type)
1180     {
1181     case MEMORY_OPERAND_OFFSET:
1182       {
1183         op = ENCODE (1, 1, 24);
1184
1185         return emit_insn (buf, opcode | ENCODE (size, 2, 30) | op
1186                           | ENCODE (operand.index >> 3, 12, 10)
1187                           | ENCODE (rn.num, 5, 5) | ENCODE (rt.num, 5, 0));
1188       }
1189     case MEMORY_OPERAND_POSTINDEX:
1190       {
1191         uint32_t post_index = ENCODE (1, 2, 10);
1192
1193         op = ENCODE (0, 1, 24);
1194
1195         return emit_insn (buf, opcode | ENCODE (size, 2, 30) | op
1196                           | post_index | ENCODE (operand.index, 9, 12)
1197                           | ENCODE (rn.num, 5, 5) | ENCODE (rt.num, 5, 0));
1198       }
1199     case MEMORY_OPERAND_PREINDEX:
1200       {
1201         uint32_t pre_index = ENCODE (3, 2, 10);
1202
1203         op = ENCODE (0, 1, 24);
1204
1205         return emit_insn (buf, opcode | ENCODE (size, 2, 30) | op
1206                           | pre_index | ENCODE (operand.index, 9, 12)
1207                           | ENCODE (rn.num, 5, 5) | ENCODE (rt.num, 5, 0));
1208       }
1209     default:
1210       return 0;
1211     }
1212 }
1213
1214 /* Write a LDR instruction into *BUF.
1215
1216      LDR rt, [rn, #offset]
1217      LDR rt, [rn, #index]!
1218      LDR rt, [rn], #index
1219
1220    RT is the register to store.
1221    RN is the base address register.
1222    OFFSET is the immediate to add to the base address.  It is limited to
1223    0 .. 32760 range (12 bits << 3).  */
1224
1225 static int
1226 emit_ldr (uint32_t *buf, struct aarch64_register rt,
1227           struct aarch64_register rn, struct aarch64_memory_operand operand)
1228 {
1229   return emit_load_store (buf, rt.is64 ? 3 : 2, LDR, rt, rn, operand);
1230 }
1231
1232 /* Write a LDRH instruction into *BUF.
1233
1234      LDRH wt, [xn, #offset]
1235      LDRH wt, [xn, #index]!
1236      LDRH wt, [xn], #index
1237
1238    RT is the register to store.
1239    RN is the base address register.
1240    OFFSET is the immediate to add to the base address.  It is limited to
1241    0 .. 32760 range (12 bits << 3).  */
1242
1243 static int
1244 emit_ldrh (uint32_t *buf, struct aarch64_register rt,
1245            struct aarch64_register rn,
1246            struct aarch64_memory_operand operand)
1247 {
1248   return emit_load_store (buf, 1, LDR, rt, rn, operand);
1249 }
1250
1251 /* Write a LDRB instruction into *BUF.
1252
1253      LDRB wt, [xn, #offset]
1254      LDRB wt, [xn, #index]!
1255      LDRB wt, [xn], #index
1256
1257    RT is the register to store.
1258    RN is the base address register.
1259    OFFSET is the immediate to add to the base address.  It is limited to
1260    0 .. 32760 range (12 bits << 3).  */
1261
1262 static int
1263 emit_ldrb (uint32_t *buf, struct aarch64_register rt,
1264            struct aarch64_register rn,
1265            struct aarch64_memory_operand operand)
1266 {
1267   return emit_load_store (buf, 0, LDR, rt, rn, operand);
1268 }
1269
1270 /* Write a LDRSW instruction into *BUF.  The register size is 64-bit.
1271
1272      LDRSW xt, [rn, #offset]
1273      LDRSW xt, [rn, #index]!
1274      LDRSW xt, [rn], #index
1275
1276    RT is the register to store.
1277    RN is the base address register.
1278    OFFSET is the immediate to add to the base address.  It is limited to
1279    0 .. 16380 range (12 bits << 2).  */
1280
1281 static int
1282 emit_ldrsw (uint32_t *buf, struct aarch64_register rt,
1283                    struct aarch64_register rn,
1284                    struct aarch64_memory_operand operand)
1285 {
1286   return emit_load_store (buf, 3, LDRSW, rt, rn, operand);
1287 }
1288
1289 /* Write a STR instruction into *BUF.
1290
1291      STR rt, [rn, #offset]
1292      STR rt, [rn, #index]!
1293      STR rt, [rn], #index
1294
1295    RT is the register to store.
1296    RN is the base address register.
1297    OFFSET is the immediate to add to the base address.  It is limited to
1298    0 .. 32760 range (12 bits << 3).  */
1299
1300 static int
1301 emit_str (uint32_t *buf, struct aarch64_register rt,
1302           struct aarch64_register rn,
1303           struct aarch64_memory_operand operand)
1304 {
1305   return emit_load_store (buf, rt.is64 ? 3 : 2, STR, rt, rn, operand);
1306 }
1307
1308 /* Helper function emitting an exclusive load or store instruction.  */
1309
1310 static int
1311 emit_load_store_exclusive (uint32_t *buf, uint32_t size,
1312                            enum aarch64_opcodes opcode,
1313                            struct aarch64_register rs,
1314                            struct aarch64_register rt,
1315                            struct aarch64_register rt2,
1316                            struct aarch64_register rn)
1317 {
1318   return emit_insn (buf, opcode | ENCODE (size, 2, 30)
1319                     | ENCODE (rs.num, 5, 16) | ENCODE (rt2.num, 5, 10)
1320                     | ENCODE (rn.num, 5, 5) | ENCODE (rt.num, 5, 0));
1321 }
1322
1323 /* Write a LAXR instruction into *BUF.
1324
1325      LDAXR rt, [xn]
1326
1327    RT is the destination register.
1328    RN is the base address register.  */
1329
1330 static int
1331 emit_ldaxr (uint32_t *buf, struct aarch64_register rt,
1332             struct aarch64_register rn)
1333 {
1334   return emit_load_store_exclusive (buf, rt.is64 ? 3 : 2, LDAXR, xzr, rt,
1335                                     xzr, rn);
1336 }
1337
1338 /* Write a STXR instruction into *BUF.
1339
1340      STXR ws, rt, [xn]
1341
1342    RS is the result register, it indicates if the store succeeded or not.
1343    RT is the destination register.
1344    RN is the base address register.  */
1345
1346 static int
1347 emit_stxr (uint32_t *buf, struct aarch64_register rs,
1348            struct aarch64_register rt, struct aarch64_register rn)
1349 {
1350   return emit_load_store_exclusive (buf, rt.is64 ? 3 : 2, STXR, rs, rt,
1351                                     xzr, rn);
1352 }
1353
1354 /* Write a STLR instruction into *BUF.
1355
1356      STLR rt, [xn]
1357
1358    RT is the register to store.
1359    RN is the base address register.  */
1360
1361 static int
1362 emit_stlr (uint32_t *buf, struct aarch64_register rt,
1363            struct aarch64_register rn)
1364 {
1365   return emit_load_store_exclusive (buf, rt.is64 ? 3 : 2, STLR, xzr, rt,
1366                                     xzr, rn);
1367 }
1368
1369 /* Helper function for data processing instructions with register sources.  */
1370
1371 static int
1372 emit_data_processing_reg (uint32_t *buf, enum aarch64_opcodes opcode,
1373                           struct aarch64_register rd,
1374                           struct aarch64_register rn,
1375                           struct aarch64_register rm)
1376 {
1377   uint32_t size = ENCODE (rd.is64, 1, 31);
1378
1379   return emit_insn (buf, opcode | size | ENCODE (rm.num, 5, 16)
1380                     | ENCODE (rn.num, 5, 5) | ENCODE (rd.num, 5, 0));
1381 }
1382
1383 /* Helper function for data processing instructions taking either a register
1384    or an immediate.  */
1385
1386 static int
1387 emit_data_processing (uint32_t *buf, enum aarch64_opcodes opcode,
1388                       struct aarch64_register rd,
1389                       struct aarch64_register rn,
1390                       struct aarch64_operand operand)
1391 {
1392   uint32_t size = ENCODE (rd.is64, 1, 31);
1393   /* The opcode is different for register and immediate source operands.  */
1394   uint32_t operand_opcode;
1395
1396   if (operand.type == OPERAND_IMMEDIATE)
1397     {
1398       /* xxx1 000x xxxx xxxx xxxx xxxx xxxx xxxx */
1399       operand_opcode = ENCODE (8, 4, 25);
1400
1401       return emit_insn (buf, opcode | operand_opcode | size
1402                         | ENCODE (operand.imm, 12, 10)
1403                         | ENCODE (rn.num, 5, 5) | ENCODE (rd.num, 5, 0));
1404     }
1405   else
1406     {
1407       /* xxx0 101x xxxx xxxx xxxx xxxx xxxx xxxx */
1408       operand_opcode = ENCODE (5, 4, 25);
1409
1410       return emit_data_processing_reg (buf, opcode | operand_opcode, rd,
1411                                        rn, operand.reg);
1412     }
1413 }
1414
1415 /* Write an ADD instruction into *BUF.
1416
1417      ADD rd, rn, #imm
1418      ADD rd, rn, rm
1419
1420    This function handles both an immediate and register add.
1421
1422    RD is the destination register.
1423    RN is the input register.
1424    OPERAND is the source operand, either of type OPERAND_IMMEDIATE or
1425    OPERAND_REGISTER.  */
1426
1427 static int
1428 emit_add (uint32_t *buf, struct aarch64_register rd,
1429           struct aarch64_register rn, struct aarch64_operand operand)
1430 {
1431   return emit_data_processing (buf, ADD, rd, rn, operand);
1432 }
1433
1434 /* Write a SUB instruction into *BUF.
1435
1436      SUB rd, rn, #imm
1437      SUB rd, rn, rm
1438
1439    This function handles both an immediate and register sub.
1440
1441    RD is the destination register.
1442    RN is the input register.
1443    IMM is the immediate to substract to RN.  */
1444
1445 static int
1446 emit_sub (uint32_t *buf, struct aarch64_register rd,
1447           struct aarch64_register rn, struct aarch64_operand operand)
1448 {
1449   return emit_data_processing (buf, SUB, rd, rn, operand);
1450 }
1451
1452 /* Write a MOV instruction into *BUF.
1453
1454      MOV rd, #imm
1455      MOV rd, rm
1456
1457    This function handles both a wide immediate move and a register move,
1458    with the condition that the source register is not xzr.  xzr and the
1459    stack pointer share the same encoding and this function only supports
1460    the stack pointer.
1461
1462    RD is the destination register.
1463    OPERAND is the source operand, either of type OPERAND_IMMEDIATE or
1464    OPERAND_REGISTER.  */
1465
1466 static int
1467 emit_mov (uint32_t *buf, struct aarch64_register rd,
1468           struct aarch64_operand operand)
1469 {
1470   if (operand.type == OPERAND_IMMEDIATE)
1471     {
1472       uint32_t size = ENCODE (rd.is64, 1, 31);
1473       /* Do not shift the immediate.  */
1474       uint32_t shift = ENCODE (0, 2, 21);
1475
1476       return emit_insn (buf, MOV | size | shift
1477                         | ENCODE (operand.imm, 16, 5)
1478                         | ENCODE (rd.num, 5, 0));
1479     }
1480   else
1481     return emit_add (buf, rd, operand.reg, immediate_operand (0));
1482 }
1483
1484 /* Write a MOVK instruction into *BUF.
1485
1486      MOVK rd, #imm, lsl #shift
1487
1488    RD is the destination register.
1489    IMM is the immediate.
1490    SHIFT is the logical shift left to apply to IMM.   */
1491
1492 static int
1493 emit_movk (uint32_t *buf, struct aarch64_register rd, uint32_t imm, unsigned shift)
1494 {
1495   uint32_t size = ENCODE (rd.is64, 1, 31);
1496
1497   return emit_insn (buf, MOVK | size | ENCODE (shift, 2, 21) |
1498                     ENCODE (imm, 16, 5) | ENCODE (rd.num, 5, 0));
1499 }
1500
1501 /* Write instructions into *BUF in order to move ADDR into a register.
1502    ADDR can be a 64-bit value.
1503
1504    This function will emit a series of MOV and MOVK instructions, such as:
1505
1506      MOV  xd, #(addr)
1507      MOVK xd, #(addr >> 16), lsl #16
1508      MOVK xd, #(addr >> 32), lsl #32
1509      MOVK xd, #(addr >> 48), lsl #48  */
1510
1511 static int
1512 emit_mov_addr (uint32_t *buf, struct aarch64_register rd, CORE_ADDR addr)
1513 {
1514   uint32_t *p = buf;
1515
1516   /* The MOV (wide immediate) instruction clears to top bits of the
1517      register.  */
1518   p += emit_mov (p, rd, immediate_operand (addr & 0xffff));
1519
1520   if ((addr >> 16) != 0)
1521     p += emit_movk (p, rd, (addr >> 16) & 0xffff, 1);
1522   else
1523     return p - buf;
1524
1525   if ((addr >> 32) != 0)
1526     p += emit_movk (p, rd, (addr >> 32) & 0xffff, 2);
1527   else
1528     return p - buf;
1529
1530   if ((addr >> 48) != 0)
1531     p += emit_movk (p, rd, (addr >> 48) & 0xffff, 3);
1532
1533   return p - buf;
1534 }
1535
1536 /* Write a SUBS instruction into *BUF.
1537
1538      SUBS rd, rn, rm
1539
1540    This instruction update the condition flags.
1541
1542    RD is the destination register.
1543    RN and RM are the source registers.  */
1544
1545 static int
1546 emit_subs (uint32_t *buf, struct aarch64_register rd,
1547            struct aarch64_register rn, struct aarch64_operand operand)
1548 {
1549   return emit_data_processing (buf, SUBS, rd, rn, operand);
1550 }
1551
1552 /* Write a CMP instruction into *BUF.
1553
1554      CMP rn, rm
1555
1556    This instruction is an alias of SUBS xzr, rn, rm.
1557
1558    RN and RM are the registers to compare.  */
1559
1560 static int
1561 emit_cmp (uint32_t *buf, struct aarch64_register rn,
1562               struct aarch64_operand operand)
1563 {
1564   return emit_subs (buf, xzr, rn, operand);
1565 }
1566
1567 /* Write a AND instruction into *BUF.
1568
1569      AND rd, rn, rm
1570
1571    RD is the destination register.
1572    RN and RM are the source registers.  */
1573
1574 static int
1575 emit_and (uint32_t *buf, struct aarch64_register rd,
1576           struct aarch64_register rn, struct aarch64_register rm)
1577 {
1578   return emit_data_processing_reg (buf, AND, rd, rn, rm);
1579 }
1580
1581 /* Write a ORR instruction into *BUF.
1582
1583      ORR rd, rn, rm
1584
1585    RD is the destination register.
1586    RN and RM are the source registers.  */
1587
1588 static int
1589 emit_orr (uint32_t *buf, struct aarch64_register rd,
1590           struct aarch64_register rn, struct aarch64_register rm)
1591 {
1592   return emit_data_processing_reg (buf, ORR, rd, rn, rm);
1593 }
1594
1595 /* Write a ORN instruction into *BUF.
1596
1597      ORN rd, rn, rm
1598
1599    RD is the destination register.
1600    RN and RM are the source registers.  */
1601
1602 static int
1603 emit_orn (uint32_t *buf, struct aarch64_register rd,
1604           struct aarch64_register rn, struct aarch64_register rm)
1605 {
1606   return emit_data_processing_reg (buf, ORN, rd, rn, rm);
1607 }
1608
1609 /* Write a EOR instruction into *BUF.
1610
1611      EOR rd, rn, rm
1612
1613    RD is the destination register.
1614    RN and RM are the source registers.  */
1615
1616 static int
1617 emit_eor (uint32_t *buf, struct aarch64_register rd,
1618           struct aarch64_register rn, struct aarch64_register rm)
1619 {
1620   return emit_data_processing_reg (buf, EOR, rd, rn, rm);
1621 }
1622
1623 /* Write a MVN instruction into *BUF.
1624
1625      MVN rd, rm
1626
1627    This is an alias for ORN rd, xzr, rm.
1628
1629    RD is the destination register.
1630    RM is the source register.  */
1631
1632 static int
1633 emit_mvn (uint32_t *buf, struct aarch64_register rd,
1634           struct aarch64_register rm)
1635 {
1636   return emit_orn (buf, rd, xzr, rm);
1637 }
1638
1639 /* Write a LSLV instruction into *BUF.
1640
1641      LSLV rd, rn, rm
1642
1643    RD is the destination register.
1644    RN and RM are the source registers.  */
1645
1646 static int
1647 emit_lslv (uint32_t *buf, struct aarch64_register rd,
1648            struct aarch64_register rn, struct aarch64_register rm)
1649 {
1650   return emit_data_processing_reg (buf, LSLV, rd, rn, rm);
1651 }
1652
1653 /* Write a LSRV instruction into *BUF.
1654
1655      LSRV rd, rn, rm
1656
1657    RD is the destination register.
1658    RN and RM are the source registers.  */
1659
1660 static int
1661 emit_lsrv (uint32_t *buf, struct aarch64_register rd,
1662            struct aarch64_register rn, struct aarch64_register rm)
1663 {
1664   return emit_data_processing_reg (buf, LSRV, rd, rn, rm);
1665 }
1666
1667 /* Write a ASRV instruction into *BUF.
1668
1669      ASRV rd, rn, rm
1670
1671    RD is the destination register.
1672    RN and RM are the source registers.  */
1673
1674 static int
1675 emit_asrv (uint32_t *buf, struct aarch64_register rd,
1676            struct aarch64_register rn, struct aarch64_register rm)
1677 {
1678   return emit_data_processing_reg (buf, ASRV, rd, rn, rm);
1679 }
1680
1681 /* Write a MUL instruction into *BUF.
1682
1683      MUL rd, rn, rm
1684
1685    RD is the destination register.
1686    RN and RM are the source registers.  */
1687
1688 static int
1689 emit_mul (uint32_t *buf, struct aarch64_register rd,
1690           struct aarch64_register rn, struct aarch64_register rm)
1691 {
1692   return emit_data_processing_reg (buf, MUL, rd, rn, rm);
1693 }
1694
1695 /* Write a MRS instruction into *BUF.  The register size is 64-bit.
1696
1697      MRS xt, system_reg
1698
1699    RT is the destination register.
1700    SYSTEM_REG is special purpose register to read.  */
1701
1702 static int
1703 emit_mrs (uint32_t *buf, struct aarch64_register rt,
1704           enum aarch64_system_control_registers system_reg)
1705 {
1706   return emit_insn (buf, MRS | ENCODE (system_reg, 15, 5)
1707                     | ENCODE (rt.num, 5, 0));
1708 }
1709
1710 /* Write a MSR instruction into *BUF.  The register size is 64-bit.
1711
1712      MSR system_reg, xt
1713
1714    SYSTEM_REG is special purpose register to write.
1715    RT is the input register.  */
1716
1717 static int
1718 emit_msr (uint32_t *buf, enum aarch64_system_control_registers system_reg,
1719           struct aarch64_register rt)
1720 {
1721   return emit_insn (buf, MSR | ENCODE (system_reg, 15, 5)
1722                     | ENCODE (rt.num, 5, 0));
1723 }
1724
1725 /* Write a SEVL instruction into *BUF.
1726
1727    This is a hint instruction telling the hardware to trigger an event.  */
1728
1729 static int
1730 emit_sevl (uint32_t *buf)
1731 {
1732   return emit_insn (buf, SEVL);
1733 }
1734
1735 /* Write a WFE instruction into *BUF.
1736
1737    This is a hint instruction telling the hardware to wait for an event.  */
1738
1739 static int
1740 emit_wfe (uint32_t *buf)
1741 {
1742   return emit_insn (buf, WFE);
1743 }
1744
1745 /* Write a SBFM instruction into *BUF.
1746
1747      SBFM rd, rn, #immr, #imms
1748
1749    This instruction moves the bits from #immr to #imms into the
1750    destination, sign extending the result.
1751
1752    RD is the destination register.
1753    RN is the source register.
1754    IMMR is the bit number to start at (least significant bit).
1755    IMMS is the bit number to stop at (most significant bit).  */
1756
1757 static int
1758 emit_sbfm (uint32_t *buf, struct aarch64_register rd,
1759            struct aarch64_register rn, uint32_t immr, uint32_t imms)
1760 {
1761   uint32_t size = ENCODE (rd.is64, 1, 31);
1762   uint32_t n = ENCODE (rd.is64, 1, 22);
1763
1764   return emit_insn (buf, SBFM | size | n | ENCODE (immr, 6, 16)
1765                     | ENCODE (imms, 6, 10) | ENCODE (rn.num, 5, 5)
1766                     | ENCODE (rd.num, 5, 0));
1767 }
1768
1769 /* Write a SBFX instruction into *BUF.
1770
1771      SBFX rd, rn, #lsb, #width
1772
1773    This instruction moves #width bits from #lsb into the destination, sign
1774    extending the result.  This is an alias for:
1775
1776      SBFM rd, rn, #lsb, #(lsb + width - 1)
1777
1778    RD is the destination register.
1779    RN is the source register.
1780    LSB is the bit number to start at (least significant bit).
1781    WIDTH is the number of bits to move.  */
1782
1783 static int
1784 emit_sbfx (uint32_t *buf, struct aarch64_register rd,
1785            struct aarch64_register rn, uint32_t lsb, uint32_t width)
1786 {
1787   return emit_sbfm (buf, rd, rn, lsb, lsb + width - 1);
1788 }
1789
1790 /* Write a UBFM instruction into *BUF.
1791
1792      UBFM rd, rn, #immr, #imms
1793
1794    This instruction moves the bits from #immr to #imms into the
1795    destination, extending the result with zeros.
1796
1797    RD is the destination register.
1798    RN is the source register.
1799    IMMR is the bit number to start at (least significant bit).
1800    IMMS is the bit number to stop at (most significant bit).  */
1801
1802 static int
1803 emit_ubfm (uint32_t *buf, struct aarch64_register rd,
1804            struct aarch64_register rn, uint32_t immr, uint32_t imms)
1805 {
1806   uint32_t size = ENCODE (rd.is64, 1, 31);
1807   uint32_t n = ENCODE (rd.is64, 1, 22);
1808
1809   return emit_insn (buf, UBFM | size | n | ENCODE (immr, 6, 16)
1810                     | ENCODE (imms, 6, 10) | ENCODE (rn.num, 5, 5)
1811                     | ENCODE (rd.num, 5, 0));
1812 }
1813
1814 /* Write a UBFX instruction into *BUF.
1815
1816      UBFX rd, rn, #lsb, #width
1817
1818    This instruction moves #width bits from #lsb into the destination,
1819    extending the result with zeros.  This is an alias for:
1820
1821      UBFM rd, rn, #lsb, #(lsb + width - 1)
1822
1823    RD is the destination register.
1824    RN is the source register.
1825    LSB is the bit number to start at (least significant bit).
1826    WIDTH is the number of bits to move.  */
1827
1828 static int
1829 emit_ubfx (uint32_t *buf, struct aarch64_register rd,
1830            struct aarch64_register rn, uint32_t lsb, uint32_t width)
1831 {
1832   return emit_ubfm (buf, rd, rn, lsb, lsb + width - 1);
1833 }
1834
1835 /* Write a CSINC instruction into *BUF.
1836
1837      CSINC rd, rn, rm, cond
1838
1839    This instruction conditionally increments rn or rm and places the result
1840    in rd.  rn is chosen is the condition is true.
1841
1842    RD is the destination register.
1843    RN and RM are the source registers.
1844    COND is the encoded condition.  */
1845
1846 static int
1847 emit_csinc (uint32_t *buf, struct aarch64_register rd,
1848             struct aarch64_register rn, struct aarch64_register rm,
1849             unsigned cond)
1850 {
1851   uint32_t size = ENCODE (rd.is64, 1, 31);
1852
1853   return emit_insn (buf, CSINC | size | ENCODE (rm.num, 5, 16)
1854                     | ENCODE (cond, 4, 12) | ENCODE (rn.num, 5, 5)
1855                     | ENCODE (rd.num, 5, 0));
1856 }
1857
1858 /* Write a CSET instruction into *BUF.
1859
1860      CSET rd, cond
1861
1862    This instruction conditionally write 1 or 0 in the destination register.
1863    1 is written if the condition is true.  This is an alias for:
1864
1865      CSINC rd, xzr, xzr, !cond
1866
1867    Note that the condition needs to be inverted.
1868
1869    RD is the destination register.
1870    RN and RM are the source registers.
1871    COND is the encoded condition.  */
1872
1873 static int
1874 emit_cset (uint32_t *buf, struct aarch64_register rd, unsigned cond)
1875 {
1876   /* The least significant bit of the condition needs toggling in order to
1877      invert it.  */
1878   return emit_csinc (buf, rd, xzr, xzr, cond ^ 0x1);
1879 }
1880
1881 /* Write a NOP instruction into *BUF.  */
1882
1883 static int
1884 emit_nop (uint32_t *buf)
1885 {
1886   return emit_insn (buf, NOP);
1887 }
1888
1889 /* Write LEN instructions from BUF into the inferior memory at *TO.
1890
1891    Note instructions are always little endian on AArch64, unlike data.  */
1892
1893 static void
1894 append_insns (CORE_ADDR *to, size_t len, const uint32_t *buf)
1895 {
1896   size_t byte_len = len * sizeof (uint32_t);
1897 #if (__BYTE_ORDER == __BIG_ENDIAN)
1898   uint32_t *le_buf = xmalloc (byte_len);
1899   size_t i;
1900
1901   for (i = 0; i < len; i++)
1902     le_buf[i] = htole32 (buf[i]);
1903
1904   write_inferior_memory (*to, (const unsigned char *) le_buf, byte_len);
1905
1906   xfree (le_buf);
1907 #else
1908   write_inferior_memory (*to, (const unsigned char *) buf, byte_len);
1909 #endif
1910
1911   *to += byte_len;
1912 }
1913
/* Helper function.  Return 1 if VAL can be encoded as a signed (two's
   complement) field that is BITS bits wide, that is if

     -2^(BITS - 1) <= VAL < 2^(BITS - 1)

   and return 0 otherwise.

   A field of 0 bits cannot hold any value, and a field of 32 bits or more
   holds every int32_t.  */

static int
can_encode_int32 (int32_t val, unsigned bits)
{
  int64_t limit;

  if (bits == 0)
    return 0;

  if (bits >= 32)
    return 1;

  /* One bit of the field is the sign, so the magnitude has BITS - 1 bits.
     A range comparison avoids right-shifting a negative value, whose
     result is implementation-defined.  */
  limit = (int64_t) 1 << (bits - 1);

  return val >= -limit && val < limit;
}
1924
1925 /* Relocate an instruction from OLDLOC to *TO.  This function will also
1926    increment TO by the number of bytes the new instruction(s) take(s).
1927
1928    PC relative instructions need to be handled specifically:
1929
1930    - B/BL
1931    - B.COND
1932    - CBZ/CBNZ
1933    - TBZ/TBNZ
1934    - ADR/ADRP
1935    - LDR/LDRSW (literal)  */
1936
1937 static void
1938 aarch64_relocate_instruction (CORE_ADDR *to, CORE_ADDR oldloc)
1939 {
1940   uint32_t buf[32];
1941   uint32_t *p = buf;
1942   uint32_t insn;
1943
1944   int is_bl;
1945   int is64;
1946   int is_sw;
1947   int is_cbnz;
1948   int is_tbnz;
1949   int is_adrp;
1950   unsigned rn;
1951   unsigned rt;
1952   unsigned rd;
1953   unsigned cond;
1954   unsigned bit;
1955   int32_t offset;
1956
1957   target_read_uint32 (oldloc, &insn);
1958
1959   if (aarch64_decode_b (oldloc, insn, &is_bl, &offset))
1960     {
1961       offset = (oldloc - *to + offset);
1962
1963       if (can_encode_int32 (offset, 28))
1964         p += emit_b (p, is_bl, offset);
1965       else
1966         return;
1967     }
1968   else if (aarch64_decode_bcond (oldloc, insn, &cond, &offset))
1969     {
1970       offset = (oldloc - *to + offset);
1971
1972       if (can_encode_int32 (offset, 21))
1973         p += emit_bcond (p, cond, offset);
1974       else if (can_encode_int32 (offset, 28))
1975         {
1976           /* The offset is out of range for a conditional branch
1977              instruction but not for a unconditional branch.  We can use
1978              the following instructions instead:
1979
1980                B.COND TAKEN    ; If cond is true, then jump to TAKEN.
1981                B NOT_TAKEN     ; Else jump over TAKEN and continue.
1982              TAKEN:
1983                B #(offset - 8)
1984              NOT_TAKEN:
1985
1986              */
1987
1988           p += emit_bcond (p, cond, 8);
1989           p += emit_b (p, 0, 8);
1990           p += emit_b (p, 0, offset - 8);
1991         }
1992       else
1993         return;
1994     }
1995   else if (aarch64_decode_cb (oldloc, insn, &is64, &is_cbnz, &rn, &offset))
1996     {
1997       offset = (oldloc - *to + offset);
1998
1999       if (can_encode_int32 (offset, 21))
2000         p += emit_cb (p, is_cbnz, aarch64_register (rn, is64), offset);
2001       else if (can_encode_int32 (offset, 28))
2002         {
2003           /* The offset is out of range for a compare and branch
2004              instruction but not for a unconditional branch.  We can use
2005              the following instructions instead:
2006
2007                CBZ xn, TAKEN   ; xn == 0, then jump to TAKEN.
2008                B NOT_TAKEN     ; Else jump over TAKEN and continue.
2009              TAKEN:
2010                B #(offset - 8)
2011              NOT_TAKEN:
2012
2013              */
2014           p += emit_cb (p, is_cbnz, aarch64_register (rn, is64), 8);
2015           p += emit_b (p, 0, 8);
2016           p += emit_b (p, 0, offset - 8);
2017         }
2018       else
2019         return;
2020     }
2021   else if (aarch64_decode_tb (oldloc, insn, &is_tbnz, &bit, &rt, &offset))
2022     {
2023       offset = (oldloc - *to + offset);
2024
2025       if (can_encode_int32 (offset, 16))
2026         p += emit_tb (p, is_tbnz, bit, aarch64_register (rt, 1), offset);
2027       else if (can_encode_int32 (offset, 28))
2028         {
2029           /* The offset is out of range for a test bit and branch
2030              instruction but not for a unconditional branch.  We can use
2031              the following instructions instead:
2032
2033                TBZ xn, #bit, TAKEN ; xn[bit] == 0, then jump to TAKEN.
2034                B NOT_TAKEN         ; Else jump over TAKEN and continue.
2035              TAKEN:
2036                B #(offset - 8)
2037              NOT_TAKEN:
2038
2039              */
2040           p += emit_tb (p, is_tbnz, bit, aarch64_register (rt, 1), 8);
2041           p += emit_b (p, 0, 8);
2042           p += emit_b (p, 0, offset - 8);
2043         }
2044       else
2045         return;
2046     }
2047   else if (aarch64_decode_adr (oldloc, insn, &is_adrp, &rd, &offset))
2048     {
2049
2050       /* We know exactly the address the ADR{P,} instruction will compute.
2051          We can just write it to the destination register.  */
2052       CORE_ADDR address = oldloc + offset;
2053
2054       if (is_adrp)
2055         {
2056           /* Clear the lower 12 bits of the offset to get the 4K page.  */
2057           p += emit_mov_addr (p, aarch64_register (rd, 1),
2058                               address & ~0xfff);
2059         }
2060       else
2061         p += emit_mov_addr (p, aarch64_register (rd, 1), address);
2062     }
2063   else if (aarch64_decode_ldr_literal (oldloc, insn, &is_sw, &is64, &rt,
2064                                        &offset))
2065     {
2066       /* We know exactly what address to load from, and what register we
2067          can use:
2068
2069            MOV xd, #(oldloc + offset)
2070            MOVK xd, #((oldloc + offset) >> 16), lsl #16
2071            ...
2072
2073            LDR xd, [xd] ; or LDRSW xd, [xd]
2074
2075          */
2076       CORE_ADDR address = oldloc + offset;
2077
2078       p += emit_mov_addr (p, aarch64_register (rt, 1), address);
2079
2080       if (is_sw)
2081         p += emit_ldrsw (p, aarch64_register (rt, 1),
2082                          aarch64_register (rt, 1),
2083                          offset_memory_operand (0));
2084       else
2085         p += emit_ldr (p, aarch64_register (rt, is64),
2086                        aarch64_register (rt, 1),
2087                        offset_memory_operand (0));
2088     }
2089   else
2090     {
2091       /* The instruction is not PC relative.  Just re-emit it at the new
2092          location.  */
2093       p += emit_insn (p, insn);
2094     }
2095
2096   append_insns (to, p - buf, buf);
2097 }
2098
2099 /* Implementation of linux_target_ops method
2100    "install_fast_tracepoint_jump_pad".  */
2101
2102 static int
2103 aarch64_install_fast_tracepoint_jump_pad (CORE_ADDR tpoint,
2104                                           CORE_ADDR tpaddr,
2105                                           CORE_ADDR collector,
2106                                           CORE_ADDR lockaddr,
2107                                           ULONGEST orig_size,
2108                                           CORE_ADDR *jump_entry,
2109                                           CORE_ADDR *trampoline,
2110                                           ULONGEST *trampoline_size,
2111                                           unsigned char *jjump_pad_insn,
2112                                           ULONGEST *jjump_pad_insn_size,
2113                                           CORE_ADDR *adjusted_insn_addr,
2114                                           CORE_ADDR *adjusted_insn_addr_end,
2115                                           char *err)
2116 {
2117   uint32_t buf[256];
2118   uint32_t *p = buf;
2119   int32_t offset;
2120   int i;
2121   CORE_ADDR buildaddr = *jump_entry;
2122
2123   /* We need to save the current state on the stack both to restore it
2124      later and to collect register values when the tracepoint is hit.
2125
2126      The saved registers are pushed in a layout that needs to be in sync
2127      with aarch64_ft_collect_regmap (see linux-aarch64-ipa.c).  Later on
2128      the supply_fast_tracepoint_registers function will fill in the
2129      register cache from a pointer to saved registers on the stack we build
2130      here.
2131
2132      For simplicity, we set the size of each cell on the stack to 16 bytes.
2133      This way one cell can hold any register type, from system registers
2134      to the 128 bit SIMD&FP registers.  Furthermore, the stack pointer
2135      has to be 16 bytes aligned anyway.
2136
2137      Note that the CPSR register does not exist on AArch64.  Instead we
2138      can access system bits describing the process state with the
2139      MRS/MSR instructions, namely the condition flags.  We save them as
2140      if they are part of a CPSR register because that's how GDB
2141      interprets these system bits.  At the moment, only the condition
2142      flags are saved in CPSR (NZCV).
2143
2144      Stack layout, each cell is 16 bytes (descending):
2145
2146      High *-------- SIMD&FP registers from 31 down to 0. --------*
2147           | q31                                                  |
2148           .                                                      .
2149           .                                                      . 32 cells
2150           .                                                      .
2151           | q0                                                   |
2152           *---- General purpose registers from 30 down to 0. ----*
2153           | x30                                                  |
2154           .                                                      .
2155           .                                                      . 31 cells
2156           .                                                      .
2157           | x0                                                   |
2158           *------------- Special purpose registers. -------------*
2159           | SP                                                   |
2160           | PC                                                   |
2161           | CPSR (NZCV)                                          | 5 cells
2162           | FPSR                                                 |
2163           | FPCR                                                 | <- SP + 16
2164           *------------- collecting_t object --------------------*
2165           | TPIDR_EL0               | struct tracepoint *        |
2166      Low  *------------------------------------------------------*
2167
2168      After this stack is set up, we issue a call to the collector, passing
2169      it the saved registers at (SP + 16).  */
2170
2171   /* Push SIMD&FP registers on the stack:
2172
2173        SUB sp, sp, #(32 * 16)
2174
2175        STP q30, q31, [sp, #(30 * 16)]
2176        ...
2177        STP q0, q1, [sp]
2178
2179      */
2180   p += emit_sub (p, sp, sp, immediate_operand (32 * 16));
2181   for (i = 30; i >= 0; i -= 2)
2182     p += emit_stp_q_offset (p, i, i + 1, sp, i * 16);
2183
2184   /* Push general puspose registers on the stack.  Note that we do not need
2185      to push x31 as it represents the xzr register and not the stack
2186      pointer in a STR instruction.
2187
2188        SUB sp, sp, #(31 * 16)
2189
2190        STR x30, [sp, #(30 * 16)]
2191        ...
2192        STR x0, [sp]
2193
2194      */
2195   p += emit_sub (p, sp, sp, immediate_operand (31 * 16));
2196   for (i = 30; i >= 0; i -= 1)
2197     p += emit_str (p, aarch64_register (i, 1), sp,
2198                    offset_memory_operand (i * 16));
2199
2200   /* Make space for 5 more cells.
2201
2202        SUB sp, sp, #(5 * 16)
2203
2204      */
2205   p += emit_sub (p, sp, sp, immediate_operand (5 * 16));
2206
2207
2208   /* Save SP:
2209
2210        ADD x4, sp, #((32 + 31 + 5) * 16)
2211        STR x4, [sp, #(4 * 16)]
2212
2213      */
2214   p += emit_add (p, x4, sp, immediate_operand ((32 + 31 + 5) * 16));
2215   p += emit_str (p, x4, sp, offset_memory_operand (4 * 16));
2216
2217   /* Save PC (tracepoint address):
2218
2219        MOV  x3, #(tpaddr)
2220        ...
2221
2222        STR x3, [sp, #(3 * 16)]
2223
2224      */
2225
2226   p += emit_mov_addr (p, x3, tpaddr);
2227   p += emit_str (p, x3, sp, offset_memory_operand (3 * 16));
2228
2229   /* Save CPSR (NZCV), FPSR and FPCR:
2230
2231        MRS x2, nzcv
2232        MRS x1, fpsr
2233        MRS x0, fpcr
2234
2235        STR x2, [sp, #(2 * 16)]
2236        STR x1, [sp, #(1 * 16)]
2237        STR x0, [sp, #(0 * 16)]
2238
2239      */
2240   p += emit_mrs (p, x2, NZCV);
2241   p += emit_mrs (p, x1, FPSR);
2242   p += emit_mrs (p, x0, FPCR);
2243   p += emit_str (p, x2, sp, offset_memory_operand (2 * 16));
2244   p += emit_str (p, x1, sp, offset_memory_operand (1 * 16));
2245   p += emit_str (p, x0, sp, offset_memory_operand (0 * 16));
2246
2247   /* Push the collecting_t object.  It consist of the address of the
2248      tracepoint and an ID for the current thread.  We get the latter by
2249      reading the tpidr_el0 system register.  It corresponds to the
2250      NT_ARM_TLS register accessible with ptrace.
2251
2252        MOV x0, #(tpoint)
2253        ...
2254
2255        MRS x1, tpidr_el0
2256
2257        STP x0, x1, [sp, #-16]!
2258
2259      */
2260
2261   p += emit_mov_addr (p, x0, tpoint);
2262   p += emit_mrs (p, x1, TPIDR_EL0);
2263   p += emit_stp (p, x0, x1, sp, preindex_memory_operand (-16));
2264
2265   /* Spin-lock:
2266
2267      The shared memory for the lock is at lockaddr.  It will hold zero
2268      if no-one is holding the lock, otherwise it contains the address of
2269      the collecting_t object on the stack of the thread which acquired it.
2270
2271      At this stage, the stack pointer points to this thread's collecting_t
2272      object.
2273
2274      We use the following registers:
2275      - x0: Address of the lock.
2276      - x1: Pointer to collecting_t object.
2277      - x2: Scratch register.
2278
2279        MOV x0, #(lockaddr)
2280        ...
2281        MOV x1, sp
2282
2283        ; Trigger an event local to this core.  So the following WFE
2284        ; instruction is ignored.
2285        SEVL
2286      again:
2287        ; Wait for an event.  The event is triggered by either the SEVL
2288        ; or STLR instructions (store release).
2289        WFE
2290
2291        ; Atomically read at lockaddr.  This marks the memory location as
2292        ; exclusive.  This instruction also has memory constraints which
2293        ; make sure all previous data reads and writes are done before
2294        ; executing it.
2295        LDAXR x2, [x0]
2296
2297        ; Try again if another thread holds the lock.
2298        CBNZ x2, again
2299
2300        ; We can lock it!  Write the address of the collecting_t object.
2301        ; This instruction will fail if the memory location is not marked
2302        ; as exclusive anymore.  If it succeeds, it will remove the
2303        ; exclusive mark on the memory location.  This way, if another
2304        ; thread executes this instruction before us, we will fail and try
2305        ; all over again.
2306        STXR w2, x1, [x0]
2307        CBNZ w2, again
2308
2309      */
2310
2311   p += emit_mov_addr (p, x0, lockaddr);
2312   p += emit_mov (p, x1, register_operand (sp));
2313
2314   p += emit_sevl (p);
2315   p += emit_wfe (p);
2316   p += emit_ldaxr (p, x2, x0);
2317   p += emit_cb (p, 1, w2, -2 * 4);
2318   p += emit_stxr (p, w2, x1, x0);
2319   p += emit_cb (p, 1, x2, -4 * 4);
2320
2321   /* Call collector (struct tracepoint *, unsigned char *):
2322
2323        MOV x0, #(tpoint)
2324        ...
2325
2326        ; Saved registers start after the collecting_t object.
2327        ADD x1, sp, #16
2328
2329        ; We use an intra-procedure-call scratch register.
2330        MOV ip0, #(collector)
2331        ...
2332
2333        ; And call back to C!
2334        BLR ip0
2335
2336      */
2337
2338   p += emit_mov_addr (p, x0, tpoint);
2339   p += emit_add (p, x1, sp, immediate_operand (16));
2340
2341   p += emit_mov_addr (p, ip0, collector);
2342   p += emit_blr (p, ip0);
2343
2344   /* Release the lock.
2345
2346        MOV x0, #(lockaddr)
2347        ...
2348
2349        ; This instruction is a normal store with memory ordering
2350        ; constraints.  Thanks to this we do not have to put a data
2351        ; barrier instruction to make sure all data read and writes are done
2352        ; before this instruction is executed.  Furthermore, this instrucion
2353        ; will trigger an event, letting other threads know they can grab
2354        ; the lock.
2355        STLR xzr, [x0]
2356
2357      */
2358   p += emit_mov_addr (p, x0, lockaddr);
2359   p += emit_stlr (p, xzr, x0);
2360
2361   /* Free collecting_t object:
2362
2363        ADD sp, sp, #16
2364
2365      */
2366   p += emit_add (p, sp, sp, immediate_operand (16));
2367
2368   /* Restore CPSR (NZCV), FPSR and FPCR.  And free all special purpose
2369      registers from the stack.
2370
2371        LDR x2, [sp, #(2 * 16)]
2372        LDR x1, [sp, #(1 * 16)]
2373        LDR x0, [sp, #(0 * 16)]
2374
2375        MSR NZCV, x2
2376        MSR FPSR, x1
2377        MSR FPCR, x0
2378
2379        ADD sp, sp #(5 * 16)
2380
2381      */
2382   p += emit_ldr (p, x2, sp, offset_memory_operand (2 * 16));
2383   p += emit_ldr (p, x1, sp, offset_memory_operand (1 * 16));
2384   p += emit_ldr (p, x0, sp, offset_memory_operand (0 * 16));
2385   p += emit_msr (p, NZCV, x2);
2386   p += emit_msr (p, FPSR, x1);
2387   p += emit_msr (p, FPCR, x0);
2388
2389   p += emit_add (p, sp, sp, immediate_operand (5 * 16));
2390
2391   /* Pop general purpose registers:
2392
2393        LDR x0, [sp]
2394        ...
2395        LDR x30, [sp, #(30 * 16)]
2396
2397        ADD sp, sp, #(31 * 16)
2398
2399      */
2400   for (i = 0; i <= 30; i += 1)
2401     p += emit_ldr (p, aarch64_register (i, 1), sp,
2402                    offset_memory_operand (i * 16));
2403   p += emit_add (p, sp, sp, immediate_operand (31 * 16));
2404
2405   /* Pop SIMD&FP registers:
2406
2407        LDP q0, q1, [sp]
2408        ...
2409        LDP q30, q31, [sp, #(30 * 16)]
2410
2411        ADD sp, sp, #(32 * 16)
2412
2413      */
2414   for (i = 0; i <= 30; i += 2)
2415     p += emit_ldp_q_offset (p, i, i + 1, sp, i * 16);
2416   p += emit_add (p, sp, sp, immediate_operand (32 * 16));
2417
2418   /* Write the code into the inferior memory.  */
2419   append_insns (&buildaddr, p - buf, buf);
2420
2421   /* Now emit the relocated instruction.  */
2422   *adjusted_insn_addr = buildaddr;
2423   aarch64_relocate_instruction (&buildaddr, tpaddr);
2424   *adjusted_insn_addr_end = buildaddr;
2425
2426   /* We may not have been able to relocate the instruction.  */
2427   if (*adjusted_insn_addr == *adjusted_insn_addr_end)
2428     {
2429       sprintf (err,
2430                "E.Could not relocate instruction from %s to %s.",
2431                core_addr_to_string_nz (tpaddr),
2432                core_addr_to_string_nz (buildaddr));
2433       return 1;
2434     }
2435
2436   /* Go back to the start of the buffer.  */
2437   p = buf;
2438
2439   /* Emit a branch back from the jump pad.  */
2440   offset = (tpaddr + orig_size - buildaddr);
2441   if (!can_encode_int32 (offset, 28))
2442     {
2443       sprintf (err,
2444                "E.Jump back from jump pad too far from tracepoint "
2445                "(offset 0x%" PRIx32 " cannot be encoded in 28 bits).",
2446                offset);
2447       return 1;
2448     }
2449
2450   p += emit_b (p, 0, offset);
2451   append_insns (&buildaddr, p - buf, buf);
2452
2453   /* Give the caller a branch instruction into the jump pad.  */
2454   offset = (*jump_entry - tpaddr);
2455   if (!can_encode_int32 (offset, 28))
2456     {
2457       sprintf (err,
2458                "E.Jump pad too far from tracepoint "
2459                "(offset 0x%" PRIx32 " cannot be encoded in 28 bits).",
2460                offset);
2461       return 1;
2462     }
2463
2464   emit_b ((uint32_t *) jjump_pad_insn, 0, offset);
2465   *jjump_pad_insn_size = 4;
2466
2467   /* Return the end address of our pad.  */
2468   *jump_entry = buildaddr;
2469
2470   return 0;
2471 }
2472
2473 /* Helper function writing LEN instructions from START into
2474    current_insn_ptr.  */
2475
2476 static void
2477 emit_ops_insns (const uint32_t *start, int len)
2478 {
2479   CORE_ADDR buildaddr = current_insn_ptr;
2480
2481   if (debug_threads)
2482     debug_printf ("Adding %d instrucions at %s\n",
2483                   len, paddress (buildaddr));
2484
2485   append_insns (&buildaddr, len, start);
2486   current_insn_ptr = buildaddr;
2487 }
2488
2489 /* Pop a register from the stack.  */
2490
2491 static int
2492 emit_pop (uint32_t *buf, struct aarch64_register rt)
2493 {
2494   return emit_ldr (buf, rt, sp, postindex_memory_operand (1 * 16));
2495 }
2496
2497 /* Push a register on the stack.  */
2498
2499 static int
2500 emit_push (uint32_t *buf, struct aarch64_register rt)
2501 {
2502   return emit_str (buf, rt, sp, preindex_memory_operand (-1 * 16));
2503 }
2504
2505 /* Implementation of emit_ops method "emit_prologue".  */
2506
2507 static void
2508 aarch64_emit_prologue (void)
2509 {
2510   uint32_t buf[16];
2511   uint32_t *p = buf;
2512
2513   /* This function emit a prologue for the following function prototype:
2514
2515      enum eval_result_type f (unsigned char *regs,
2516                               ULONGEST *value);
2517
2518      The first argument is a buffer of raw registers.  The second
2519      argument is the result of
2520      evaluating the expression, which will be set to whatever is on top of
2521      the stack at the end.
2522
2523      The stack set up by the prologue is as such:
2524
2525      High *------------------------------------------------------*
2526           | LR                                                   |
2527           | FP                                                   | <- FP
2528           | x1  (ULONGEST *value)                                |
2529           | x0  (unsigned char *regs)                            |
2530      Low  *------------------------------------------------------*
2531
2532      As we are implementing a stack machine, each opcode can expand the
2533      stack so we never know how far we are from the data saved by this
2534      prologue.  In order to be able refer to value and regs later, we save
2535      the current stack pointer in the frame pointer.  This way, it is not
2536      clobbered when calling C functions.
2537
2538      Finally, throughtout every operation, we are using register x0 as the
2539      top of the stack, and x1 as a scratch register.  */
2540
2541   p += emit_stp (p, x0, x1, sp, preindex_memory_operand (-2 * 16));
2542   p += emit_str (p, lr, sp, offset_memory_operand (3 * 8));
2543   p += emit_str (p, fp, sp, offset_memory_operand (2 * 8));
2544
2545   p += emit_add (p, fp, sp, immediate_operand (2 * 8));
2546
2547
2548   emit_ops_insns (buf, p - buf);
2549 }
2550
2551 /* Implementation of emit_ops method "emit_epilogue".  */
2552
2553 static void
2554 aarch64_emit_epilogue (void)
2555 {
2556   uint32_t buf[16];
2557   uint32_t *p = buf;
2558
2559   /* Store the result of the expression (x0) in *value.  */
2560   p += emit_sub (p, x1, fp, immediate_operand (1 * 8));
2561   p += emit_ldr (p, x1, x1, offset_memory_operand (0));
2562   p += emit_str (p, x0, x1, offset_memory_operand (0));
2563
2564   /* Restore the previous state.  */
2565   p += emit_add (p, sp, fp, immediate_operand (2 * 8));
2566   p += emit_ldp (p, fp, lr, fp, offset_memory_operand (0));
2567
2568   /* Return expr_eval_no_error.  */
2569   p += emit_mov (p, x0, immediate_operand (expr_eval_no_error));
2570   p += emit_ret (p, lr);
2571
2572   emit_ops_insns (buf, p - buf);
2573 }
2574
2575 /* Implementation of emit_ops method "emit_add".  */
2576
2577 static void
2578 aarch64_emit_add (void)
2579 {
2580   uint32_t buf[16];
2581   uint32_t *p = buf;
2582
2583   p += emit_pop (p, x1);
2584   p += emit_add (p, x0, x0, register_operand (x1));
2585
2586   emit_ops_insns (buf, p - buf);
2587 }
2588
2589 /* Implementation of emit_ops method "emit_sub".  */
2590
2591 static void
2592 aarch64_emit_sub (void)
2593 {
2594   uint32_t buf[16];
2595   uint32_t *p = buf;
2596
2597   p += emit_pop (p, x1);
2598   p += emit_sub (p, x0, x0, register_operand (x1));
2599
2600   emit_ops_insns (buf, p - buf);
2601 }
2602
2603 /* Implementation of emit_ops method "emit_mul".  */
2604
2605 static void
2606 aarch64_emit_mul (void)
2607 {
2608   uint32_t buf[16];
2609   uint32_t *p = buf;
2610
2611   p += emit_pop (p, x1);
2612   p += emit_mul (p, x0, x1, x0);
2613
2614   emit_ops_insns (buf, p - buf);
2615 }
2616
2617 /* Implementation of emit_ops method "emit_lsh".  */
2618
2619 static void
2620 aarch64_emit_lsh (void)
2621 {
2622   uint32_t buf[16];
2623   uint32_t *p = buf;
2624
2625   p += emit_pop (p, x1);
2626   p += emit_lslv (p, x0, x1, x0);
2627
2628   emit_ops_insns (buf, p - buf);
2629 }
2630
2631 /* Implementation of emit_ops method "emit_rsh_signed".  */
2632
2633 static void
2634 aarch64_emit_rsh_signed (void)
2635 {
2636   uint32_t buf[16];
2637   uint32_t *p = buf;
2638
2639   p += emit_pop (p, x1);
2640   p += emit_asrv (p, x0, x1, x0);
2641
2642   emit_ops_insns (buf, p - buf);
2643 }
2644
2645 /* Implementation of emit_ops method "emit_rsh_unsigned".  */
2646
2647 static void
2648 aarch64_emit_rsh_unsigned (void)
2649 {
2650   uint32_t buf[16];
2651   uint32_t *p = buf;
2652
2653   p += emit_pop (p, x1);
2654   p += emit_lsrv (p, x0, x1, x0);
2655
2656   emit_ops_insns (buf, p - buf);
2657 }
2658
2659 /* Implementation of emit_ops method "emit_ext".  */
2660
2661 static void
2662 aarch64_emit_ext (int arg)
2663 {
2664   uint32_t buf[16];
2665   uint32_t *p = buf;
2666
2667   p += emit_sbfx (p, x0, x0, 0, arg);
2668
2669   emit_ops_insns (buf, p - buf);
2670 }
2671
2672 /* Implementation of emit_ops method "emit_log_not".  */
2673
2674 static void
2675 aarch64_emit_log_not (void)
2676 {
2677   uint32_t buf[16];
2678   uint32_t *p = buf;
2679
2680   /* If the top of the stack is 0, replace it with 1.  Else replace it with
2681      0.  */
2682
2683   p += emit_cmp (p, x0, immediate_operand (0));
2684   p += emit_cset (p, x0, EQ);
2685
2686   emit_ops_insns (buf, p - buf);
2687 }
2688
2689 /* Implementation of emit_ops method "emit_bit_and".  */
2690
2691 static void
2692 aarch64_emit_bit_and (void)
2693 {
2694   uint32_t buf[16];
2695   uint32_t *p = buf;
2696
2697   p += emit_pop (p, x1);
2698   p += emit_and (p, x0, x0, x1);
2699
2700   emit_ops_insns (buf, p - buf);
2701 }
2702
2703 /* Implementation of emit_ops method "emit_bit_or".  */
2704
2705 static void
2706 aarch64_emit_bit_or (void)
2707 {
2708   uint32_t buf[16];
2709   uint32_t *p = buf;
2710
2711   p += emit_pop (p, x1);
2712   p += emit_orr (p, x0, x0, x1);
2713
2714   emit_ops_insns (buf, p - buf);
2715 }
2716
2717 /* Implementation of emit_ops method "emit_bit_xor".  */
2718
2719 static void
2720 aarch64_emit_bit_xor (void)
2721 {
2722   uint32_t buf[16];
2723   uint32_t *p = buf;
2724
2725   p += emit_pop (p, x1);
2726   p += emit_eor (p, x0, x0, x1);
2727
2728   emit_ops_insns (buf, p - buf);
2729 }
2730
2731 /* Implementation of emit_ops method "emit_bit_not".  */
2732
2733 static void
2734 aarch64_emit_bit_not (void)
2735 {
2736   uint32_t buf[16];
2737   uint32_t *p = buf;
2738
2739   p += emit_mvn (p, x0, x0);
2740
2741   emit_ops_insns (buf, p - buf);
2742 }
2743
2744 /* Implementation of emit_ops method "emit_equal".  */
2745
2746 static void
2747 aarch64_emit_equal (void)
2748 {
2749   uint32_t buf[16];
2750   uint32_t *p = buf;
2751
2752   p += emit_pop (p, x1);
2753   p += emit_cmp (p, x0, register_operand (x1));
2754   p += emit_cset (p, x0, EQ);
2755
2756   emit_ops_insns (buf, p - buf);
2757 }
2758
2759 /* Implementation of emit_ops method "emit_less_signed".  */
2760
2761 static void
2762 aarch64_emit_less_signed (void)
2763 {
2764   uint32_t buf[16];
2765   uint32_t *p = buf;
2766
2767   p += emit_pop (p, x1);
2768   p += emit_cmp (p, x1, register_operand (x0));
2769   p += emit_cset (p, x0, LT);
2770
2771   emit_ops_insns (buf, p - buf);
2772 }
2773
2774 /* Implementation of emit_ops method "emit_less_unsigned".  */
2775
2776 static void
2777 aarch64_emit_less_unsigned (void)
2778 {
2779   uint32_t buf[16];
2780   uint32_t *p = buf;
2781
2782   p += emit_pop (p, x1);
2783   p += emit_cmp (p, x1, register_operand (x0));
2784   p += emit_cset (p, x0, LO);
2785
2786   emit_ops_insns (buf, p - buf);
2787 }
2788
2789 /* Implementation of emit_ops method "emit_ref".  */
2790
2791 static void
2792 aarch64_emit_ref (int size)
2793 {
2794   uint32_t buf[16];
2795   uint32_t *p = buf;
2796
2797   switch (size)
2798     {
2799     case 1:
2800       p += emit_ldrb (p, w0, x0, offset_memory_operand (0));
2801       break;
2802     case 2:
2803       p += emit_ldrh (p, w0, x0, offset_memory_operand (0));
2804       break;
2805     case 4:
2806       p += emit_ldr (p, w0, x0, offset_memory_operand (0));
2807       break;
2808     case 8:
2809       p += emit_ldr (p, x0, x0, offset_memory_operand (0));
2810       break;
2811     default:
2812       /* Unknown size, bail on compilation.  */
2813       emit_error = 1;
2814       break;
2815     }
2816
2817   emit_ops_insns (buf, p - buf);
2818 }
2819
2820 /* Implementation of emit_ops method "emit_if_goto".  */
2821
2822 static void
2823 aarch64_emit_if_goto (int *offset_p, int *size_p)
2824 {
2825   uint32_t buf[16];
2826   uint32_t *p = buf;
2827
2828   /* The Z flag is set or cleared here.  */
2829   p += emit_cmp (p, x0, immediate_operand (0));
2830   /* This instruction must not change the Z flag.  */
2831   p += emit_pop (p, x0);
2832   /* Branch over the next instruction if x0 == 0.  */
2833   p += emit_bcond (p, EQ, 8);
2834
2835   /* The NOP instruction will be patched with an unconditional branch.  */
2836   if (offset_p)
2837     *offset_p = (p - buf) * 4;
2838   if (size_p)
2839     *size_p = 4;
2840   p += emit_nop (p);
2841
2842   emit_ops_insns (buf, p - buf);
2843 }
2844
/* Implementation of emit_ops method "emit_goto".

   Emit a lone NOP placeholder.  When they are not null, *OFFSET_P is set
   to 0 (the placeholder is the first instruction emitted) and *SIZE_P to
   4.  */

static void
aarch64_emit_goto (int *offset_p, int *size_p)
{
  uint32_t insns[16];
  int count;

  if (offset_p)
    *offset_p = 0;
  if (size_p)
    *size_p = 4;

  /* The NOP instruction will be patched with an unconditional branch.  */
  count = emit_nop (insns);

  emit_ops_insns (insns, count);
}
2862
2863 /* Implementation of emit_ops method "write_goto_address".  */
2864
2865 void
2866 aarch64_write_goto_address (CORE_ADDR from, CORE_ADDR to, int size)
2867 {
2868   uint32_t insn;
2869
2870   emit_b (&insn, 0, to - from);
2871   append_insns (&from, 1, &insn);
2872 }
2873
2874 /* Implementation of emit_ops method "emit_const".  */
2875
2876 static void
2877 aarch64_emit_const (LONGEST num)
2878 {
2879   uint32_t buf[16];
2880   uint32_t *p = buf;
2881
2882   p += emit_mov_addr (p, x0, num);
2883
2884   emit_ops_insns (buf, p - buf);
2885 }
2886
2887 /* Implementation of emit_ops method "emit_call".  */
2888
2889 static void
2890 aarch64_emit_call (CORE_ADDR fn)
2891 {
2892   uint32_t buf[16];
2893   uint32_t *p = buf;
2894
2895   p += emit_mov_addr (p, ip0, fn);
2896   p += emit_blr (p, ip0);
2897
2898   emit_ops_insns (buf, p - buf);
2899 }
2900
2901 /* Implementation of emit_ops method "emit_reg".  */
2902
2903 static void
2904 aarch64_emit_reg (int reg)
2905 {
2906   uint32_t buf[16];
2907   uint32_t *p = buf;
2908
2909   /* Set x0 to unsigned char *regs.  */
2910   p += emit_sub (p, x0, fp, immediate_operand (2 * 8));
2911   p += emit_ldr (p, x0, x0, offset_memory_operand (0));
2912   p += emit_mov (p, x1, immediate_operand (reg));
2913
2914   emit_ops_insns (buf, p - buf);
2915
2916   aarch64_emit_call (get_raw_reg_func_addr ());
2917 }
2918
2919 /* Implementation of emit_ops method "emit_pop".  */
2920
2921 static void
2922 aarch64_emit_pop (void)
2923 {
2924   uint32_t buf[16];
2925   uint32_t *p = buf;
2926
2927   p += emit_pop (p, x0);
2928
2929   emit_ops_insns (buf, p - buf);
2930 }
2931
2932 /* Implementation of emit_ops method "emit_stack_flush".  */
2933
2934 static void
2935 aarch64_emit_stack_flush (void)
2936 {
2937   uint32_t buf[16];
2938   uint32_t *p = buf;
2939
2940   p += emit_push (p, x0);
2941
2942   emit_ops_insns (buf, p - buf);
2943 }
2944
2945 /* Implementation of emit_ops method "emit_zero_ext".  */
2946
2947 static void
2948 aarch64_emit_zero_ext (int arg)
2949 {
2950   uint32_t buf[16];
2951   uint32_t *p = buf;
2952
2953   p += emit_ubfx (p, x0, x0, 0, arg);
2954
2955   emit_ops_insns (buf, p - buf);
2956 }
2957
2958 /* Implementation of emit_ops method "emit_swap".  */
2959
2960 static void
2961 aarch64_emit_swap (void)
2962 {
2963   uint32_t buf[16];
2964   uint32_t *p = buf;
2965
2966   p += emit_ldr (p, x1, sp, offset_memory_operand (0 * 16));
2967   p += emit_str (p, x0, sp, offset_memory_operand (0 * 16));
2968   p += emit_mov (p, x0, register_operand (x1));
2969
2970   emit_ops_insns (buf, p - buf);
2971 }
2972
2973 /* Implementation of emit_ops method "emit_stack_adjust".  */
2974
2975 static void
2976 aarch64_emit_stack_adjust (int n)
2977 {
2978   /* This is not needed with our design.  */
2979   uint32_t buf[16];
2980   uint32_t *p = buf;
2981
2982   p += emit_add (p, sp, sp, immediate_operand (n * 16));
2983
2984   emit_ops_insns (buf, p - buf);
2985 }
2986
2987 /* Implementation of emit_ops method "emit_int_call_1".  */
2988
2989 static void
2990 aarch64_emit_int_call_1 (CORE_ADDR fn, int arg1)
2991 {
2992   uint32_t buf[16];
2993   uint32_t *p = buf;
2994
2995   p += emit_mov (p, x0, immediate_operand (arg1));
2996
2997   emit_ops_insns (buf, p - buf);
2998
2999   aarch64_emit_call (fn);
3000 }
3001
3002 /* Implementation of emit_ops method "emit_void_call_2".  */
3003
3004 static void
3005 aarch64_emit_void_call_2 (CORE_ADDR fn, int arg1)
3006 {
3007   uint32_t buf[16];
3008   uint32_t *p = buf;
3009
3010   /* Push x0 on the stack.  */
3011   aarch64_emit_stack_flush ();
3012
3013   /* Setup arguments for the function call:
3014
3015      x0: arg1
3016      x1: top of the stack
3017
3018        MOV x1, x0
3019        MOV x0, #arg1  */
3020
3021   p += emit_mov (p, x1, register_operand (x0));
3022   p += emit_mov (p, x0, immediate_operand (arg1));
3023
3024   emit_ops_insns (buf, p - buf);
3025
3026   aarch64_emit_call (fn);
3027
3028   /* Restore x0.  */
3029   aarch64_emit_pop ();
3030 }
3031
3032 /* Implementation of emit_ops method "emit_eq_goto".  */
3033
3034 static void
3035 aarch64_emit_eq_goto (int *offset_p, int *size_p)
3036 {
3037   uint32_t buf[16];
3038   uint32_t *p = buf;
3039
3040   p += emit_pop (p, x1);
3041   p += emit_cmp (p, x1, register_operand (x0));
3042   /* Branch over the next instruction if x0 != x1.  */
3043   p += emit_bcond (p, NE, 8);
3044   /* The NOP instruction will be patched with an unconditional branch.  */
3045   if (offset_p)
3046     *offset_p = (p - buf) * 4;
3047   if (size_p)
3048     *size_p = 4;
3049   p += emit_nop (p);
3050
3051   emit_ops_insns (buf, p - buf);
3052 }
3053
3054 /* Implementation of emit_ops method "emit_ne_goto".  */
3055
3056 static void
3057 aarch64_emit_ne_goto (int *offset_p, int *size_p)
3058 {
3059   uint32_t buf[16];
3060   uint32_t *p = buf;
3061
3062   p += emit_pop (p, x1);
3063   p += emit_cmp (p, x1, register_operand (x0));
3064   /* Branch over the next instruction if x0 == x1.  */
3065   p += emit_bcond (p, EQ, 8);
3066   /* The NOP instruction will be patched with an unconditional branch.  */
3067   if (offset_p)
3068     *offset_p = (p - buf) * 4;
3069   if (size_p)
3070     *size_p = 4;
3071   p += emit_nop (p);
3072
3073   emit_ops_insns (buf, p - buf);
3074 }
3075
3076 /* Implementation of emit_ops method "emit_lt_goto".  */
3077
3078 static void
3079 aarch64_emit_lt_goto (int *offset_p, int *size_p)
3080 {
3081   uint32_t buf[16];
3082   uint32_t *p = buf;
3083
3084   p += emit_pop (p, x1);
3085   p += emit_cmp (p, x1, register_operand (x0));
3086   /* Branch over the next instruction if x0 >= x1.  */
3087   p += emit_bcond (p, GE, 8);
3088   /* The NOP instruction will be patched with an unconditional branch.  */
3089   if (offset_p)
3090     *offset_p = (p - buf) * 4;
3091   if (size_p)
3092     *size_p = 4;
3093   p += emit_nop (p);
3094
3095   emit_ops_insns (buf, p - buf);
3096 }
3097
3098 /* Implementation of emit_ops method "emit_le_goto".  */
3099
3100 static void
3101 aarch64_emit_le_goto (int *offset_p, int *size_p)
3102 {
3103   uint32_t buf[16];
3104   uint32_t *p = buf;
3105
3106   p += emit_pop (p, x1);
3107   p += emit_cmp (p, x1, register_operand (x0));
3108   /* Branch over the next instruction if x0 > x1.  */
3109   p += emit_bcond (p, GT, 8);
3110   /* The NOP instruction will be patched with an unconditional branch.  */
3111   if (offset_p)
3112     *offset_p = (p - buf) * 4;
3113   if (size_p)
3114     *size_p = 4;
3115   p += emit_nop (p);
3116
3117   emit_ops_insns (buf, p - buf);
3118 }
3119
3120 /* Implementation of emit_ops method "emit_gt_goto".  */
3121
3122 static void
3123 aarch64_emit_gt_goto (int *offset_p, int *size_p)
3124 {
3125   uint32_t buf[16];
3126   uint32_t *p = buf;
3127
3128   p += emit_pop (p, x1);
3129   p += emit_cmp (p, x1, register_operand (x0));
3130   /* Branch over the next instruction if x0 <= x1.  */
3131   p += emit_bcond (p, LE, 8);
3132   /* The NOP instruction will be patched with an unconditional branch.  */
3133   if (offset_p)
3134     *offset_p = (p - buf) * 4;
3135   if (size_p)
3136     *size_p = 4;
3137   p += emit_nop (p);
3138
3139   emit_ops_insns (buf, p - buf);
3140 }
3141
3142 /* Implementation of emit_ops method "emit_ge_got".  */
3143
3144 static void
3145 aarch64_emit_ge_got (int *offset_p, int *size_p)
3146 {
3147   uint32_t buf[16];
3148   uint32_t *p = buf;
3149
3150   p += emit_pop (p, x1);
3151   p += emit_cmp (p, x1, register_operand (x0));
3152   /* Branch over the next instruction if x0 <= x1.  */
3153   p += emit_bcond (p, LT, 8);
3154   /* The NOP instruction will be patched with an unconditional branch.  */
3155   if (offset_p)
3156     *offset_p = (p - buf) * 4;
3157   if (size_p)
3158     *size_p = 4;
3159   p += emit_nop (p);
3160
3161   emit_ops_insns (buf, p - buf);
3162 }
3163
/* Table of the AArch64 emit_ops method implementations, returned by
   aarch64_emit_ops.  The initializer is positional, so the entries
   must stay in the same order as the members of struct emit_ops.  */

static struct emit_ops aarch64_emit_ops_impl =
{
  aarch64_emit_prologue,
  aarch64_emit_epilogue,
  aarch64_emit_add,
  aarch64_emit_sub,
  aarch64_emit_mul,
  aarch64_emit_lsh,
  aarch64_emit_rsh_signed,
  aarch64_emit_rsh_unsigned,
  aarch64_emit_ext,
  aarch64_emit_log_not,
  aarch64_emit_bit_and,
  aarch64_emit_bit_or,
  aarch64_emit_bit_xor,
  aarch64_emit_bit_not,
  aarch64_emit_equal,
  aarch64_emit_less_signed,
  aarch64_emit_less_unsigned,
  aarch64_emit_ref,
  aarch64_emit_if_goto,
  aarch64_emit_goto,
  aarch64_write_goto_address,
  aarch64_emit_const,
  aarch64_emit_call,
  aarch64_emit_reg,
  aarch64_emit_pop,
  aarch64_emit_stack_flush,
  aarch64_emit_zero_ext,
  aarch64_emit_swap,
  aarch64_emit_stack_adjust,
  aarch64_emit_int_call_1,
  aarch64_emit_void_call_2,
  aarch64_emit_eq_goto,
  aarch64_emit_ne_goto,
  aarch64_emit_lt_goto,
  aarch64_emit_le_goto,
  aarch64_emit_gt_goto,
  aarch64_emit_ge_got,
};
3204
3205 /* Implementation of linux_target_ops method "emit_ops".  */
3206
3207 static struct emit_ops *
3208 aarch64_emit_ops (void)
3209 {
3210   return &aarch64_emit_ops_impl;
3211 }
3212
/* Implementation of linux_target_ops method
   "get_min_fast_tracepoint_insn_len".

   Always reports a minimum length of 4.  */

static int
aarch64_get_min_fast_tracepoint_insn_len (void)
{
  enum { min_insn_len = 4 };

  return min_insn_len;
}
3221
/* Implementation of linux_target_ops method "supports_range_stepping".

   Unconditionally returns 1 (true).  */

static int
aarch64_supports_range_stepping (void)
{
  const int supported = 1;

  return supported;
}
3229
/* The AArch64 instance of the low-level target operations table.  The
   initializer is positional, so the entries must stay in the same
   order as the members of struct linux_target_ops; NULL marks a
   method that this target does not provide.  */

struct linux_target_ops the_low_target =
{
  aarch64_arch_setup,
  aarch64_regs_info,
  aarch64_cannot_fetch_register,
  aarch64_cannot_store_register,
  NULL, /* fetch_register */
  aarch64_get_pc,
  aarch64_set_pc,
  (const unsigned char *) &aarch64_breakpoint,
  aarch64_breakpoint_len,
  NULL, /* breakpoint_reinsert_addr */
  0,    /* decr_pc_after_break */
  aarch64_breakpoint_at,
  aarch64_supports_z_point_type,
  aarch64_insert_point,
  aarch64_remove_point,
  aarch64_stopped_by_watchpoint,
  aarch64_stopped_data_address,
  NULL, /* collect_ptrace_register */
  NULL, /* supply_ptrace_register */
  aarch64_linux_siginfo_fixup,
  aarch64_linux_new_process,
  aarch64_linux_new_thread,
  aarch64_linux_new_fork,
  aarch64_linux_prepare_to_resume,
  NULL, /* process_qsupported */
  aarch64_supports_tracepoints,
  aarch64_get_thread_area,
  aarch64_install_fast_tracepoint_jump_pad,
  aarch64_emit_ops,
  aarch64_get_min_fast_tracepoint_insn_len,
  aarch64_supports_range_stepping,
};
3264
/* Architecture-specific initialization entry point for this file.
   Runs the three initialization steps below, in this order.  */

void
initialize_low_arch (void)
{
  /* Declared near the top of this file as being defined in
     auto-generated files.  */
  init_registers_aarch64 ();

  /* NOTE(review): presumably the AArch32 counterpart declared in
     linux-aarch32-low.h -- confirm.  */
  initialize_low_arch_aarch32 ();

  /* Hand this file's regsets description to initialize_regsets_info.  */
  initialize_regsets_info (&aarch64_regsets_info);
}