gcc/config/aarch64/aarch64.c

   1 /* Machine description for AArch64 architecture.
   2    Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
   3    Contributed by ARM Ltd.
   4
   5    This file is part of GCC.
   6
   7    GCC is free software; you can redistribute it and/or modify it
   8    under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3, or (at your option)
  10    any later version.
  11
  12    GCC is distributed in the hope that it will be useful, but
  13    WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15    General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with GCC; see the file COPYING3.  If not see
  19    <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "insn-codes.h"
  26 #include "rtl.h"
  27 #include "insn-attr.h"
  28 #include "tree.h"
  29 #include "regs.h"
  30 #include "df.h"
  31 #include "hard-reg-set.h"
  32 #include "output.h"
  33 #include "expr.h"
  34 #include "reload.h"
  35 #include "toplev.h"
  36 #include "target.h"
  37 #include "target-def.h"
  38 #include "targhooks.h"
  39 #include "ggc.h"
  40 #include "function.h"
  41 #include "tm_p.h"
  42 #include "recog.h"
  43 #include "langhooks.h"
  44 #include "diagnostic-core.h"
  45 #include "gimple.h"
  46 #include "optabs.h"
  47 #include "dwarf2.h"
  48
  49 /* Classifies an address.  Each enumerator names one addressing form:
  50
  51    ADDRESS_REG_IMM
  52        A simple base register plus immediate offset.
  53
  54    ADDRESS_REG_WB
  55        A base register indexed by immediate offset with writeback.
  56
  57    ADDRESS_REG_REG
  58        A base register indexed by (optionally scaled) register.
  59
  60    ADDRESS_REG_UXTW
  61        A base register indexed by (optionally scaled) zero-extended register.
  62
  63    ADDRESS_REG_SXTW
  64        A base register indexed by (optionally scaled) sign-extended register.
  65
  66    ADDRESS_LO_SUM
  67        A LO_SUM rtx with a base register and "LO12" symbol relocation.
  68
  69    ADDRESS_SYMBOLIC
  70        A constant symbolic address in the pc-relative literal pool.  */
  71
  72 enum aarch64_address_type {
  73   ADDRESS_REG_IMM,
  74   ADDRESS_REG_WB,
  75   ADDRESS_REG_REG,
  76   ADDRESS_REG_UXTW,
  77   ADDRESS_REG_SXTW,
  78   ADDRESS_LO_SUM,
  79   ADDRESS_SYMBOLIC
  80 };
  81
     /* The pieces of an address, tagged with its aarch64_address_type.
        NOTE(review): the code that fills this structure in is not visible in
        this part of the file; the per-field notes below are inferred from the
        field names and should be confirmed there.  */
  82 struct aarch64_address_info {
  83   enum aarch64_address_type type;
  84   rtx base;      /* Presumably the base register -- confirm.  */
  85   rtx offset;    /* Presumably the immediate or index part -- confirm.  */
  86   int shift;     /* Presumably the index scaling shift -- confirm.  */
  87   enum aarch64_symbol_type symbol_type;
  88 };
  89
  90 /* The current code model.  */
  91 enum aarch64_code_model aarch64_cmodel;
  92
     /* Define TARGET_HAVE_TLS as 1 (replacing any earlier definition), but
        only when HAVE_AS_TLS is defined.  */
  93 #ifdef HAVE_AS_TLS
  94 #undef TARGET_HAVE_TLS
  95 #define TARGET_HAVE_TLS 1
  96 #endif
  97
     /* Prototypes for static helper functions.  */
  98 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
  99 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
 100                                                      const_tree,
 101                                                      enum machine_mode *, int *,
 102                                                      bool *);
 103 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
 104 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
 105 static void aarch64_override_options_after_change (void);
 106 static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *,
 107                                          int *, unsigned char *, int *, int *);
 108 static bool aarch64_vector_mode_supported_p (enum machine_mode);
 109 static unsigned bit_count (unsigned HOST_WIDE_INT);
 110 static bool aarch64_const_vec_all_same_int_p (rtx,
 111                                               HOST_WIDE_INT, HOST_WIDE_INT);
 112
 113 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
 114                                                  const unsigned char *sel);
 115
 116 /* The processor for which instructions should be scheduled.  Starts out
        as "generic".  */
 117 enum aarch64_processor aarch64_tune = generic;
 118
 119 /* The current tuning set.  */
 120 const struct tune_params *aarch64_tune_params;
 121
 122 /* Mask to specify which instructions we are allowed to generate.  */
 123 unsigned long aarch64_isa_flags = 0;
 124
 125 /* Mask to specify which instruction scheduling options should be used.  */
 126 unsigned long aarch64_tune_flags = 0;
 127
 128 /* Tuning parameters.
        NAMED_PARAM (NAME, VAL) initialises member NAME with VAL through a
        designated initializer when HAVE_DESIGNATED_INITIALIZERS is non-zero;
        otherwise it expands to just (VAL), i.e. a positional initializer.
        Entries in the tables below must therefore stay in member
        declaration order.  */
 129
 130 #if HAVE_DESIGNATED_INITIALIZERS
 131 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
 132 #else
 133 #define NAMED_PARAM(NAME, VAL) (VAL)
 134 #endif
 135
     /* RTX operation costs used by the "generic" tuning, each expressed
        with COSTS_N_INSNS.  NOTE(review): the __extension__ prefix presumably
        silences pedantic diagnostics about designated initializers --
        confirm.  */
 136 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 137 __extension__
 138 #endif
 139 static const struct cpu_rtx_cost_table generic_rtx_cost_table =
 140 {
 141   NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
 142   NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
 143   NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
 144   NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
 145   NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
 146   NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
 147   NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
 148   NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
 149   NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
 150   NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
 151   NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
 152   NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
 153 };
 154
     /* Addressing-mode costs used by the "generic" tuning.  Every form is
        given cost 0, so no addressing mode is preferred over another by
        this table.  */
 155 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 156 __extension__
 157 #endif
 158 static const struct cpu_addrcost_table generic_addrcost_table =
 159 {
 160   NAMED_PARAM (pre_modify, 0),
 161   NAMED_PARAM (post_modify, 0),
 162   NAMED_PARAM (register_offset, 0),
 163   NAMED_PARAM (register_extend, 0),
 164   NAMED_PARAM (imm_offset, 0)
 165 };
 166
     /* Register-to-register move costs used by the "generic" tuning, keyed
        by source and destination register file (GP = general purpose,
        FP = floating point / SIMD).  */
 167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 168 __extension__
 169 #endif
 170 static const struct cpu_regmove_cost generic_regmove_cost =
 171 {
 172   NAMED_PARAM (GP2GP, 1),
 173   NAMED_PARAM (GP2FP, 2),
 174   NAMED_PARAM (FP2GP, 2),
 175   /* We currently do not provide direct support for TFmode Q->Q move.
 176      Therefore we need to raise the cost above 2 in order to have
 177      reload handle the situation.  */
 178   NAMED_PARAM (FP2FP, 4)
 179 };
 180
     /* The "generic" tuning set: pointers to the three generic cost tables
        above plus the memory move cost.  The three table pointers are
        always initialised positionally; only memmov_cost goes through
        NAMED_PARAM.  */
 181 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 182 __extension__
 183 #endif
 184 static const struct tune_params generic_tunings =
 185 {
 186   &generic_rtx_cost_table,
 187   &generic_addrcost_table,
 188   &generic_regmove_cost,
 189   NAMED_PARAM (memmov_cost, 4)
 190 };
 191
 192 /* A processor implementing AArch64.  Used for the entries of both the
        all_cores and all_architectures tables.  */
 193 struct processor
 194 {
 195   const char *const name;   /* Name; NULL marks the end of a table.  */
 196   enum aarch64_processor core;
 197   const char *arch;         /* Architecture, stored as a string.  */
 198   const unsigned long flags;
 199   const struct tune_params *const tune;   /* NULL in all_architectures.  */
 200 };
 201
 202 /* Processor cores implementing AArch64.  One entry is generated for each
        AARCH64_CORE line of aarch64-cores.def: the architecture number is
        stringified, the flags are ORed with AARCH64_FL_FOR_ARCH<ARCH>, and
        the tuning set is <COSTS>_tunings.  A "generic" entry and a
        NULL-named terminator follow.  */
 203 static const struct processor all_cores[] =
 204 {
 205 #define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
 206   {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
 207 #include "aarch64-cores.def"
 208 #undef AARCH64_CORE
 209   {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
 210   {NULL, aarch64_none, NULL, 0, NULL}
 211 };
 212
 213 /* Architectures implementing AArch64.  One entry is generated for each
        AARCH64_ARCH line of aarch64-arches.def, followed by a "generic" entry
        and a NULL-named terminator.  No entry carries a tuning set (the tune
        member is always NULL).  */
 214 static const struct processor all_architectures[] =
 215 {
 216 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
 217   {NAME, CORE, #ARCH, FLAGS, NULL},
 218 #include "aarch64-arches.def"
 219 #undef AARCH64_ARCH
 220   {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
 221   {NULL, aarch64_none, NULL, 0, NULL}
 222 };
 223
 224 /* Target specification.  These are populated as commandline arguments
 225    are processed, or NULL if not specified.  */
 226 static const struct processor *selected_arch;
 227 static const struct processor *selected_cpu;
 228 static const struct processor *selected_tune;
 229
     /* The flags of the selected CPU, or 0 when no CPU has been selected.  */
 230 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
 231
 232 /* An ISA extension in the co-processor and main instruction set space.  */
 233 struct aarch64_option_extension
 234 {
 235   const char *const name;          /* NULL marks the end of the table.  */
 236   const unsigned long flags_on;    /* Presumably the flags set when the
                                           extension is enabled -- confirm.  */
 237   const unsigned long flags_off;   /* Presumably the flags cleared when it
                                           is disabled -- confirm.  */
 238 };
 239
 240 /* ISA extensions in AArch64.  One entry is generated for each
        AARCH64_OPT_EXTENSION line of aarch64-option-extensions.def, followed
        by a NULL-named terminator.  */
 241 static const struct aarch64_option_extension all_extensions[] =
 242 {
 243 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
 244   {NAME, FLAGS_ON, FLAGS_OFF},
 245 #include "aarch64-option-extensions.def"
 246 #undef AARCH64_OPT_EXTENSION
 247   {NULL, 0, 0}
 248 };
 249
 250 /* Used to track the size of an address when generating a pre/post
 251    increment address.  */
 252 static enum machine_mode aarch64_memory_reference_mode;
 253
 254 /* Used to force GTY into this file.  */
 255 static GTY(()) int gty_dummy;
 256
 257 /* A table of valid AArch64 "bitmask immediate" values for
 258    logical instructions.  The table has no initializer here, so it is
        expected to be filled in at run time before it is searched.  */
 259
 260 #define AARCH64_NUM_BITMASKS  5334
 261 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
 262
 263 /* Did we set flag_omit_frame_pointer just so
 264    aarch64_frame_pointer_required would be called? */
 265 static bool faked_omit_frame_pointer;
 266
 267 typedef enum aarch64_cond_code
 268 {
 269   AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
 270   AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
 271   AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
 272 }
 273 aarch64_cc;
 274
 275 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
 276
 277 /* The names of the processor's condition codes, listed in the same
        order as the enumerators of aarch64_cc.  */
 278 static const char * const aarch64_condition_codes[] =
 279 {
 280   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
 281   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
 282 };
 283
 284 /* Provide a mapping from gcc register numbers to dwarf register numbers.  */
 285 unsigned
 286 aarch64_dbx_register_number (unsigned regno)
 287 {
 288    if (GP_REGNUM_P (regno))
 289      return AARCH64_DWARF_R0 + regno - R0_REGNUM;
 290    else if (regno == SP_REGNUM)
 291      return AARCH64_DWARF_SP;
 292    else if (FP_REGNUM_P (regno))
 293      return AARCH64_DWARF_V0 + regno - V0_REGNUM;
 294
 295    /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
 296       equivalent DWARF register.  */
 297    return DWARF_FRAME_REGISTERS;
 298 }
 299
 300 /* Return TRUE if MODE is any of the large INT modes.  */
 301 static bool
 302 aarch64_vect_struct_mode_p (enum machine_mode mode)
 303 {
 304   return mode == OImode || mode == CImode || mode == XImode;
 305 }
 306
 307 /* Return TRUE if MODE is any of the vector modes.  */
 308 static bool
 309 aarch64_vector_mode_p (enum machine_mode mode)
 310 {
 311   return aarch64_vector_mode_supported_p (mode)
 312          || aarch64_vect_struct_mode_p (mode);
 313 }
 314
 315 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
 316 static bool
 317 aarch64_array_mode_supported_p (enum machine_mode mode,
 318                                 unsigned HOST_WIDE_INT nelems)
 319 {
 320   if (TARGET_SIMD
 321       && AARCH64_VALID_SIMD_QREG_MODE (mode)
 322       && (nelems >= 2 && nelems <= 4))
 323     return true;
 324
 325   return false;
 326 }
 327
 328 /* Implement HARD_REGNO_NREGS.  */
 329
 330 int
 331 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
 332 {
 333   switch (aarch64_regno_regclass (regno))
 334     {
 335     case FP_REGS:
 336     case FP_LO_REGS:
 337       return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
 338     default:
 339       return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
 340     }
 341   gcc_unreachable ();
 342 }
 343
 344 /* Implement HARD_REGNO_MODE_OK.  */
 345
 346 int
 347 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
 348 {
 349   if (GET_MODE_CLASS (mode) == MODE_CC)
 350     return regno == CC_REGNUM;
 351
 352   if (regno == SP_REGNUM || regno == FRAME_POINTER_REGNUM
 353       || regno == ARG_POINTER_REGNUM)
 354     return mode == Pmode;
 355
 356   if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
 357     return 1;
 358
 359   if (FP_REGNUM_P (regno))
 360     {
 361       if (aarch64_vect_struct_mode_p (mode))
 362         return
 363           (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
 364       else
 365         return 1;
 366     }
 367
 368   return 0;
 369 }
 370
 371 /* Return true if calls to DECL should be treated as
 372    long-calls (ie called via a register).  As written, DECL is ignored
        and the answer is always false: no call is treated as a long call.  */
 373 static bool
 374 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
 375 {
 376   return false;
 377 }
 378
 379 /* Return true if calls to symbol-ref SYM should be treated as
 380    long-calls (ie called via a register).  */
 381 bool
 382 aarch64_is_long_call_p (rtx sym)
 383 {
 384   return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
 385 }
 386
 387 /* Return true if the offsets to a zero/sign-extract operation
 388    represent an expression that matches an extend operation.  The
 389    operands represent the paramters from
 390
 391    (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)).  */
 392 bool
 393 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
 394                                 rtx extract_imm)
 395 {
 396   HOST_WIDE_INT mult_val, extract_val;
 397
 398   if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
 399     return false;
 400
 401   mult_val = INTVAL (mult_imm);
 402   extract_val = INTVAL (extract_imm);
 403
 404   if (extract_val > 8
 405       && extract_val < GET_MODE_BITSIZE (mode)
 406       && exact_log2 (extract_val & ~7) > 0
 407       && (extract_val & 7) <= 4
 408       && mult_val == (1 << (extract_val & 7)))
 409     return true;
 410
 411   return false;
 412 }
 413
 414 /* Emit an insn that's a simple single-set.  Both the operands must be
 415    known to be valid.  */
 416 inline static rtx
 417 emit_set_insn (rtx x, rtx y)
 418 {
 419   return emit_insn (gen_rtx_SET (VOIDmode, x, y));
 420 }
 421
 422 /* X and Y are two things to compare using CODE.  Emit the compare insn and
 423    return the rtx for register 0 in the proper mode.  */
 424 rtx
 425 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
 426 {
 427   enum machine_mode mode = SELECT_CC_MODE (code, x, y);
 428   rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
 429
 430   emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
 431   return cc_reg;
 432 }
 433
 434 /* Build the SYMBOL_REF for __tls_get_addr.  */
 435
 436 static GTY(()) rtx tls_get_addr_libfunc;
 437
 438 rtx
 439 aarch64_tls_get_addr (void)
 440 {
 441   if (!tls_get_addr_libfunc)
 442     tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
 443   return tls_get_addr_libfunc;
 444 }
 445
 446 /* Return the TLS model to use for ADDR.  */
 447
 448 static enum tls_model
 449 tls_symbolic_operand_type (rtx addr)
 450 {
 451   enum tls_model tls_kind = TLS_MODEL_NONE;
 452   rtx sym, addend;
 453
 454   if (GET_CODE (addr) == CONST)
 455     {
 456       split_const (addr, &sym, &addend);
 457       if (GET_CODE (sym) == SYMBOL_REF)
 458         tls_kind = SYMBOL_REF_TLS_MODEL (sym);
 459     }
 460   else if (GET_CODE (addr) == SYMBOL_REF)
 461     tls_kind = SYMBOL_REF_TLS_MODEL (addr);
 462
 463   return tls_kind;
 464 }
 465
 466 /* We'll allow lo_sum's in addresses in our legitimate addresses
 467    so that combine would take care of combining addresses where
 468    necessary, but for generation purposes, we'll generate the address
 469    as :
 470    RTL                               Absolute
 471    tmp = hi (symbol_ref);            adrp  x1, foo
 472    dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo12:foo
 473                                      nop
 474
 475    PIC                               TLS
 476    adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
 477    ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
 478                                      bl   __tls_get_addr
 479                                      nop
 480
 481    Load TLS symbol, depending on TLS mechanism and TLS access model.
 482
 483    Global Dynamic - Traditional TLS:
 484    adrp tmp, :tlsgd:imm
 485    add  dest, tmp, #:tlsgd_lo12:imm
 486    bl   __tls_get_addr
 487
 488    Global Dynamic - TLS Descriptors:
 489    adrp dest, :tlsdesc:imm
 490    ldr  tmp, [dest, #:tlsdesc_lo12:imm]
 491    add  dest, dest, #:tlsdesc_lo12:imm
 492    blr  tmp
 493    mrs  tp, tpidr_el0
 494    add  dest, dest, tp
 495
 496    Initial Exec:
 497    mrs  tp, tpidr_el0
 498    adrp tmp, :gottprel:imm
 499    ldr  dest, [tmp, #:gottprel_lo12:imm]
 500    add  dest, dest, tp
 501
 502    Local Exec:
 503    mrs  tp, tpidr_el0
 504    add  t0, tp, #:tprel_hi12:imm
 505    add  t0, t0, #:tprel_lo12_nc:imm
 506 */
 507
 508 static void
 509 aarch64_load_symref_appropriately (rtx dest, rtx imm,
 510                                    enum aarch64_symbol_type type)
 511 {
 512   switch (type)
 513     {
 514     case SYMBOL_SMALL_ABSOLUTE:
 515       {
 516         rtx tmp_reg = dest;
 517         if (can_create_pseudo_p ())
 518           {
 519             tmp_reg =  gen_reg_rtx (Pmode);
 520           }
 521
 522         emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
 523         emit_insn (gen_add_losym (dest, tmp_reg, imm));
 524         return;
 525       }
 526
 527     case SYMBOL_SMALL_GOT:
 528       {
 529         rtx tmp_reg = dest;
 530         if (can_create_pseudo_p ())
 531           {
 532             tmp_reg =  gen_reg_rtx (Pmode);
 533           }
 534         emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
 535         emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
 536         return;
 537       }
 538
 539     case SYMBOL_SMALL_TLSGD:
 540       {
 541         rtx insns;
 542         rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
 543
 544         start_sequence ();
 545         emit_call_insn (gen_tlsgd_small (result, imm));
 546         insns = get_insns ();
 547         end_sequence ();
 548
 549         RTL_CONST_CALL_P (insns) = 1;
 550         emit_libcall_block (insns, dest, result, imm);
 551         return;
 552       }
 553
 554     case SYMBOL_SMALL_TLSDESC:
 555       {
 556         rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
 557         rtx tp;
 558
 559         emit_insn (gen_tlsdesc_small (imm));
 560         tp = aarch64_load_tp (NULL);
 561         emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
 562         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 563         return;
 564       }
 565
 566     case SYMBOL_SMALL_GOTTPREL:
 567       {
 568         rtx tmp_reg = gen_reg_rtx (Pmode);
 569         rtx tp = aarch64_load_tp (NULL);
 570         emit_insn (gen_tlsie_small (tmp_reg, imm));
 571         emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
 572         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 573         return;
 574       }
 575
 576     case SYMBOL_SMALL_TPREL:
 577       {
 578         rtx tp = aarch64_load_tp (NULL);
 579         emit_insn (gen_tlsle_small (dest, tp, imm));
 580         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 581         return;
 582       }
 583
 584     default:
 585       gcc_unreachable ();
 586     }
 587 }
 588
 589 /* Emit a move from SRC to DEST.  Assume that the move expanders can
 590    handle all moves if !can_create_pseudo_p ().  The distinction is
 591    important because, unlike emit_move_insn, the move expanders know
 592    how to force Pmode objects into the constant pool even when the
 593    constant pool address is not itself legitimate.  */
 594 static rtx
 595 aarch64_emit_move (rtx dest, rtx src)
 596 {
 597   return (can_create_pseudo_p ()
 598           ? emit_move_insn (dest, src)
 599           : emit_move_insn_1 (dest, src));
 600 }
 601
 602 void
 603 aarch64_split_128bit_move (rtx dst, rtx src)
 604 {
 605   rtx low_dst;
 606
 607   gcc_assert (GET_MODE (dst) == TImode);
 608
 609   if (REG_P (dst) && REG_P (src))
 610     {
 611       int src_regno = REGNO (src);
 612       int dst_regno = REGNO (dst);
 613
 614       gcc_assert (GET_MODE (src) == TImode);
 615
 616       /* Handle r -> w, w -> r.  */
 617       if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
 618         {
 619           emit_insn (gen_aarch64_movtilow_di (dst,
 620                                               gen_lowpart (word_mode, src)));
 621           emit_insn (gen_aarch64_movtihigh_di (dst,
 622                                                gen_highpart (word_mode, src)));
 623           return;
 624         }
 625       else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
 626         {
 627           emit_insn (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst),
 628                                               src));
 629           emit_insn (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst),
 630                                                src));
 631           return;
 632         }
 633       /* Fall through to r -> r cases.  */
 634     }
 635
 636   low_dst = gen_lowpart (word_mode, dst);
 637   if (REG_P (low_dst)
 638       && reg_overlap_mentioned_p (low_dst, src))
 639     {
 640       aarch64_emit_move (gen_highpart (word_mode, dst),
 641                          gen_highpart_mode (word_mode, TImode, src));
 642       aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
 643     }
 644   else
 645     {
 646       aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
 647       aarch64_emit_move (gen_highpart (word_mode, dst),
 648                          gen_highpart_mode (word_mode, TImode, src));
 649     }
 650 }
 651
 652 bool
 653 aarch64_split_128bit_move_p (rtx dst, rtx src)
 654 {
 655   return (! REG_P (src)
 656           || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
 657 }
 658
 659 static rtx
 660 aarch64_force_temporary (rtx x, rtx value)
 661 {
 662   if (can_create_pseudo_p ())
 663     return force_reg (Pmode, value);
 664   else
 665     {
 666       x = aarch64_emit_move (x, value);
 667       return x;
 668     }
 669 }
 670
 671
 672 static rtx
 673 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
 674 {
 675   if (!aarch64_plus_immediate (GEN_INT (offset), DImode))
 676     {
 677       rtx high;
 678       /* Load the full offset into a register.  This
 679          might be improvable in the future.  */
 680       high = GEN_INT (offset);
 681       offset = 0;
 682       high = aarch64_force_temporary (temp, high);
 683       reg = aarch64_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
 684     }
 685   return plus_constant (mode, reg, offset);
 686 }
 687
     /* Expand a move of the constant IMM into DEST, whose mode must be
        SImode or DImode.

        - Symbolic constants (SYMBOL_REF, LABEL_REF or CONST) are classified
          with aarch64_classify_symbol and then either loaded from the
          constant pool, built as symbol base plus offset, or handed to
          aarch64_load_symref_appropriately.
        - An integer accepted by aarch64_move_imm is emitted as one SET.
        - Any other non-integer is emitted directly if it is a HIGH, and is
          otherwise loaded from the constant pool.
        - The remaining SImode integers take a move of the low 16 bits plus
          an insert of the upper 16 bits.
        - The remaining DImode integers are tried against several
          two-instruction combinations before falling back to a move
          followed by one 16-bit insert per additional non-zero chunk.  */
 688 void
 689 aarch64_expand_mov_immediate (rtx dest, rtx imm)
 690 {
 691   enum machine_mode mode = GET_MODE (dest);
 692   unsigned HOST_WIDE_INT mask;
 693   int i;
 694   bool first;
 695   unsigned HOST_WIDE_INT val;
 696   bool subtargets;
 697   rtx subtarget;
 698   int one_match, zero_match;
 699
 700   gcc_assert (mode == SImode || mode == DImode);
 701
 702   /* Check on what type of symbol it is.  */
 703   if (GET_CODE (imm) == SYMBOL_REF
 704       || GET_CODE (imm) == LABEL_REF
 705       || GET_CODE (imm) == CONST)
 706     {
 707       rtx mem, base, offset;
 708       enum aarch64_symbol_type sty;
 709
 710       /* If we have (const (plus symbol offset)), separate out the offset
 711          before we start classifying the symbol.  */
 712       split_const (imm, &base, &offset);
 713
 714       sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
 715       switch (sty)
 716         {
 717         case SYMBOL_FORCE_TO_MEM:
               /* When symbol+offset may not go into the constant pool,
                  load the bare symbol and add the offset separately.  */
 718           if (offset != const0_rtx
 719               && targetm.cannot_force_const_mem (mode, imm))
 720             {
 721               gcc_assert(can_create_pseudo_p ());
 722               base = aarch64_force_temporary (dest, base);
 723               base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
 724               aarch64_emit_move (dest, base);
 725               return;
 726             }
 727           mem = force_const_mem (mode, imm);
 728           gcc_assert (mem);
 729           emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
 730           return;
 731
 732         case SYMBOL_SMALL_TLSGD:
 733         case SYMBOL_SMALL_TLSDESC:
 734         case SYMBOL_SMALL_GOTTPREL:
 735         case SYMBOL_SMALL_GOT:
               /* These kinds are loaded without their offset; a non-zero
                  offset is added afterwards.  */
 736           if (offset != const0_rtx)
 737             {
 738               gcc_assert(can_create_pseudo_p ());
 739               base = aarch64_force_temporary (dest, base);
 740               base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
 741               aarch64_emit_move (dest, base);
 742               return;
 743             }
 744           /* FALLTHRU */
 745
 746         case SYMBOL_SMALL_TPREL:
 747         case SYMBOL_SMALL_ABSOLUTE:
 748           aarch64_load_symref_appropriately (dest, imm, sty);
 749           return;
 750
 751         default:
 752           gcc_unreachable ();
 753         }
 754     }
 755
       /* An integer that needs only a single move instruction.  */
 756   if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
 757     {
 758       emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
 759       return;
 760     }
 761
 762   if (!CONST_INT_P (imm))
 763     {
 764       if (GET_CODE (imm) == HIGH)
 765         emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
 766       else
 767         {
 768           rtx mem = force_const_mem (mode, imm);
 769           gcc_assert (mem);
 770           emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
 771         }
 772
 773       return;
 774     }
 775
 776   if (mode == SImode)
 777     {
 778       /* We know we can't do this in 1 insn, and we must be able to do it
 779          in two; so don't mess around looking for sequences that don't buy
 780          us anything.  */
 781       emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
 782       emit_insn (gen_insv_immsi (dest, GEN_INT (16),
 783                                  GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
 784       return;
 785     }
 786
 787   /* Remaining cases are all for DImode.  */
 788
 789   val = INTVAL (imm);
       /* Intermediate values get their own pseudo only when optimizing and
          when pseudos can still be created; otherwise DEST is reused.  */
 790   subtargets = optimize && can_create_pseudo_p ();
 791
       /* Count the 16-bit chunks of VAL that are all zeros and those that
          are all ones.  */
 792   one_match = 0;
 793   zero_match = 0;
 794   mask = 0xffff;
 795
 796   for (i = 0; i < 64; i += 16, mask <<= 16)
 797     {
 798       if ((val & mask) == 0)
 799         zero_match++;
 800       else if ((val & mask) == mask)
 801         one_match++;
 802     }
 803
       /* Two chunks are all ones: move VAL with its lowest chunk that is
          not all ones forced to ones, then insert that chunk's real value.  */
 804   if (one_match == 2)
 805     {
 806       mask = 0xffff;
 807       for (i = 0; i < 64; i += 16, mask <<= 16)
 808         {
 809           if ((val & mask) != mask)
 810             {
 811               emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
 812               emit_insn (gen_insv_immdi (dest, GEN_INT (i),
 813                                          GEN_INT ((val >> i) & 0xffff)));
 814               return;
 815             }
 816         }
 817       gcc_unreachable ();
 818     }
 819
       /* Two chunks are zero: the plain move-plus-insert sequence at
          simple_sequence already needs only two instructions.  */
 820   if (zero_match == 2)
 821     goto simple_sequence;
 822
       /* For each of the three upper 16-bit chunks, try to express VAL as a
          value derived from that chunk plus an addend that
          aarch64_uimm12_shift accepts (the negated addend is tested in two
          of the four variants).  COMP is the lowest set bit of MASK.  */
 823   mask = 0x0ffff0000UL;
 824   for (i = 16; i < 64; i += 16, mask <<= 16)
 825     {
 826       HOST_WIDE_INT comp = mask & ~(mask - 1);
 827
 828       if (aarch64_uimm12_shift (val - (val & mask)))
 829         {
 830           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
 831
 832           emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
 833           emit_insn (gen_adddi3 (dest, subtarget,
 834                                  GEN_INT (val - (val & mask))));
 835           return;
 836         }
 837       else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
 838         {
 839           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
 840
 841           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
 842                                   GEN_INT ((val + comp) & mask)));
 843           emit_insn (gen_adddi3 (dest, subtarget,
 844                                  GEN_INT (val - ((val + comp) & mask))));
 845           return;
 846         }
 847       else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
 848         {
 849           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
 850
 851           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
 852                                   GEN_INT ((val - comp) | ~mask)));
 853           emit_insn (gen_adddi3 (dest, subtarget,
 854                                  GEN_INT (val - ((val - comp) | ~mask))));
 855           return;
 856         }
 857       else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
 858         {
 859           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
 860
 861           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
 862                                   GEN_INT (val | ~mask)));
 863           emit_insn (gen_adddi3 (dest, subtarget,
 864                                  GEN_INT (val - (val | ~mask))));
 865           return;
 866         }
 867     }
 868
 869   /* See if we can do it by arithmetically combining two
 870      immediates.  Each bitmask immediate is tried first as a base for an
          add, then as a base in which a single 16-bit chunk is replaced.  */
 871   for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
 872     {
 873       int j;
 874       mask = 0xffff;
 875
 876       if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
 877           || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
 878         {
 879           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
 880           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
 881                                   GEN_INT (aarch64_bitmasks[i])));
 882           emit_insn (gen_adddi3 (dest, subtarget,
 883                                  GEN_INT (val - aarch64_bitmasks[i])));
 884           return;
 885         }
 886
 887       for (j = 0; j < 64; j += 16, mask <<= 16)
 888         {
 889           if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
 890             {
 891               emit_insn (gen_rtx_SET (VOIDmode, dest,
 892                                       GEN_INT (aarch64_bitmasks[i])));
 893               emit_insn (gen_insv_immdi (dest, GEN_INT (j),
 894                                          GEN_INT ((val >> j) & 0xffff)));
 895               return;
 896             }
 897         }
 898     }
 899
 900   /* See if we can do it by logically combining two immediates.  */
 901   for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
 902     {
           /* Bitmask I is a subset of VAL's bits: look for a second bitmask
              whose OR with it gives VAL.  */
 903       if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
 904         {
 905           int j;
 906
 907           for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
 908             if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
 909               {
 910                 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
 911                 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
 912                                         GEN_INT (aarch64_bitmasks[i])));
 913                 emit_insn (gen_iordi3 (dest, subtarget,
 914                                        GEN_INT (aarch64_bitmasks[j])));
 915                 return;
 916               }
 917         }
           /* Bitmask I is a superset of VAL's bits: look for a second
              bitmask whose AND with it gives VAL.  */
 918       else if ((val & aarch64_bitmasks[i]) == val)
 919         {
 920           int j;
 921
 922           for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
 923             if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
 924               {
 925
 926                 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
 927                 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
 928                                         GEN_INT (aarch64_bitmasks[j])));
 929                 emit_insn (gen_anddi3 (dest, subtarget,
 930                                        GEN_INT (aarch64_bitmasks[i])));
 931                 return;
 932               }
 933         }
 934     }
 935
       /* Fallback: move the lowest non-zero 16-bit chunk of VAL, then insert
          every further non-zero chunk at its bit position.  */
 936  simple_sequence:
 937   first = true;
 938   mask = 0xffff;
 939   for (i = 0; i < 64; i += 16, mask <<= 16)
 940     {
 941       if ((val & mask) != 0)
 942         {
 943           if (first)
 944             {
 945               emit_insn (gen_rtx_SET (VOIDmode, dest,
 946                                       GEN_INT (val & mask)));
 947               first = false;
 948             }
 949           else
 950             emit_insn (gen_insv_immdi (dest, GEN_INT (i),
 951                                        GEN_INT ((val >> i) & 0xffff)));
 952         }
 953     }
 954 }
 955
 956 static bool
 957 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
 958 {
 959   /* Indirect calls are not currently supported.  */
 960   if (decl == NULL)
 961     return false;
 962
 963   /* Cannot tail-call to long-calls, since these are outside of the
 964      range of a branch instruction (we could handle this if we added
 965      support for indirect tail-calls.  */
 966   if (aarch64_decl_is_long_call_p (decl))
 967     return false;
 968
 969   return true;
 970 }
 971
 972 /* Implement TARGET_PASS_BY_REFERENCE.  */
 973
 974 static bool
 975 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
 976                            enum machine_mode mode,
 977                            const_tree type,
 978                            bool named ATTRIBUTE_UNUSED)
 979 {
 980   HOST_WIDE_INT size;
 981   enum machine_mode dummymode;
 982   int nregs;
 983
 984   /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
 985   size = (mode == BLKmode && type)
 986     ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
 987
 988   if (type)
 989     {
 990       /* Arrays always passed by reference.  */
 991       if (TREE_CODE (type) == ARRAY_TYPE)
 992         return true;
 993       /* Other aggregates based on their size.  */
 994       if (AGGREGATE_TYPE_P (type))
 995         size = int_size_in_bytes (type);
 996     }
 997
 998   /* Variable sized arguments are always returned by reference.  */
 999   if (size < 0)
1000     return true;
1001
1002   /* Can this be a candidate to be passed in fp/simd register(s)?  */
1003   if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1004                                                &dummymode, &nregs,
1005                                                NULL))
1006     return false;
1007
1008   /* Arguments which are variable sized or larger than 2 registers are
1009      passed by reference unless they are a homogenous floating point
1010      aggregate.  */
1011   return size > 2 * UNITS_PER_WORD;
1012 }
1013
1014 /* Return TRUE if VALTYPE is padded to its least significant bits.  */
1015 static bool
1016 aarch64_return_in_msb (const_tree valtype)
1017 {
1018   enum machine_mode dummy_mode;
1019   int dummy_int;
1020
1021   /* Never happens in little-endian mode.  */
1022   if (!BYTES_BIG_ENDIAN)
1023     return false;
1024
1025   /* Only composite types smaller than or equal to 16 bytes can
1026      be potentially returned in registers.  */
1027   if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1028       || int_size_in_bytes (valtype) <= 0
1029       || int_size_in_bytes (valtype) > 16)
1030     return false;
1031
1032   /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1033      or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1034      is always passed/returned in the least significant bits of fp/simd
1035      register(s).  */
1036   if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1037                                                &dummy_mode, &dummy_int, NULL))
1038     return false;
1039
1040   return true;
1041 }
1042
1043 /* Implement TARGET_FUNCTION_VALUE.
1044    Define how to find the value returned by a function.  */
1045
1046 static rtx
1047 aarch64_function_value (const_tree type, const_tree func,
1048                         bool outgoing ATTRIBUTE_UNUSED)
1049 {
1050   enum machine_mode mode;
1051   int unsignedp;
1052   int count;
1053   enum machine_mode ag_mode;
1054
1055   mode = TYPE_MODE (type);
1056   if (INTEGRAL_TYPE_P (type))
1057     mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1058
1059   if (aarch64_return_in_msb (type))
1060     {
1061       HOST_WIDE_INT size = int_size_in_bytes (type);
1062
1063       if (size % UNITS_PER_WORD != 0)
1064         {
1065           size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1066           mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1067         }
1068     }
1069
1070   if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1071                                                &ag_mode, &count, NULL))
1072     {
1073       if (!aarch64_composite_type_p (type, mode))
1074         {
1075           gcc_assert (count == 1 && mode == ag_mode);
1076           return gen_rtx_REG (mode, V0_REGNUM);
1077         }
1078       else
1079         {
1080           int i;
1081           rtx par;
1082
1083           par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1084           for (i = 0; i < count; i++)
1085             {
1086               rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1087               tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1088                                        GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1089               XVECEXP (par, 0, i) = tmp;
1090             }
1091           return par;
1092         }
1093     }
1094   else
1095     return gen_rtx_REG (mode, R0_REGNUM);
1096 }
1097
1098 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1099    Return true if REGNO is the number of a hard register in which the values
1100    of called function may come back.  */
1101
1102 static bool
1103 aarch64_function_value_regno_p (const unsigned int regno)
1104 {
1105   /* Maximum of 16 bytes can be returned in the general registers.  Examples
1106      of 16-byte return values are: 128-bit integers and 16-byte small
1107      structures (excluding homogeneous floating-point aggregates).  */
1108   if (regno == R0_REGNUM || regno == R1_REGNUM)
1109     return true;
1110
1111   /* Up to four fp/simd registers can return a function value, e.g. a
1112      homogeneous floating-point aggregate having four members.  */
1113   if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1114     return !TARGET_GENERAL_REGS_ONLY;
1115
1116   return false;
1117 }
1118
1119 /* Implement TARGET_RETURN_IN_MEMORY.
1120
1121    If the type T of the result of a function is such that
1122      void func (T arg)
1123    would require that arg be passed as a value in a register (or set of
1124    registers) according to the parameter passing rules, then the result
1125    is returned in the same registers as would be used for such an
1126    argument.  */
1127
1128 static bool
1129 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1130 {
1131   HOST_WIDE_INT size;
1132   enum machine_mode ag_mode;
1133   int count;
1134
1135   if (!AGGREGATE_TYPE_P (type)
1136       && TREE_CODE (type) != COMPLEX_TYPE
1137       && TREE_CODE (type) != VECTOR_TYPE)
1138     /* Simple scalar types always returned in registers.  */
1139     return false;
1140
1141   if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1142                                                type,
1143                                                &ag_mode,
1144                                                &count,
1145                                                NULL))
1146     return false;
1147
1148   /* Types larger than 2 registers returned in memory.  */
1149   size = int_size_in_bytes (type);
1150   return (size < 0 || size > 2 * UNITS_PER_WORD);
1151 }
1152
1153 static bool
1154 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1155                                const_tree type, int *nregs)
1156 {
1157   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1158   return aarch64_vfp_is_call_or_return_candidate (mode,
1159                                                   type,
1160                                                   &pcum->aapcs_vfp_rmode,
1161                                                   nregs,
1162                                                   NULL);
1163 }
1164
1165 /* Given MODE and TYPE of a function argument, return the alignment in
1166    bits.  The idea is to suppress any stronger alignment requested by
1167    the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1168    This is a helper function for local use only.  */
1169
1170 static unsigned int
1171 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1172 {
1173   unsigned int alignment;
1174
1175   if (type)
1176     {
1177       if (!integer_zerop (TYPE_SIZE (type)))
1178         {
1179           if (TYPE_MODE (type) == mode)
1180             alignment = TYPE_ALIGN (type);
1181           else
1182             alignment = GET_MODE_ALIGNMENT (mode);
1183         }
1184       else
1185         alignment = 0;
1186     }
1187   else
1188     alignment = GET_MODE_ALIGNMENT (mode);
1189
1190   return alignment;
1191 }
1192
1193 /* Layout a function argument according to the AAPCS64 rules.  The rule
1194    numbers refer to the rule numbers in the AAPCS64.  */
1195
1196 static void
1197 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1198                     const_tree type,
1199                     bool named ATTRIBUTE_UNUSED)
1200 {
1201   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1202   int ncrn, nvrn, nregs;
1203   bool allocate_ncrn, allocate_nvrn;
1204
1205   /* We need to do this once per argument.  */
1206   if (pcum->aapcs_arg_processed)
1207     return;
1208
1209   pcum->aapcs_arg_processed = true;
1210
1211   allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1212   allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1213                                                  mode,
1214                                                  type,
1215                                                  &nregs);
1216
1217   /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1218      The following code thus handles passing by SIMD/FP registers first.  */
1219
1220   nvrn = pcum->aapcs_nvrn;
1221
1222   /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
1223      and homogenous short-vector aggregates (HVA).  */
1224   if (allocate_nvrn)
1225     {
1226       if (nvrn + nregs <= NUM_FP_ARG_REGS)
1227         {
1228           pcum->aapcs_nextnvrn = nvrn + nregs;
1229           if (!aarch64_composite_type_p (type, mode))
1230             {
1231               gcc_assert (nregs == 1);
1232               pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1233             }
1234           else
1235             {
1236               rtx par;
1237               int i;
1238               par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1239               for (i = 0; i < nregs; i++)
1240                 {
1241                   rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1242                                          V0_REGNUM + nvrn + i);
1243                   tmp = gen_rtx_EXPR_LIST
1244                     (VOIDmode, tmp,
1245                      GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1246                   XVECEXP (par, 0, i) = tmp;
1247                 }
1248               pcum->aapcs_reg = par;
1249             }
1250           return;
1251         }
1252       else
1253         {
1254           /* C.3 NSRN is set to 8.  */
1255           pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1256           goto on_stack;
1257         }
1258     }
1259
1260   ncrn = pcum->aapcs_ncrn;
1261   nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1262            + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1263
1264
1265   /* C6 - C9.  though the sign and zero extension semantics are
1266      handled elsewhere.  This is the case where the argument fits
1267      entirely general registers.  */
1268   if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1269     {
1270       unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1271
1272       gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1273
1274       /* C.8 if the argument has an alignment of 16 then the NGRN is
1275          rounded up to the next even number.  */
1276       if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1277         {
1278           ++ncrn;
1279           gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1280         }
1281       /* NREGS can be 0 when e.g. an empty structure is to be passed.
1282          A reg is still generated for it, but the caller should be smart
1283          enough not to use it.  */
1284       if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1285         {
1286           pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1287         }
1288       else
1289         {
1290           rtx par;
1291           int i;
1292
1293           par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1294           for (i = 0; i < nregs; i++)
1295             {
1296               rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1297               tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1298                                        GEN_INT (i * UNITS_PER_WORD));
1299               XVECEXP (par, 0, i) = tmp;
1300             }
1301           pcum->aapcs_reg = par;
1302         }
1303
1304       pcum->aapcs_nextncrn = ncrn + nregs;
1305       return;
1306     }
1307
1308   /* C.11  */
1309   pcum->aapcs_nextncrn = NUM_ARG_REGS;
1310
1311   /* The argument is passed on stack; record the needed number of words for
1312      this argument (we can re-use NREGS) and align the total size if
1313      necessary.  */
1314 on_stack:
1315   pcum->aapcs_stack_words = nregs;
1316   if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1317     pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1318                                                16 / UNITS_PER_WORD) + 1;
1319   return;
1320 }
1321
1322 /* Implement TARGET_FUNCTION_ARG.  */
1323
1324 static rtx
1325 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1326                       const_tree type, bool named)
1327 {
1328   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1329   gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1330
1331   if (mode == VOIDmode)
1332     return NULL_RTX;
1333
1334   aarch64_layout_arg (pcum_v, mode, type, named);
1335   return pcum->aapcs_reg;
1336 }
1337
1338 void
1339 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1340                            const_tree fntype ATTRIBUTE_UNUSED,
1341                            rtx libname ATTRIBUTE_UNUSED,
1342                            const_tree fndecl ATTRIBUTE_UNUSED,
1343                            unsigned n_named ATTRIBUTE_UNUSED)
1344 {
1345   pcum->aapcs_ncrn = 0;
1346   pcum->aapcs_nvrn = 0;
1347   pcum->aapcs_nextncrn = 0;
1348   pcum->aapcs_nextnvrn = 0;
1349   pcum->pcs_variant = ARM_PCS_AAPCS64;
1350   pcum->aapcs_reg = NULL_RTX;
1351   pcum->aapcs_arg_processed = false;
1352   pcum->aapcs_stack_words = 0;
1353   pcum->aapcs_stack_size = 0;
1354
1355   return;
1356 }
1357
1358 static void
1359 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1360                               enum machine_mode mode,
1361                               const_tree type,
1362                               bool named)
1363 {
1364   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1365   if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1366     {
1367       aarch64_layout_arg (pcum_v, mode, type, named);
1368       gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1369                   != (pcum->aapcs_stack_words != 0));
1370       pcum->aapcs_arg_processed = false;
1371       pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1372       pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1373       pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1374       pcum->aapcs_stack_words = 0;
1375       pcum->aapcs_reg = NULL_RTX;
1376     }
1377 }
1378
1379 bool
1380 aarch64_function_arg_regno_p (unsigned regno)
1381 {
1382   return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1383           || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1384 }
1385
1386 /* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
1387    PARM_BOUNDARY bits of alignment, but will be given anything up
1388    to STACK_BOUNDARY bits if the type requires it.  This makes sure
1389    that both before and after the layout of each argument, the Next
1390    Stacked Argument Address (NSAA) will have a minimum alignment of
1391    8 bytes.  */
1392
1393 static unsigned int
1394 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1395 {
1396   unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1397
1398   if (alignment < PARM_BOUNDARY)
1399     alignment = PARM_BOUNDARY;
1400   if (alignment > STACK_BOUNDARY)
1401     alignment = STACK_BOUNDARY;
1402   return alignment;
1403 }
1404
1405 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1406
1407    Return true if an argument passed on the stack should be padded upwards,
1408    i.e. if the least-significant byte of the stack slot has useful data.
1409
1410    Small aggregate types are placed in the lowest memory address.
1411
1412    The related parameter passing rules are B.4, C.3, C.5 and C.14.  */
1413
1414 bool
1415 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1416 {
1417   /* On little-endian targets, the least significant byte of every stack
1418      argument is passed at the lowest byte address of the stack slot.  */
1419   if (!BYTES_BIG_ENDIAN)
1420     return true;
1421
1422   /* Otherwise, integral types and floating point types are padded downward:
1423      the least significant byte of a stack argument is passed at the highest
1424      byte address of the stack slot.  */
1425   if (type
1426       ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type))
1427       : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1428     return false;
1429
1430   /* Everything else padded upward, i.e. data in first byte of stack slot.  */
1431   return true;
1432 }
1433
1434 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1435
1436    It specifies padding for the last (may also be the only)
1437    element of a block move between registers and memory.  If
1438    assuming the block is in the memory, padding upward means that
1439    the last element is padded after its highest significant byte,
1440    while in downward padding, the last element is padded at the
1441    its least significant byte side.
1442
1443    Small aggregates and small complex types are always padded
1444    upwards.
1445
1446    We don't need to worry about homogeneous floating-point or
1447    short-vector aggregates; their move is not affected by the
1448    padding direction determined here.  Regardless of endianness,
1449    each element of such an aggregate is put in the least
1450    significant bits of a fp/simd register.
1451
1452    Return !BYTES_BIG_ENDIAN if the least significant byte of the
1453    register has useful data, and return the opposite if the most
1454    significant byte does.  */
1455
1456 bool
1457 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1458                      bool first ATTRIBUTE_UNUSED)
1459 {
1460
1461   /* Small composite types are always padded upward.  */
1462   if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1463     {
1464       HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1465                             : GET_MODE_SIZE (mode));
1466       if (size < 2 * UNITS_PER_WORD)
1467         return true;
1468     }
1469
1470   /* Otherwise, use the default padding.  */
1471   return !BYTES_BIG_ENDIAN;
1472 }
1473
1474 static enum machine_mode
1475 aarch64_libgcc_cmp_return_mode (void)
1476 {
1477   return SImode;
1478 }
1479
1480 static bool
1481 aarch64_frame_pointer_required (void)
1482 {
1483   /* If the function contains dynamic stack allocations, we need to
1484      use the frame pointer to access the static parts of the frame.  */
1485   if (cfun->calls_alloca)
1486     return true;
1487
1488   /* We may have turned flag_omit_frame_pointer on in order to have this
1489      function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1490      and we'll check it here.
1491      If we really did set flag_omit_frame_pointer normally, then we return false
1492      (no frame pointer required) in all cases.  */
1493
1494   if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1495     return false;
1496   else if (flag_omit_leaf_frame_pointer)
1497     return !crtl->is_leaf;
1498   return true;
1499 }
1500
1501 /* Mark the registers that need to be saved by the callee and calculate
1502    the size of the callee-saved registers area and frame record (both FP
1503    and LR may be omitted).  */
1504 static void
1505 aarch64_layout_frame (void)
1506 {
1507   HOST_WIDE_INT offset = 0;
1508   int regno;
1509
1510   if (reload_completed && cfun->machine->frame.laid_out)
1511     return;
1512
1513   cfun->machine->frame.fp_lr_offset = 0;
1514
1515   /* First mark all the registers that really need to be saved...  */
1516   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1517     cfun->machine->frame.reg_offset[regno] = -1;
1518
1519   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1520     cfun->machine->frame.reg_offset[regno] = -1;
1521
1522   /* ... that includes the eh data registers (if needed)...  */
1523   if (crtl->calls_eh_return)
1524     for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1525       cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1526
1527   /* ... and any callee saved register that dataflow says is live.  */
1528   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1529     if (df_regs_ever_live_p (regno)
1530         && !call_used_regs[regno])
1531       cfun->machine->frame.reg_offset[regno] = 0;
1532
1533   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1534     if (df_regs_ever_live_p (regno)
1535         && !call_used_regs[regno])
1536       cfun->machine->frame.reg_offset[regno] = 0;
1537
1538   if (frame_pointer_needed)
1539     {
1540       cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1541       cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1542       cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1543     }
1544
1545   /* Now assign stack slots for them.  */
1546   for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1547     if (cfun->machine->frame.reg_offset[regno] != -1)
1548       {
1549         cfun->machine->frame.reg_offset[regno] = offset;
1550         offset += UNITS_PER_WORD;
1551       }
1552
1553   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1554     if (cfun->machine->frame.reg_offset[regno] != -1)
1555       {
1556         cfun->machine->frame.reg_offset[regno] = offset;
1557         offset += UNITS_PER_WORD;
1558       }
1559
1560   if (frame_pointer_needed)
1561     {
1562       cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1563       offset += UNITS_PER_WORD;
1564       cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1565     }
1566
1567   if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1568     {
1569       cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1570       offset += UNITS_PER_WORD;
1571       cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1572     }
1573
1574   cfun->machine->frame.padding0 =
1575     (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1576   offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1577
1578   cfun->machine->frame.saved_regs_size = offset;
1579   cfun->machine->frame.laid_out = true;
1580 }
1581
1582 /* Make the last instruction frame-related and note that it performs
1583    the operation described by FRAME_PATTERN.  */
1584
1585 static void
1586 aarch64_set_frame_expr (rtx frame_pattern)
1587 {
1588   rtx insn;
1589
1590   insn = get_last_insn ();
1591   RTX_FRAME_RELATED_P (insn) = 1;
1592   RTX_FRAME_RELATED_P (frame_pattern) = 1;
1593   REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1594                                       frame_pattern,
1595                                       REG_NOTES (insn));
1596 }
1597
1598 static bool
1599 aarch64_register_saved_on_entry (int regno)
1600 {
1601   return cfun->machine->frame.reg_offset[regno] != -1;
1602 }
1603
1604
1605 static void
1606 aarch64_save_or_restore_fprs (int start_offset, int increment,
1607                               bool restore, rtx base_rtx)
1608
1609 {
1610   unsigned regno;
1611   unsigned regno2;
1612   rtx insn;
1613   rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1614
1615
1616   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1617     {
1618       if (aarch64_register_saved_on_entry (regno))
1619         {
1620           rtx mem;
1621           mem = gen_mem_ref (DFmode,
1622                              plus_constant (Pmode,
1623                                             base_rtx,
1624                                             start_offset));
1625
1626           for (regno2 = regno + 1;
1627                regno2 <= V31_REGNUM
1628                  && !aarch64_register_saved_on_entry (regno2);
1629                regno2++)
1630             {
1631               /* Empty loop.  */
1632             }
1633           if (regno2 <= V31_REGNUM &&
1634               aarch64_register_saved_on_entry (regno2))
1635             {
1636               rtx mem2;
1637               /* Next highest register to be saved.  */
1638               mem2 = gen_mem_ref (DFmode,
1639                                   plus_constant
1640                                   (Pmode,
1641                                    base_rtx,
1642                                    start_offset + increment));
1643               if (restore == false)
1644                 {
1645                   insn = emit_insn
1646                     ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1647                                         mem2, gen_rtx_REG (DFmode, regno2)));
1648
1649                 }
1650               else
1651                 {
1652                   insn = emit_insn
1653                     ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1654                                        gen_rtx_REG (DFmode, regno2), mem2));
1655
1656                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
1657                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
1658                 }
1659
1660                   /* The first part of a frame-related parallel insn
1661                      is always assumed to be relevant to the frame
1662                      calculations; subsequent parts, are only
1663                      frame-related if explicitly marked.  */
1664               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1665                                             1)) = 1;
1666               regno = regno2;
1667               start_offset += increment * 2;
1668             }
1669           else
1670             {
1671               if (restore == false)
1672                 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1673               else
1674                 {
1675                   insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1676                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1677                 }
1678               start_offset += increment;
1679             }
1680           RTX_FRAME_RELATED_P (insn) = 1;
1681         }
1682     }
1683
1684 }
1685
1686
1687 /* offset from the stack pointer of where the saves and
1688    restore's have to happen.  */
1689 static void
1690 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1691                                             bool restore)
1692 {
1693   rtx insn;
1694   rtx base_rtx = stack_pointer_rtx;
1695   HOST_WIDE_INT start_offset = offset;
1696   HOST_WIDE_INT increment = UNITS_PER_WORD;
1697   rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1698   unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1699   unsigned regno;
1700   unsigned regno2;
1701
1702   for (regno = R0_REGNUM; regno <= limit; regno++)
1703     {
1704       if (aarch64_register_saved_on_entry (regno))
1705         {
1706           rtx mem;
1707           mem = gen_mem_ref (Pmode,
1708                              plus_constant (Pmode,
1709                                             base_rtx,
1710                                             start_offset));
1711
1712           for (regno2 = regno + 1;
1713                regno2 <= limit
1714                  && !aarch64_register_saved_on_entry (regno2);
1715                regno2++)
1716             {
1717               /* Empty loop.  */
1718             }
1719           if (regno2 <= limit &&
1720               aarch64_register_saved_on_entry (regno2))
1721             {
1722               rtx mem2;
1723               /* Next highest register to be saved.  */
1724               mem2 = gen_mem_ref (Pmode,
1725                                   plus_constant
1726                                   (Pmode,
1727                                    base_rtx,
1728                                    start_offset + increment));
1729               if (restore == false)
1730                 {
1731                   insn = emit_insn
1732                     ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1733                                         mem2, gen_rtx_REG (DImode, regno2)));
1734
1735                 }
1736               else
1737                 {
1738                   insn = emit_insn
1739                     ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1740                                      gen_rtx_REG (DImode, regno2), mem2));
1741
1742                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1743                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1744                 }
1745
1746                   /* The first part of a frame-related parallel insn
1747                      is always assumed to be relevant to the frame
1748                      calculations; subsequent parts, are only
1749                      frame-related if explicitly marked.  */
1750               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1751                                             1)) = 1;
1752               regno = regno2;
1753               start_offset += increment * 2;
1754             }
1755           else
1756             {
1757               if (restore == false)
1758                 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1759               else
1760                 {
1761                   insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1762                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1763                 }
1764               start_offset += increment;
1765             }
1766           RTX_FRAME_RELATED_P (insn) = 1;
1767         }
1768     }
1769
1770   aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1771
1772 }
1773
1774 /* AArch64 stack frames generated by this compiler look like:
1775
1776         +-------------------------------+
1777         |                               |
1778         |  incoming stack arguments     |
1779         |                               |
1780         +-------------------------------+ <-- arg_pointer_rtx
1781         |                               |
1782         |  callee-allocated save area   |
1783         |  for register varargs         |
1784         |                               |
1785         +-------------------------------+
1786         |                               |
1787         |  local variables              |
1788         |                               |
1789         +-------------------------------+ <-- frame_pointer_rtx
1790         |                               |
1791         |  callee-saved registers       |
1792         |                               |
1793         +-------------------------------+
1794         |  LR'                          |
1795         +-------------------------------+
1796         |  FP'                          |
1797       P +-------------------------------+ <-- hard_frame_pointer_rtx
1798         |  dynamic allocation           |
1799         +-------------------------------+
1800         |                               |
1801         |  outgoing stack arguments     |
1802         |                               |
1803         +-------------------------------+ <-- stack_pointer_rtx
1804
1805    Dynamic stack allocations such as alloca insert data at point P.
1806    They decrease stack_pointer_rtx but leave frame_pointer_rtx and
1807    hard_frame_pointer_rtx unchanged.  */
1808
1809 /* Generate the prologue instructions for entry into a function.
1810    Establish the stack frame by decreasing the stack pointer with a
1811    properly calculated size and, if necessary, create a frame record
1812    filled with the values of LR and previous frame pointer.  The
1813    current FP is also set up is it is in use.  */
1814
1815 void
1816 aarch64_expand_prologue (void)
1817 {
1818   /* sub sp, sp, #<frame_size>
1819      stp {fp, lr}, [sp, #<frame_size> - 16]
1820      add fp, sp, #<frame_size> - hardfp_offset
1821      stp {cs_reg}, [fp, #-16] etc.
1822
1823      sub sp, sp, <final_adjustment_if_any>
1824   */
1825   HOST_WIDE_INT original_frame_size;    /* local variables + vararg save */
1826   HOST_WIDE_INT frame_size, offset;
1827   HOST_WIDE_INT fp_offset;              /* FP offset from SP */
1828   rtx insn;
1829
1830   aarch64_layout_frame ();
1831   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1832   gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
1833               && (cfun->stdarg || !cfun->machine->saved_varargs_size));
1834   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1835                 + crtl->outgoing_args_size);
1836   offset = frame_size = AARCH64_ROUND_UP (frame_size,
1837                                           STACK_BOUNDARY / BITS_PER_UNIT);
1838
1839   if (flag_stack_usage_info)
1840     current_function_static_stack_size = frame_size;
1841
1842   fp_offset = (offset
1843                - original_frame_size
1844                - cfun->machine->frame.saved_regs_size);
1845
1846   /* Store pairs and load pairs have a range only -512 to 504.  */
1847   if (offset >= 512)
1848     {
1849       /* When the frame has a large size, an initial decrease is done on
1850          the stack pointer to jump over the callee-allocated save area for
1851          register varargs, the local variable area and/or the callee-saved
1852          register area.  This will allow the pre-index write-back
1853          store pair instructions to be used for setting up the stack frame
1854          efficiently.  */
1855       offset = original_frame_size + cfun->machine->frame.saved_regs_size;
1856       if (offset >= 512)
1857         offset = cfun->machine->frame.saved_regs_size;
1858
1859       frame_size -= (offset + crtl->outgoing_args_size);
1860       fp_offset = 0;
1861
1862       if (frame_size >= 0x1000000)
1863         {
1864           rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
1865           emit_move_insn (op0, GEN_INT (-frame_size));
1866           emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
1867           aarch64_set_frame_expr (gen_rtx_SET
1868                                   (Pmode, stack_pointer_rtx,
1869                                    gen_rtx_PLUS (Pmode,
1870                                                  stack_pointer_rtx,
1871                                                  GEN_INT (-frame_size))));
1872         }
1873       else if (frame_size > 0)
1874         {
1875           if ((frame_size & 0xfff) != frame_size)
1876             {
1877               insn = emit_insn (gen_add2_insn
1878                                 (stack_pointer_rtx,
1879                                  GEN_INT (-(frame_size
1880                                             & ~(HOST_WIDE_INT)0xfff))));
1881               RTX_FRAME_RELATED_P (insn) = 1;
1882             }
1883           if ((frame_size & 0xfff) != 0)
1884             {
1885               insn = emit_insn (gen_add2_insn
1886                                 (stack_pointer_rtx,
1887                                  GEN_INT (-(frame_size
1888                                             & (HOST_WIDE_INT)0xfff))));
1889               RTX_FRAME_RELATED_P (insn) = 1;
1890             }
1891         }
1892     }
1893   else
1894     frame_size = -1;
1895
1896   if (offset > 0)
1897     {
1898       /* Save the frame pointer and lr if the frame pointer is needed
1899          first.  Make the frame pointer point to the location of the
1900          old frame pointer on the stack.  */
1901       if (frame_pointer_needed)
1902         {
1903           rtx mem_fp, mem_lr;
1904
1905           if (fp_offset)
1906             {
1907               insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1908                                                GEN_INT (-offset)));
1909               RTX_FRAME_RELATED_P (insn) = 1;
1910               aarch64_set_frame_expr (gen_rtx_SET
1911                                       (Pmode, stack_pointer_rtx,
1912                                        gen_rtx_MINUS (Pmode,
1913                                                       stack_pointer_rtx,
1914                                                       GEN_INT (offset))));
1915               mem_fp = gen_frame_mem (DImode,
1916                                       plus_constant (Pmode,
1917                                                      stack_pointer_rtx,
1918                                                      fp_offset));
1919               mem_lr = gen_frame_mem (DImode,
1920                                       plus_constant (Pmode,
1921                                                      stack_pointer_rtx,
1922                                                      fp_offset
1923                                                      + UNITS_PER_WORD));
1924               insn = emit_insn (gen_store_pairdi (mem_fp,
1925                                                   hard_frame_pointer_rtx,
1926                                                   mem_lr,
1927                                                   gen_rtx_REG (DImode,
1928                                                                LR_REGNUM)));
1929             }
1930           else
1931             {
1932               insn = emit_insn (gen_storewb_pairdi_di
1933                                 (stack_pointer_rtx, stack_pointer_rtx,
1934                                  hard_frame_pointer_rtx,
1935                                  gen_rtx_REG (DImode, LR_REGNUM),
1936                                  GEN_INT (-offset),
1937                                  GEN_INT (GET_MODE_SIZE (DImode) - offset)));
1938               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
1939             }
1940
1941           /* The first part of a frame-related parallel insn is always
1942              assumed to be relevant to the frame calculations;
1943              subsequent parts, are only frame-related if explicitly
1944              marked.  */
1945           RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1946           RTX_FRAME_RELATED_P (insn) = 1;
1947
1948           /* Set up frame pointer to point to the location of the
1949              previous frame pointer on the stack.  */
1950           insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
1951                                            stack_pointer_rtx,
1952                                            GEN_INT (fp_offset)));
1953           aarch64_set_frame_expr (gen_rtx_SET
1954                                   (Pmode, hard_frame_pointer_rtx,
1955                                    gen_rtx_PLUS (Pmode,
1956                                                  stack_pointer_rtx,
1957                                                  GEN_INT (fp_offset))));
1958           RTX_FRAME_RELATED_P (insn) = 1;
1959           insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
1960                                            hard_frame_pointer_rtx));
1961         }
1962       else
1963         {
1964           insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1965                                            GEN_INT (-offset)));
1966           RTX_FRAME_RELATED_P (insn) = 1;
1967         }
1968
1969       aarch64_save_or_restore_callee_save_registers
1970         (fp_offset + cfun->machine->frame.hardfp_offset, 0);
1971     }
1972
1973   /* when offset >= 512,
1974      sub sp, sp, #<outgoing_args_size> */
1975   if (frame_size > -1)
1976     {
1977       if (crtl->outgoing_args_size > 0)
1978         {
1979           insn = emit_insn (gen_add2_insn
1980                             (stack_pointer_rtx,
1981                              GEN_INT (- crtl->outgoing_args_size)));
1982           RTX_FRAME_RELATED_P (insn) = 1;
1983         }
1984     }
1985 }
1986
1987 /* Generate the epilogue instructions for returning from a function.  */
1988 void
1989 aarch64_expand_epilogue (bool for_sibcall)
1990 {
1991   HOST_WIDE_INT original_frame_size, frame_size, offset;
1992   HOST_WIDE_INT fp_offset;
1993   rtx insn;
1994   rtx cfa_reg;
1995
1996   aarch64_layout_frame ();
1997   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1998   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1999                 + crtl->outgoing_args_size);
2000   offset = frame_size = AARCH64_ROUND_UP (frame_size,
2001                                           STACK_BOUNDARY / BITS_PER_UNIT);
2002
2003   fp_offset = (offset
2004                - original_frame_size
2005                - cfun->machine->frame.saved_regs_size);
2006
2007   cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2008
2009   /* Store pairs and load pairs have a range only -512 to 504.  */
2010   if (offset >= 512)
2011     {
2012       offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2013       if (offset >= 512)
2014         offset = cfun->machine->frame.saved_regs_size;
2015
2016       frame_size -= (offset + crtl->outgoing_args_size);
2017       fp_offset = 0;
2018       if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2019         {
2020           insn = emit_insn (gen_add2_insn
2021                             (stack_pointer_rtx,
2022                              GEN_INT (crtl->outgoing_args_size)));
2023           RTX_FRAME_RELATED_P (insn) = 1;
2024         }
2025     }
2026   else
2027     frame_size = -1;
2028
2029   /* If there were outgoing arguments or we've done dynamic stack
2030      allocation, then restore the stack pointer from the frame
2031      pointer.  This is at most one insn and more efficient than using
2032      GCC's internal mechanism.  */
2033   if (frame_pointer_needed
2034       && (crtl->outgoing_args_size || cfun->calls_alloca))
2035     {
2036       insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2037                                        hard_frame_pointer_rtx,
2038                                        GEN_INT (- fp_offset)));
2039       RTX_FRAME_RELATED_P (insn) = 1;
2040       /* As SP is set to (FP - fp_offset), according to the rules in
2041          dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2042          from the value of SP from now on.  */
2043       cfa_reg = stack_pointer_rtx;
2044     }
2045
2046   aarch64_save_or_restore_callee_save_registers
2047     (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2048
2049   /* Restore the frame pointer and lr if the frame pointer is needed.  */
2050   if (offset > 0)
2051     {
2052       if (frame_pointer_needed)
2053         {
2054           rtx mem_fp, mem_lr;
2055
2056           if (fp_offset)
2057             {
2058               mem_fp = gen_frame_mem (DImode,
2059                                       plus_constant (Pmode,
2060                                                      stack_pointer_rtx,
2061                                                      fp_offset));
2062               mem_lr = gen_frame_mem (DImode,
2063                                       plus_constant (Pmode,
2064                                                      stack_pointer_rtx,
2065                                                      fp_offset
2066                                                      + UNITS_PER_WORD));
2067               insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2068                                                  mem_fp,
2069                                                  gen_rtx_REG (DImode,
2070                                                               LR_REGNUM),
2071                                                  mem_lr));
2072             }
2073           else
2074             {
2075               insn = emit_insn (gen_loadwb_pairdi_di
2076                                 (stack_pointer_rtx,
2077                                  stack_pointer_rtx,
2078                                  hard_frame_pointer_rtx,
2079                                  gen_rtx_REG (DImode, LR_REGNUM),
2080                                  GEN_INT (offset),
2081                                  GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2082               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2083               add_reg_note (insn, REG_CFA_ADJUST_CFA,
2084                             (gen_rtx_SET (Pmode, stack_pointer_rtx,
2085                                           plus_constant (Pmode, cfa_reg,
2086                                                          offset))));
2087             }
2088
2089           /* The first part of a frame-related parallel insn
2090              is always assumed to be relevant to the frame
2091              calculations; subsequent parts, are only
2092              frame-related if explicitly marked.  */
2093           RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2094           RTX_FRAME_RELATED_P (insn) = 1;
2095           add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2096           add_reg_note (insn, REG_CFA_RESTORE,
2097                         gen_rtx_REG (DImode, LR_REGNUM));
2098
2099           if (fp_offset)
2100             {
2101               insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2102                                                GEN_INT (offset)));
2103               RTX_FRAME_RELATED_P (insn) = 1;
2104             }
2105         }
2106       else
2107         {
2108           insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2109                                            GEN_INT (offset)));
2110           RTX_FRAME_RELATED_P (insn) = 1;
2111         }
2112     }
2113
2114   /* Stack adjustment for exception handler.  */
2115   if (crtl->calls_eh_return)
2116     {
2117       /* We need to unwind the stack by the offset computed by
2118          EH_RETURN_STACKADJ_RTX.  However, at this point the CFA is
2119          based on SP.  Ideally we would update the SP and define the
2120          CFA along the lines of:
2121
2122          SP = SP + EH_RETURN_STACKADJ_RTX
2123          (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2124
2125          However the dwarf emitter only understands a constant
2126          register offset.
2127
2128          The solution choosen here is to use the otherwise unused IP0
2129          as a temporary register to hold the current SP value.  The
2130          CFA is described using IP0 then SP is modified.  */
2131
2132       rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2133
2134       insn = emit_move_insn (ip0, stack_pointer_rtx);
2135       add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2136       RTX_FRAME_RELATED_P (insn) = 1;
2137
2138       emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2139
2140       /* Ensure the assignment to IP0 does not get optimized away.  */
2141       emit_use (ip0);
2142     }
2143
2144   if (frame_size > -1)
2145     {
2146       if (frame_size >= 0x1000000)
2147         {
2148           rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2149           emit_move_insn (op0, GEN_INT (frame_size));
2150           emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2151           aarch64_set_frame_expr (gen_rtx_SET
2152                                   (Pmode, stack_pointer_rtx,
2153                                    gen_rtx_PLUS (Pmode,
2154                                                  stack_pointer_rtx,
2155                                                  GEN_INT (frame_size))));
2156         }
2157       else if (frame_size > 0)
2158         {
2159           if ((frame_size & 0xfff) != 0)
2160             {
2161               insn = emit_insn (gen_add2_insn
2162                                 (stack_pointer_rtx,
2163                                  GEN_INT ((frame_size
2164                                            & (HOST_WIDE_INT) 0xfff))));
2165               RTX_FRAME_RELATED_P (insn) = 1;
2166             }
2167           if ((frame_size & 0xfff) != frame_size)
2168             {
2169               insn = emit_insn (gen_add2_insn
2170                                 (stack_pointer_rtx,
2171                                  GEN_INT ((frame_size
2172                                            & ~ (HOST_WIDE_INT) 0xfff))));
2173               RTX_FRAME_RELATED_P (insn) = 1;
2174             }
2175         }
2176
2177       aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2178                                            gen_rtx_PLUS (Pmode,
2179                                                          stack_pointer_rtx,
2180                                                          GEN_INT (offset))));
2181     }
2182
2183   emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2184   if (!for_sibcall)
2185     emit_jump_insn (ret_rtx);
2186 }
2187
2188 /* Return the place to copy the exception unwinding return address to.
2189    This will probably be a stack slot, but could (in theory be the
2190    return register).  */
2191 rtx
2192 aarch64_final_eh_return_addr (void)
2193 {
2194   HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2195   aarch64_layout_frame ();
2196   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2197   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2198                 + crtl->outgoing_args_size);
2199   offset = frame_size = AARCH64_ROUND_UP (frame_size,
2200                                           STACK_BOUNDARY / BITS_PER_UNIT);
2201   fp_offset = offset
2202     - original_frame_size
2203     - cfun->machine->frame.saved_regs_size;
2204
2205   if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2206     return gen_rtx_REG (DImode, LR_REGNUM);
2207
2208   /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
2209      result in a store to save LR introduced by builtin_eh_return () being
2210      incorrectly deleted because the alias is not detected.
2211      So in the calculation of the address to copy the exception unwinding
2212      return address to, we note 2 cases.
2213      If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2214      we return a SP-relative location since all the addresses are SP-relative
2215      in this case.  This prevents the store from being optimized away.
2216      If the fp_offset is not 0, then the addresses will be FP-relative and
2217      therefore we return a FP-relative location.  */
2218
2219   if (frame_pointer_needed)
2220     {
2221       if (fp_offset)
2222         return gen_frame_mem (DImode,
2223                               plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2224       else
2225         return gen_frame_mem (DImode,
2226                               plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2227     }
2228
2229   /* If FP is not needed, we calculate the location of LR, which would be
2230      at the top of the saved registers block.  */
2231
2232   return gen_frame_mem (DImode,
2233                         plus_constant (Pmode,
2234                                        stack_pointer_rtx,
2235                                        fp_offset
2236                                        + cfun->machine->frame.saved_regs_size
2237                                        - 2 * UNITS_PER_WORD));
2238 }
2239
2240 /* Output code to build up a constant in a register.  */
2241 static void
2242 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2243 {
2244   if (aarch64_bitmask_imm (val, DImode))
2245     emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2246   else
2247     {
2248       int i;
2249       int ncount = 0;
2250       int zcount = 0;
2251       HOST_WIDE_INT valp = val >> 16;
2252       HOST_WIDE_INT valm;
2253       HOST_WIDE_INT tval;
2254
2255       for (i = 16; i < 64; i += 16)
2256         {
2257           valm = (valp & 0xffff);
2258
2259           if (valm != 0)
2260             ++ zcount;
2261
2262           if (valm != 0xffff)
2263             ++ ncount;
2264
2265           valp >>= 16;
2266         }
2267
2268       /* zcount contains the number of additional MOVK instructions
2269          required if the constant is built up with an initial MOVZ instruction,
2270          while ncount is the number of MOVK instructions required if starting
2271          with a MOVN instruction.  Choose the sequence that yields the fewest
2272          number of instructions, preferring MOVZ instructions when they are both
2273          the same.  */
2274       if (ncount < zcount)
2275         {
2276           emit_move_insn (gen_rtx_REG (Pmode, regnum),
2277                           GEN_INT ((~val) & 0xffff));
2278           tval = 0xffff;
2279         }
2280       else
2281         {
2282           emit_move_insn (gen_rtx_REG (Pmode, regnum),
2283                           GEN_INT (val & 0xffff));
2284           tval = 0;
2285         }
2286
2287       val >>= 16;
2288
2289       for (i = 16; i < 64; i += 16)
2290         {
2291           if ((val & 0xffff) != tval)
2292             emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2293                                        GEN_INT (i), GEN_INT (val & 0xffff)));
2294           val >>= 16;
2295         }
2296     }
2297 }
2298
2299 static void
2300 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2301 {
2302   HOST_WIDE_INT mdelta = delta;
2303   rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2304   rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2305
2306   if (mdelta < 0)
2307     mdelta = -mdelta;
2308
2309   if (mdelta >= 4096 * 4096)
2310     {
2311       aarch64_build_constant (scratchreg, delta);
2312       emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2313     }
2314   else if (mdelta > 0)
2315     {
2316       if (mdelta >= 4096)
2317         {
2318           emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2319           rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2320           if (delta < 0)
2321             emit_insn (gen_rtx_SET (Pmode, this_rtx,
2322                                     gen_rtx_MINUS (Pmode, this_rtx, shift)));
2323           else
2324             emit_insn (gen_rtx_SET (Pmode, this_rtx,
2325                                     gen_rtx_PLUS (Pmode, this_rtx, shift)));
2326         }
2327       if (mdelta % 4096 != 0)
2328         {
2329           scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2330           emit_insn (gen_rtx_SET (Pmode, this_rtx,
2331                                   gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2332         }
2333     }
2334 }
2335
2336 /* Output code to add DELTA to the first argument, and then jump
2337    to FUNCTION.  Used for C++ multiple inheritance.  */
2338 static void
2339 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2340                          HOST_WIDE_INT delta,
2341                          HOST_WIDE_INT vcall_offset,
2342                          tree function)
2343 {
2344   /* The this pointer is always in x0.  Note that this differs from
2345      Arm where the this pointer maybe bumped to r1 if r0 is required
2346      to return a pointer to an aggregate.  On AArch64 a result value
2347      pointer will be in x8.  */
2348   int this_regno = R0_REGNUM;
2349   rtx this_rtx, temp0, temp1, addr, insn, funexp;
2350
2351   reload_completed = 1;
2352   emit_note (NOTE_INSN_PROLOGUE_END);
2353
2354   if (vcall_offset == 0)
2355     aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2356   else
2357     {
2358       gcc_assert ((vcall_offset & 0x7) == 0);
2359
2360       this_rtx = gen_rtx_REG (Pmode, this_regno);
2361       temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2362       temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2363
2364       addr = this_rtx;
2365       if (delta != 0)
2366         {
2367           if (delta >= -256 && delta < 256)
2368             addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2369                                        plus_constant (Pmode, this_rtx, delta));
2370           else
2371             aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2372         }
2373
2374       aarch64_emit_move (temp0, gen_rtx_MEM (Pmode, addr));
2375
2376       if (vcall_offset >= -256 && vcall_offset < 32768)
2377           addr = plus_constant (Pmode, temp0, vcall_offset);
2378       else
2379         {
2380           aarch64_build_constant (IP1_REGNUM, vcall_offset);
2381           addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2382         }
2383
2384       aarch64_emit_move (temp1, gen_rtx_MEM (Pmode,addr));
2385       emit_insn (gen_add2_insn (this_rtx, temp1));
2386     }
2387
2388   /* Generate a tail call to the target function.  */
2389   if (!TREE_USED (function))
2390     {
2391       assemble_external (function);
2392       TREE_USED (function) = 1;
2393     }
2394   funexp = XEXP (DECL_RTL (function), 0);
2395   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2396   insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2397   SIBLING_CALL_P (insn) = 1;
2398
2399   insn = get_insns ();
2400   shorten_branches (insn);
2401   final_start_function (insn, file, 1);
2402   final (insn, file, 1);
2403   final_end_function ();
2404
2405   /* Stop pretending to be a post-reload pass.  */
2406   reload_completed = 0;
2407 }
2408
2409 static int
2410 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2411 {
2412   if (GET_CODE (*x) == SYMBOL_REF)
2413     return SYMBOL_REF_TLS_MODEL (*x) != 0;
2414
2415   /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2416      TLS offsets, not real symbol references.  */
2417   if (GET_CODE (*x) == UNSPEC
2418       && XINT (*x, 1) == UNSPEC_TLS)
2419     return -1;
2420
2421   return 0;
2422 }
2423
2424 static bool
2425 aarch64_tls_referenced_p (rtx x)
2426 {
2427   if (!TARGET_HAVE_TLS)
2428     return false;
2429
2430   return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2431 }
2432
2433
2434 static int
2435 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2436 {
2437   const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2438   const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2439
2440   if (*imm1 < *imm2)
2441     return -1;
2442   if (*imm1 > *imm2)
2443     return +1;
2444   return 0;
2445 }
2446
2447
2448 static void
2449 aarch64_build_bitmask_table (void)
2450 {
2451   unsigned HOST_WIDE_INT mask, imm;
2452   unsigned int log_e, e, s, r;
2453   unsigned int nimms = 0;
2454
2455   for (log_e = 1; log_e <= 6; log_e++)
2456     {
2457       e = 1 << log_e;
2458       if (e == 64)
2459         mask = ~(HOST_WIDE_INT) 0;
2460       else
2461         mask = ((HOST_WIDE_INT) 1 << e) - 1;
2462       for (s = 1; s < e; s++)
2463         {
2464           for (r = 0; r < e; r++)
2465             {
2466               /* set s consecutive bits to 1 (s < 64) */
2467               imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2468               /* rotate right by r */
2469               if (r != 0)
2470                 imm = ((imm >> r) | (imm << (e - r))) & mask;
2471               /* replicate the constant depending on SIMD size */
2472               switch (log_e) {
2473               case 1: imm |= (imm <<  2);
2474               case 2: imm |= (imm <<  4);
2475               case 3: imm |= (imm <<  8);
2476               case 4: imm |= (imm << 16);
2477               case 5: imm |= (imm << 32);
2478               case 6:
2479                 break;
2480               default:
2481                 gcc_unreachable ();
2482               }
2483               gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2484               aarch64_bitmasks[nimms++] = imm;
2485             }
2486         }
2487     }
2488
2489   gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2490   qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2491          aarch64_bitmasks_cmp);
2492 }
2493
2494
2495 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2496    a left shift of 0 or 12 bits.  */
2497 bool
2498 aarch64_uimm12_shift (HOST_WIDE_INT val)
2499 {
2500   return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2501           || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2502           );
2503 }
2504
2505
2506 /* Return true if val is an immediate that can be loaded into a
2507    register by a MOVZ instruction.  */
2508 static bool
2509 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2510 {
2511   if (GET_MODE_SIZE (mode) > 4)
2512     {
2513       if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2514           || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2515         return 1;
2516     }
2517   else
2518     {
2519       /* Ignore sign extension.  */
2520       val &= (HOST_WIDE_INT) 0xffffffff;
2521     }
2522   return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2523           || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2524 }
2525
2526
2527 /* Return true if val is a valid bitmask immediate.  */
2528 bool
2529 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2530 {
2531   if (GET_MODE_SIZE (mode) < 8)
2532     {
2533       /* Replicate bit pattern.  */
2534       val &= (HOST_WIDE_INT) 0xffffffff;
2535       val |= val << 32;
2536     }
2537   return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2538                   sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2539 }
2540
2541
2542 /* Return true if val is an immediate that can be loaded into a
2543    register in a single instruction.  */
2544 bool
2545 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2546 {
2547   if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2548     return 1;
2549   return aarch64_bitmask_imm (val, mode);
2550 }
2551
2552 static bool
2553 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2554 {
2555   rtx base, offset;
2556   if (GET_CODE (x) == HIGH)
2557     return true;
2558
2559   split_const (x, &base, &offset);
2560   if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2561     return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) != SYMBOL_FORCE_TO_MEM);
2562
2563   return aarch64_tls_referenced_p (x);
2564 }
2565
2566 /* Return true if register REGNO is a valid index register.
2567    STRICT_P is true if REG_OK_STRICT is in effect.  */
2568
2569 bool
2570 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2571 {
2572   if (!HARD_REGISTER_NUM_P (regno))
2573     {
2574       if (!strict_p)
2575         return true;
2576
2577       if (!reg_renumber)
2578         return false;
2579
2580       regno = reg_renumber[regno];
2581     }
2582   return GP_REGNUM_P (regno);
2583 }
2584
2585 /* Return true if register REGNO is a valid base register for mode MODE.
2586    STRICT_P is true if REG_OK_STRICT is in effect.  */
2587
2588 bool
2589 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2590 {
2591   if (!HARD_REGISTER_NUM_P (regno))
2592     {
2593       if (!strict_p)
2594         return true;
2595
2596       if (!reg_renumber)
2597         return false;
2598
2599       regno = reg_renumber[regno];
2600     }
2601
2602   /* The fake registers will be eliminated to either the stack or
2603      hard frame pointer, both of which are usually valid base registers.
2604      Reload deals with the cases where the eliminated form isn't valid.  */
2605   return (GP_REGNUM_P (regno)
2606           || regno == SP_REGNUM
2607           || regno == FRAME_POINTER_REGNUM
2608           || regno == ARG_POINTER_REGNUM);
2609 }
2610
2611 /* Return true if X is a valid base register for mode MODE.
2612    STRICT_P is true if REG_OK_STRICT is in effect.  */
2613
2614 static bool
2615 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2616 {
2617   if (!strict_p && GET_CODE (x) == SUBREG)
2618     x = SUBREG_REG (x);
2619
2620   return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2621 }
2622
2623 /* Return true if address offset is a valid index.  If it is, fill in INFO
2624    appropriately.  STRICT_P is true if REG_OK_STRICT is in effect.  */
2625
2626 static bool
2627 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2628                         enum machine_mode mode, bool strict_p)
2629 {
2630   enum aarch64_address_type type;
2631   rtx index;
2632   int shift;
2633
2634   /* (reg:P) */
2635   if ((REG_P (x) || GET_CODE (x) == SUBREG)
2636       && GET_MODE (x) == Pmode)
2637     {
2638       type = ADDRESS_REG_REG;
2639       index = x;
2640       shift = 0;
2641     }
2642   /* (sign_extend:DI (reg:SI)) */
2643   else if ((GET_CODE (x) == SIGN_EXTEND
2644             || GET_CODE (x) == ZERO_EXTEND)
2645            && GET_MODE (x) == DImode
2646            && GET_MODE (XEXP (x, 0)) == SImode)
2647     {
2648       type = (GET_CODE (x) == SIGN_EXTEND)
2649         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2650       index = XEXP (x, 0);
2651       shift = 0;
2652     }
2653   /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2654   else if (GET_CODE (x) == MULT
2655            && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2656                || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2657            && GET_MODE (XEXP (x, 0)) == DImode
2658            && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2659            && CONST_INT_P (XEXP (x, 1)))
2660     {
2661       type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2662         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2663       index = XEXP (XEXP (x, 0), 0);
2664       shift = exact_log2 (INTVAL (XEXP (x, 1)));
2665     }
2666   /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2667   else if (GET_CODE (x) == ASHIFT
2668            && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2669                || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2670            && GET_MODE (XEXP (x, 0)) == DImode
2671            && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2672            && CONST_INT_P (XEXP (x, 1)))
2673     {
2674       type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2675         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2676       index = XEXP (XEXP (x, 0), 0);
2677       shift = INTVAL (XEXP (x, 1));
2678     }
2679   /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2680   else if ((GET_CODE (x) == SIGN_EXTRACT
2681             || GET_CODE (x) == ZERO_EXTRACT)
2682            && GET_MODE (x) == DImode
2683            && GET_CODE (XEXP (x, 0)) == MULT
2684            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2685            && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2686     {
2687       type = (GET_CODE (x) == SIGN_EXTRACT)
2688         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2689       index = XEXP (XEXP (x, 0), 0);
2690       shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2691       if (INTVAL (XEXP (x, 1)) != 32 + shift
2692           || INTVAL (XEXP (x, 2)) != 0)
2693         shift = -1;
2694     }
2695   /* (and:DI (mult:DI (reg:DI) (const_int scale))
2696      (const_int 0xffffffff<<shift)) */
2697   else if (GET_CODE (x) == AND
2698            && GET_MODE (x) == DImode
2699            && GET_CODE (XEXP (x, 0)) == MULT
2700            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2701            && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2702            && CONST_INT_P (XEXP (x, 1)))
2703     {
2704       type = ADDRESS_REG_UXTW;
2705       index = XEXP (XEXP (x, 0), 0);
2706       shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2707       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2708         shift = -1;
2709     }
2710   /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2711   else if ((GET_CODE (x) == SIGN_EXTRACT
2712             || GET_CODE (x) == ZERO_EXTRACT)
2713            && GET_MODE (x) == DImode
2714            && GET_CODE (XEXP (x, 0)) == ASHIFT
2715            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2716            && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2717     {
2718       type = (GET_CODE (x) == SIGN_EXTRACT)
2719         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2720       index = XEXP (XEXP (x, 0), 0);
2721       shift = INTVAL (XEXP (XEXP (x, 0), 1));
2722       if (INTVAL (XEXP (x, 1)) != 32 + shift
2723           || INTVAL (XEXP (x, 2)) != 0)
2724         shift = -1;
2725     }
2726   /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2727      (const_int 0xffffffff<<shift)) */
2728   else if (GET_CODE (x) == AND
2729            && GET_MODE (x) == DImode
2730            && GET_CODE (XEXP (x, 0)) == ASHIFT
2731            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2732            && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2733            && CONST_INT_P (XEXP (x, 1)))
2734     {
2735       type = ADDRESS_REG_UXTW;
2736       index = XEXP (XEXP (x, 0), 0);
2737       shift = INTVAL (XEXP (XEXP (x, 0), 1));
2738       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2739         shift = -1;
2740     }
2741   /* (mult:P (reg:P) (const_int scale)) */
2742   else if (GET_CODE (x) == MULT
2743            && GET_MODE (x) == Pmode
2744            && GET_MODE (XEXP (x, 0)) == Pmode
2745            && CONST_INT_P (XEXP (x, 1)))
2746     {
2747       type = ADDRESS_REG_REG;
2748       index = XEXP (x, 0);
2749       shift = exact_log2 (INTVAL (XEXP (x, 1)));
2750     }
2751   /* (ashift:P (reg:P) (const_int shift)) */
2752   else if (GET_CODE (x) == ASHIFT
2753            && GET_MODE (x) == Pmode
2754            && GET_MODE (XEXP (x, 0)) == Pmode
2755            && CONST_INT_P (XEXP (x, 1)))
2756     {
2757       type = ADDRESS_REG_REG;
2758       index = XEXP (x, 0);
2759       shift = INTVAL (XEXP (x, 1));
2760     }
2761   else
2762     return false;
2763
2764   if (GET_CODE (index) == SUBREG)
2765     index = SUBREG_REG (index);
2766
2767   if ((shift == 0 ||
2768        (shift > 0 && shift <= 3
2769         && (1 << shift) == GET_MODE_SIZE (mode)))
2770       && REG_P (index)
2771       && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2772     {
2773       info->type = type;
2774       info->offset = index;
2775       info->shift = shift;
2776       return true;
2777     }
2778
2779   return false;
2780 }
2781
2782 static inline bool
2783 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2784 {
2785   return (offset >= -64 * GET_MODE_SIZE (mode)
2786           && offset < 64 * GET_MODE_SIZE (mode)
2787           && offset % GET_MODE_SIZE (mode) == 0);
2788 }
2789
2790 static inline bool
2791 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
2792                                HOST_WIDE_INT offset)
2793 {
2794   return offset >= -256 && offset < 256;
2795 }
2796
2797 static inline bool
2798 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2799 {
2800   return (offset >= 0
2801           && offset < 4096 * GET_MODE_SIZE (mode)
2802           && offset % GET_MODE_SIZE (mode) == 0);
2803 }
2804
2805 /* Return true if X is a valid address for machine mode MODE.  If it is,
2806    fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
2807    effect.  OUTER_CODE is PARALLEL for a load/store pair.  */
2808
2809 static bool
2810 aarch64_classify_address (struct aarch64_address_info *info,
2811                           rtx x, enum machine_mode mode,
2812                           RTX_CODE outer_code, bool strict_p)
2813 {
2814   enum rtx_code code = GET_CODE (x);
2815   rtx op0, op1;
2816   bool allow_reg_index_p =
2817     outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
2818
2819   /* Don't support anything other than POST_INC or REG addressing for
2820      AdvSIMD.  */
2821   if (aarch64_vector_mode_p (mode)
2822       && (code != POST_INC && code != REG))
2823     return false;
2824
2825   switch (code)
2826     {
2827     case REG:
2828     case SUBREG:
2829       info->type = ADDRESS_REG_IMM;
2830       info->base = x;
2831       info->offset = const0_rtx;
2832       return aarch64_base_register_rtx_p (x, strict_p);
2833
2834     case PLUS:
2835       op0 = XEXP (x, 0);
2836       op1 = XEXP (x, 1);
2837       if (GET_MODE_SIZE (mode) != 0
2838           && CONST_INT_P (op1)
2839           && aarch64_base_register_rtx_p (op0, strict_p))
2840         {
2841           HOST_WIDE_INT offset = INTVAL (op1);
2842
2843           info->type = ADDRESS_REG_IMM;
2844           info->base = op0;
2845           info->offset = op1;
2846
2847           /* TImode and TFmode values are allowed in both pairs of X
2848              registers and individual Q registers.  The available
2849              address modes are:
2850              X,X: 7-bit signed scaled offset
2851              Q:   9-bit signed offset
2852              We conservatively require an offset representable in either mode.
2853            */
2854           if (mode == TImode || mode == TFmode)
2855             return (offset_7bit_signed_scaled_p (mode, offset)
2856                     && offset_9bit_signed_unscaled_p (mode, offset));
2857
2858           if (outer_code == PARALLEL)
2859             return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2860                     && offset_7bit_signed_scaled_p (mode, offset));
2861           else
2862             return (offset_9bit_signed_unscaled_p (mode, offset)
2863                     || offset_12bit_unsigned_scaled_p (mode, offset));
2864         }
2865
2866       if (allow_reg_index_p)
2867         {
2868           /* Look for base + (scaled/extended) index register.  */
2869           if (aarch64_base_register_rtx_p (op0, strict_p)
2870               && aarch64_classify_index (info, op1, mode, strict_p))
2871             {
2872               info->base = op0;
2873               return true;
2874             }
2875           if (aarch64_base_register_rtx_p (op1, strict_p)
2876               && aarch64_classify_index (info, op0, mode, strict_p))
2877             {
2878               info->base = op1;
2879               return true;
2880             }
2881         }
2882
2883       return false;
2884
2885     case POST_INC:
2886     case POST_DEC:
2887     case PRE_INC:
2888     case PRE_DEC:
2889       info->type = ADDRESS_REG_WB;
2890       info->base = XEXP (x, 0);
2891       info->offset = NULL_RTX;
2892       return aarch64_base_register_rtx_p (info->base, strict_p);
2893
2894     case POST_MODIFY:
2895     case PRE_MODIFY:
2896       info->type = ADDRESS_REG_WB;
2897       info->base = XEXP (x, 0);
2898       if (GET_CODE (XEXP (x, 1)) == PLUS
2899           && CONST_INT_P (XEXP (XEXP (x, 1), 1))
2900           && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
2901           && aarch64_base_register_rtx_p (info->base, strict_p))
2902         {
2903           HOST_WIDE_INT offset;
2904           info->offset = XEXP (XEXP (x, 1), 1);
2905           offset = INTVAL (info->offset);
2906
2907           /* TImode and TFmode values are allowed in both pairs of X
2908              registers and individual Q registers.  The available
2909              address modes are:
2910              X,X: 7-bit signed scaled offset
2911              Q:   9-bit signed offset
2912              We conservatively require an offset representable in either mode.
2913            */
2914           if (mode == TImode || mode == TFmode)
2915             return (offset_7bit_signed_scaled_p (mode, offset)
2916                     && offset_9bit_signed_unscaled_p (mode, offset));
2917
2918           if (outer_code == PARALLEL)
2919             return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2920                     && offset_7bit_signed_scaled_p (mode, offset));
2921           else
2922             return offset_9bit_signed_unscaled_p (mode, offset);
2923         }
2924       return false;
2925
2926     case CONST:
2927     case SYMBOL_REF:
2928     case LABEL_REF:
2929       /* load literal: pc-relative constant pool entry.  */
2930       info->type = ADDRESS_SYMBOLIC;
2931       if (outer_code != PARALLEL)
2932         {
2933           rtx sym, addend;
2934
2935           split_const (x, &sym, &addend);
2936           return (GET_CODE (sym) == LABEL_REF
2937                   || (GET_CODE (sym) == SYMBOL_REF
2938                       && CONSTANT_POOL_ADDRESS_P (sym)));
2939         }
2940       return false;
2941
2942     case LO_SUM:
2943       info->type = ADDRESS_LO_SUM;
2944       info->base = XEXP (x, 0);
2945       info->offset = XEXP (x, 1);
2946       if (allow_reg_index_p
2947           && aarch64_base_register_rtx_p (info->base, strict_p))
2948         {
2949           rtx sym, offs;
2950           split_const (info->offset, &sym, &offs);
2951           if (GET_CODE (sym) == SYMBOL_REF
2952               && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
2953                   == SYMBOL_SMALL_ABSOLUTE))
2954             {
2955               /* The symbol and offset must be aligned to the access size.  */
2956               unsigned int align;
2957               unsigned int ref_size;
2958
2959               if (CONSTANT_POOL_ADDRESS_P (sym))
2960                 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
2961               else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
2962                 {
2963                   tree exp = SYMBOL_REF_DECL (sym);
2964                   align = TYPE_ALIGN (TREE_TYPE (exp));
2965                   align = CONSTANT_ALIGNMENT (exp, align);
2966                 }
2967               else if (SYMBOL_REF_DECL (sym))
2968                 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
2969               else
2970                 align = BITS_PER_UNIT;
2971
2972               ref_size = GET_MODE_SIZE (mode);
2973               if (ref_size == 0)
2974                 ref_size = GET_MODE_SIZE (DImode);
2975
2976               return ((INTVAL (offs) & (ref_size - 1)) == 0
2977                       && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
2978             }
2979         }
2980       return false;
2981
2982     default:
2983       return false;
2984     }
2985 }
2986
2987 bool
2988 aarch64_symbolic_address_p (rtx x)
2989 {
2990   rtx offset;
2991
2992   split_const (x, &x, &offset);
2993   return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
2994 }
2995
2996 /* Classify the base of symbolic expression X, given that X appears in
2997    context CONTEXT.  */
2998 static enum aarch64_symbol_type
2999 aarch64_classify_symbolic_expression (rtx x, enum aarch64_symbol_context context)
3000 {
3001   rtx offset;
3002   split_const (x, &x, &offset);
3003   return aarch64_classify_symbol (x, context);
3004 }
3005
3006
3007 /* Return TRUE if X is a legitimate address for accessing memory in
3008    mode MODE.  */
3009 static bool
3010 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3011 {
3012   struct aarch64_address_info addr;
3013
3014   return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3015 }
3016
3017 /* Return TRUE if X is a legitimate address for accessing memory in
3018    mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
3019    pair operation.  */
3020 bool
3021 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3022                            RTX_CODE outer_code, bool strict_p)
3023 {
3024   struct aarch64_address_info addr;
3025
3026   return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3027 }
3028
3029 /* Return TRUE if rtx X is immediate constant 0.0 */
3030 bool
3031 aarch64_float_const_zero_rtx_p (rtx x)
3032 {
3033   REAL_VALUE_TYPE r;
3034
3035   if (GET_MODE (x) == VOIDmode)
3036     return false;
3037
3038   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3039   if (REAL_VALUE_MINUS_ZERO (r))
3040     return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3041   return REAL_VALUES_EQUAL (r, dconst0);
3042 }
3043
3044 /* Return the fixed registers used for condition codes.  */
3045
3046 static bool
3047 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3048 {
3049   *p1 = CC_REGNUM;
3050   *p2 = INVALID_REGNUM;
3051   return true;
3052 }
3053
3054 enum machine_mode
3055 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3056 {
3057   /* All floating point compares return CCFP if it is an equality
3058      comparison, and CCFPE otherwise.  */
3059   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3060     {
3061       switch (code)
3062         {
3063         case EQ:
3064         case NE:
3065         case UNORDERED:
3066         case ORDERED:
3067         case UNLT:
3068         case UNLE:
3069         case UNGT:
3070         case UNGE:
3071         case UNEQ:
3072         case LTGT:
3073           return CCFPmode;
3074
3075         case LT:
3076         case LE:
3077         case GT:
3078         case GE:
3079           return CCFPEmode;
3080
3081         default:
3082           gcc_unreachable ();
3083         }
3084     }
3085
3086   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3087       && y == const0_rtx
3088       && (code == EQ || code == NE || code == LT || code == GE)
3089       && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS))
3090     return CC_NZmode;
3091
3092   /* A compare with a shifted operand.  Because of canonicalization,
3093      the comparison will have to be swapped when we emit the assembly
3094      code.  */
3095   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3096       && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3097       && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3098           || GET_CODE (x) == LSHIFTRT
3099           || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3100     return CC_SWPmode;
3101
3102   /* A compare of a mode narrower than SI mode against zero can be done
3103      by extending the value in the comparison.  */
3104   if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3105       && y == const0_rtx)
3106     /* Only use sign-extension if we really need it.  */
3107     return ((code == GT || code == GE || code == LE || code == LT)
3108             ? CC_SESWPmode : CC_ZESWPmode);
3109
3110   /* For everything else, return CCmode.  */
3111   return CCmode;
3112 }
3113
3114 static unsigned
3115 aarch64_get_condition_code (rtx x)
3116 {
3117   enum machine_mode mode = GET_MODE (XEXP (x, 0));
3118   enum rtx_code comp_code = GET_CODE (x);
3119
3120   if (GET_MODE_CLASS (mode) != MODE_CC)
3121     mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3122
3123   switch (mode)
3124     {
3125     case CCFPmode:
3126     case CCFPEmode:
3127       switch (comp_code)
3128         {
3129         case GE: return AARCH64_GE;
3130         case GT: return AARCH64_GT;
3131         case LE: return AARCH64_LS;
3132         case LT: return AARCH64_MI;
3133         case NE: return AARCH64_NE;
3134         case EQ: return AARCH64_EQ;
3135         case ORDERED: return AARCH64_VC;
3136         case UNORDERED: return AARCH64_VS;
3137         case UNLT: return AARCH64_LT;
3138         case UNLE: return AARCH64_LE;
3139         case UNGT: return AARCH64_HI;
3140         case UNGE: return AARCH64_PL;
3141         default: gcc_unreachable ();
3142         }
3143       break;
3144
3145     case CCmode:
3146       switch (comp_code)
3147         {
3148         case NE: return AARCH64_NE;
3149         case EQ: return AARCH64_EQ;
3150         case GE: return AARCH64_GE;
3151         case GT: return AARCH64_GT;
3152         case LE: return AARCH64_LE;
3153         case LT: return AARCH64_LT;
3154         case GEU: return AARCH64_CS;
3155         case GTU: return AARCH64_HI;
3156         case LEU: return AARCH64_LS;
3157         case LTU: return AARCH64_CC;
3158         default: gcc_unreachable ();
3159         }
3160       break;
3161
3162     case CC_SWPmode:
3163     case CC_ZESWPmode:
3164     case CC_SESWPmode:
3165       switch (comp_code)
3166         {
3167         case NE: return AARCH64_NE;
3168         case EQ: return AARCH64_EQ;
3169         case GE: return AARCH64_LE;
3170         case GT: return AARCH64_LT;
3171         case LE: return AARCH64_GE;
3172         case LT: return AARCH64_GT;
3173         case GEU: return AARCH64_LS;
3174         case GTU: return AARCH64_CC;
3175         case LEU: return AARCH64_CS;
3176         case LTU: return AARCH64_HI;
3177         default: gcc_unreachable ();
3178         }
3179       break;
3180
3181     case CC_NZmode:
3182       switch (comp_code)
3183         {
3184         case NE: return AARCH64_NE;
3185         case EQ: return AARCH64_EQ;
3186         case GE: return AARCH64_PL;
3187         case LT: return AARCH64_MI;
3188         default: gcc_unreachable ();
3189         }
3190       break;
3191
3192     default:
3193       gcc_unreachable ();
3194       break;
3195     }
3196 }
3197
3198 static unsigned
3199 bit_count (unsigned HOST_WIDE_INT value)
3200 {
3201   unsigned count = 0;
3202
3203   while (value)
3204     {
3205       count++;
3206       value &= value - 1;
3207     }
3208
3209   return count;
3210 }
3211
3212 void
3213 aarch64_print_operand (FILE *f, rtx x, char code)
3214 {
3215   switch (code)
3216     {
3217     case 'e':
3218       /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w.  */
3219       {
3220         int n;
3221
3222         if (GET_CODE (x) != CONST_INT
3223             || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3224           {
3225             output_operand_lossage ("invalid operand for '%%%c'", code);
3226             return;
3227           }
3228
3229         switch (n)
3230           {
3231           case 3:
3232             fputc ('b', f);
3233             break;
3234           case 4:
3235             fputc ('h', f);
3236             break;
3237           case 5:
3238             fputc ('w', f);
3239             break;
3240           default:
3241             output_operand_lossage ("invalid operand for '%%%c'", code);
3242             return;
3243           }
3244       }
3245       break;
3246
3247     case 'p':
3248       {
3249         int n;
3250
3251         /* Print N such that 2^N == X.  */
3252         if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3253           {
3254             output_operand_lossage ("invalid operand for '%%%c'", code);
3255             return;
3256           }
3257
3258         asm_fprintf (f, "%d", n);
3259       }
3260       break;
3261
3262     case 'P':
3263       /* Print the number of non-zero bits in X (a const_int).  */
3264       if (GET_CODE (x) != CONST_INT)
3265         {
3266           output_operand_lossage ("invalid operand for '%%%c'", code);
3267           return;
3268         }
3269
3270       asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3271       break;
3272
3273     case 'H':
3274       /* Print the higher numbered register of a pair (TImode) of regs.  */
3275       if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3276         {
3277           output_operand_lossage ("invalid operand for '%%%c'", code);
3278           return;
3279         }
3280
3281       asm_fprintf (f, "%r", REGNO (x) + 1);
3282       break;
3283
3284     case 'Q':
3285       /* Print the least significant register of a pair (TImode) of regs.  */
3286       if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3287         {
3288           output_operand_lossage ("invalid operand for '%%%c'", code);
3289           return;
3290         }
3291       asm_fprintf (f, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
3292       break;
3293
3294     case 'R':
3295       /* Print the most significant register of a pair (TImode) of regs.  */
3296       if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3297         {
3298           output_operand_lossage ("invalid operand for '%%%c'", code);
3299           return;
3300         }
3301       asm_fprintf (f, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
3302       break;
3303
3304     case 'm':
3305       /* Print a condition (eq, ne, etc).  */
3306
3307       /* CONST_TRUE_RTX means always -- that's the default.  */
3308       if (x == const_true_rtx)
3309         return;
3310
3311       if (!COMPARISON_P (x))
3312         {
3313           output_operand_lossage ("invalid operand for '%%%c'", code);
3314           return;
3315         }
3316
3317       fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3318       break;
3319
3320     case 'M':
3321       /* Print the inverse of a condition (eq <-> ne, etc).  */
3322
3323       /* CONST_TRUE_RTX means never -- that's the default.  */
3324       if (x == const_true_rtx)
3325         {
3326           fputs ("nv", f);
3327           return;
3328         }
3329
3330       if (!COMPARISON_P (x))
3331         {
3332           output_operand_lossage ("invalid operand for '%%%c'", code);
3333           return;
3334         }
3335
3336       fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3337                                   (aarch64_get_condition_code (x))], f);
3338       break;
3339
3340     case 'b':
3341     case 'h':
3342     case 's':
3343     case 'd':
3344     case 'q':
3345       /* Print a scalar FP/SIMD register name.  */
3346       if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3347         {
3348           output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3349           return;
3350         }
3351       asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code, REGNO (x) - V0_REGNUM);
3352       break;
3353
3354     case 'S':
3355     case 'T':
3356     case 'U':
3357     case 'V':
3358       /* Print the first FP/SIMD register name in a list.  */
3359       if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3360         {
3361           output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3362           return;
3363         }
3364       asm_fprintf (f, "%sv%d", REGISTER_PREFIX,
3365                                REGNO (x) - V0_REGNUM + (code - 'S'));
3366       break;
3367
3368     case 'w':
3369     case 'x':
3370       /* Print a general register name or the zero register (32-bit or
3371          64-bit).  */
3372       if (x == const0_rtx
3373           || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3374         {
3375           asm_fprintf (f, "%s%czr", REGISTER_PREFIX, code);
3376           break;
3377         }
3378
3379       if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3380         {
3381           asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code,
3382                        REGNO (x) - R0_REGNUM);
3383           break;
3384         }
3385
3386       if (REG_P (x) && REGNO (x) == SP_REGNUM)
3387         {
3388           asm_fprintf (f, "%s%ssp", REGISTER_PREFIX, code == 'w' ? "w" : "");
3389           break;
3390         }
3391
3392       /* Fall through */
3393
3394     case 0:
3395       /* Print a normal operand, if it's a general register, then we
3396          assume DImode.  */
3397       if (x == NULL)
3398         {
3399           output_operand_lossage ("missing operand");
3400           return;
3401         }
3402
3403       switch (GET_CODE (x))
3404         {
3405         case REG:
3406           asm_fprintf (f, "%r", REGNO (x));
3407           break;
3408
3409         case MEM:
3410           aarch64_memory_reference_mode = GET_MODE (x);
3411           output_address (XEXP (x, 0));
3412           break;
3413
3414         case LABEL_REF:
3415         case SYMBOL_REF:
3416           output_addr_const (asm_out_file, x);
3417           break;
3418
3419         case CONST_INT:
3420           asm_fprintf (f, "%wd", INTVAL (x));
3421           break;
3422
3423         case CONST_VECTOR:
3424           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3425             {
3426               gcc_assert (aarch64_const_vec_all_same_int_p (x,
3427                                                             HOST_WIDE_INT_MIN,
3428                                                             HOST_WIDE_INT_MAX));
3429               asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3430             }
3431           else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3432             {
3433               fputc ('0', f);
3434             }
3435           else
3436             gcc_unreachable ();
3437           break;
3438
3439         case CONST_DOUBLE:
3440           /* CONST_DOUBLE can represent a double-width integer.
3441              In this case, the mode of x is VOIDmode.  */
3442           if (GET_MODE (x) == VOIDmode)
3443             ; /* Do Nothing.  */
3444           else if (aarch64_float_const_zero_rtx_p (x))
3445             {
3446               fputc ('0', f);
3447               break;
3448             }
3449           else if (aarch64_float_const_representable_p (x))
3450             {
3451 #define buf_size 20
3452               char float_buf[buf_size] = {'\0'};
3453               REAL_VALUE_TYPE r;
3454               REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3455               real_to_decimal_for_mode (float_buf, &r,
3456                                         buf_size, buf_size,
3457                                         1, GET_MODE (x));
3458               asm_fprintf (asm_out_file, "%s", float_buf);
3459               break;
3460 #undef buf_size
3461             }
3462           output_operand_lossage ("invalid constant");
3463           return;
3464         default:
3465           output_operand_lossage ("invalid operand");
3466           return;
3467         }
3468       break;
3469
3470     case 'A':
3471       if (GET_CODE (x) == HIGH)
3472         x = XEXP (x, 0);
3473
3474       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3475         {
3476         case SYMBOL_SMALL_GOT:
3477           asm_fprintf (asm_out_file, ":got:");
3478           break;
3479
3480         case SYMBOL_SMALL_TLSGD:
3481           asm_fprintf (asm_out_file, ":tlsgd:");
3482           break;
3483
3484         case SYMBOL_SMALL_TLSDESC:
3485           asm_fprintf (asm_out_file, ":tlsdesc:");
3486           break;
3487
3488         case SYMBOL_SMALL_GOTTPREL:
3489           asm_fprintf (asm_out_file, ":gottprel:");
3490           break;
3491
3492         case SYMBOL_SMALL_TPREL:
3493           asm_fprintf (asm_out_file, ":tprel:");
3494           break;
3495
3496         default:
3497           break;
3498         }
3499       output_addr_const (asm_out_file, x);
3500       break;
3501
3502     case 'L':
3503       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3504         {
3505         case SYMBOL_SMALL_GOT:
3506           asm_fprintf (asm_out_file, ":lo12:");
3507           break;
3508
3509         case SYMBOL_SMALL_TLSGD:
3510           asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3511           break;
3512
3513         case SYMBOL_SMALL_TLSDESC:
3514           asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3515           break;
3516
3517         case SYMBOL_SMALL_GOTTPREL:
3518           asm_fprintf (asm_out_file, ":gottprel_lo12:");
3519           break;
3520
3521         case SYMBOL_SMALL_TPREL:
3522           asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3523           break;
3524
3525         default:
3526           break;
3527         }
3528       output_addr_const (asm_out_file, x);
3529       break;
3530
3531     case 'G':
3532
3533       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3534         {
3535         case SYMBOL_SMALL_TPREL:
3536           asm_fprintf (asm_out_file, ":tprel_hi12:");
3537           break;
3538         default:
3539           break;
3540         }
3541       output_addr_const (asm_out_file, x);
3542       break;
3543
3544     default:
3545       output_operand_lossage ("invalid operand prefix '%%%c'", code);
3546       return;
3547     }
3548 }
3549
3550 void
3551 aarch64_print_operand_address (FILE *f, rtx x)
3552 {
3553   struct aarch64_address_info addr;
3554
3555   if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3556                              MEM, true))
3557     switch (addr.type)
3558       {
3559       case ADDRESS_REG_IMM:
3560         if (addr.offset == const0_rtx)
3561           asm_fprintf (f, "[%r]", REGNO (addr.base));
3562         else
3563           asm_fprintf (f, "[%r,%wd]", REGNO (addr.base),
3564                        INTVAL (addr.offset));
3565         return;
3566
3567       case ADDRESS_REG_REG:
3568         if (addr.shift == 0)
3569           asm_fprintf (f, "[%r,%r]", REGNO (addr.base),
3570                        REGNO (addr.offset));
3571         else
3572           asm_fprintf (f, "[%r,%r,lsl %u]", REGNO (addr.base),
3573                        REGNO (addr.offset), addr.shift);
3574         return;
3575
3576       case ADDRESS_REG_UXTW:
3577         if (addr.shift == 0)
3578           asm_fprintf (f, "[%r,w%d,uxtw]", REGNO (addr.base),
3579                        REGNO (addr.offset) - R0_REGNUM);
3580         else
3581           asm_fprintf (f, "[%r,w%d,uxtw %u]", REGNO (addr.base),
3582                        REGNO (addr.offset) - R0_REGNUM, addr.shift);
3583         return;
3584
3585       case ADDRESS_REG_SXTW:
3586         if (addr.shift == 0)
3587           asm_fprintf (f, "[%r,w%d,sxtw]", REGNO (addr.base),
3588                        REGNO (addr.offset) - R0_REGNUM);
3589         else
3590           asm_fprintf (f, "[%r,w%d,sxtw %u]", REGNO (addr.base),
3591                        REGNO (addr.offset) - R0_REGNUM, addr.shift);
3592         return;
3593
3594       case ADDRESS_REG_WB:
3595         switch (GET_CODE (x))
3596           {
3597           case PRE_INC:
3598             asm_fprintf (f, "[%r,%d]!", REGNO (addr.base),
3599                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3600             return;
3601           case POST_INC:
3602             asm_fprintf (f, "[%r],%d", REGNO (addr.base),
3603                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3604             return;
3605           case PRE_DEC:
3606             asm_fprintf (f, "[%r,-%d]!", REGNO (addr.base),
3607                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3608             return;
3609           case POST_DEC:
3610             asm_fprintf (f, "[%r],-%d", REGNO (addr.base),
3611                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3612             return;
3613           case PRE_MODIFY:
3614             asm_fprintf (f, "[%r,%wd]!", REGNO (addr.base),
3615                          INTVAL (addr.offset));
3616             return;
3617           case POST_MODIFY:
3618             asm_fprintf (f, "[%r],%wd", REGNO (addr.base),
3619                          INTVAL (addr.offset));
3620             return;
3621           default:
3622             break;
3623           }
3624         break;
3625
3626       case ADDRESS_LO_SUM:
3627         asm_fprintf (f, "[%r,#:lo12:", REGNO (addr.base));
3628         output_addr_const (f, addr.offset);
3629         asm_fprintf (f, "]");
3630         return;
3631
3632       case ADDRESS_SYMBOLIC:
3633         break;
3634       }
3635
3636   output_addr_const (f, x);
3637 }
3638
3639 void
3640 aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED,
3641                            int labelno ATTRIBUTE_UNUSED)
3642 {
3643   sorry ("function profiling");
3644 }
3645
3646 bool
3647 aarch64_label_mentioned_p (rtx x)
3648 {
3649   const char *fmt;
3650   int i;
3651
3652   if (GET_CODE (x) == LABEL_REF)
3653     return true;
3654
3655   /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3656      referencing instruction, but they are constant offsets, not
3657      symbols.  */
3658   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3659     return false;
3660
3661   fmt = GET_RTX_FORMAT (GET_CODE (x));
3662   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3663     {
3664       if (fmt[i] == 'E')
3665         {
3666           int j;
3667
3668           for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3669             if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3670               return 1;
3671         }
3672       else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3673         return 1;
3674     }
3675
3676   return 0;
3677 }
3678
3679 /* Implement REGNO_REG_CLASS.  */
3680
3681 enum reg_class
3682 aarch64_regno_regclass (unsigned regno)
3683 {
3684   if (GP_REGNUM_P (regno))
3685     return CORE_REGS;
3686
3687   if (regno == SP_REGNUM)
3688     return STACK_REG;
3689
3690   if (regno == FRAME_POINTER_REGNUM
3691       || regno == ARG_POINTER_REGNUM)
3692     return CORE_REGS;
3693
3694   if (FP_REGNUM_P (regno))
3695     return FP_LO_REGNUM_P (regno) ?  FP_LO_REGS : FP_REGS;
3696
3697   return NO_REGS;
3698 }
3699
3700 /* Try a machine-dependent way of reloading an illegitimate address
3701    operand.  If we find one, push the reload and return the new rtx.  */
3702
3703 rtx
3704 aarch64_legitimize_reload_address (rtx *x_p,
3705                                    enum machine_mode mode,
3706                                    int opnum, int type,
3707                                    int ind_levels ATTRIBUTE_UNUSED)
3708 {
3709   rtx x = *x_p;
3710
3711   /* Do not allow mem (plus (reg, const)) if vector mode.  */
3712   if (aarch64_vector_mode_p (mode)
3713       && GET_CODE (x) == PLUS
3714       && REG_P (XEXP (x, 0))
3715       && CONST_INT_P (XEXP (x, 1)))
3716     {
3717       rtx orig_rtx = x;
3718       x = copy_rtx (x);
3719       push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3720                    BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3721                    opnum, (enum reload_type) type);
3722       return x;
3723     }
3724
3725   /* We must recognize output that we have already generated ourselves.  */
3726   if (GET_CODE (x) == PLUS
3727       && GET_CODE (XEXP (x, 0)) == PLUS
3728       && REG_P (XEXP (XEXP (x, 0), 0))
3729       && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3730       && CONST_INT_P (XEXP (x, 1)))
3731     {
3732       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3733                    BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3734                    opnum, (enum reload_type) type);
3735       return x;
3736     }
3737
3738   /* We wish to handle large displacements off a base register by splitting
3739      the addend across an add and the mem insn.  This can cut the number of
3740      extra insns needed from 3 to 1.  It is only useful for load/store of a
3741      single register with 12 bit offset field.  */
3742   if (GET_CODE (x) == PLUS
3743       && REG_P (XEXP (x, 0))
3744       && CONST_INT_P (XEXP (x, 1))
3745       && HARD_REGISTER_P (XEXP (x, 0))
3746       && mode != TImode
3747       && mode != TFmode
3748       && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3749     {
3750       HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3751       HOST_WIDE_INT low = val & 0xfff;
3752       HOST_WIDE_INT high = val - low;
3753       HOST_WIDE_INT offs;
3754       rtx cst;
3755
3756       /* Reload non-zero BLKmode offsets.  This is because we cannot ascertain
3757          BLKmode alignment.  */
3758       if (GET_MODE_SIZE (mode) == 0)
3759         return NULL_RTX;
3760
3761       offs = low % GET_MODE_SIZE (mode);
3762
3763       /* Align misaligned offset by adjusting high part to compensate.  */
3764       if (offs != 0)
3765         {
3766           if (aarch64_uimm12_shift (high + offs))
3767             {
3768               /* Align down.  */
3769               low = low - offs;
3770               high = high + offs;
3771             }
3772           else
3773             {
3774               /* Align up.  */
3775               offs = GET_MODE_SIZE (mode) - offs;
3776               low = low + offs;
3777               high = high + (low & 0x1000) - offs;
3778               low &= 0xfff;
3779             }
3780         }
3781
3782       /* Check for overflow.  */
3783       if (high + low != val)
3784         return NULL_RTX;
3785
3786       cst = GEN_INT (high);
3787       if (!aarch64_uimm12_shift (high))
3788         cst = force_const_mem (Pmode, cst);
3789
3790       /* Reload high part into base reg, leaving the low part
3791          in the mem instruction.  */
3792       x = gen_rtx_PLUS (Pmode,
3793                         gen_rtx_PLUS (Pmode, XEXP (x, 0), cst),
3794                         GEN_INT (low));
3795
3796       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3797                    BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
3798                    opnum, (enum reload_type) type);
3799       return x;
3800     }
3801
3802   return NULL_RTX;
3803 }
3804
3805
3806 static reg_class_t
3807 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
3808                           reg_class_t rclass,
3809                           enum machine_mode mode,
3810                           secondary_reload_info *sri)
3811 {
3812   /* Address expressions of the form PLUS (SP, large_offset) need two
3813      scratch registers, one for the constant, and one for holding a
3814      copy of SP, since SP cannot be used on the RHS of an add-reg
3815      instruction.  */
3816   if (mode == DImode
3817       && GET_CODE (x) == PLUS
3818       && XEXP (x, 0) == stack_pointer_rtx
3819       && CONST_INT_P (XEXP (x, 1))
3820       && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
3821     {
3822       sri->icode = CODE_FOR_reload_sp_immediate;
3823       return NO_REGS;
3824     }
3825
3826   /* Without the TARGET_SIMD instructions we cannot move a Q register
3827      to a Q register directly.  We need a scratch.  */
3828   if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
3829       && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
3830       && reg_class_subset_p (rclass, FP_REGS))
3831     {
3832       if (mode == TFmode)
3833         sri->icode = CODE_FOR_aarch64_reload_movtf;
3834       else if (mode == TImode)
3835         sri->icode = CODE_FOR_aarch64_reload_movti;
3836       return NO_REGS;
3837     }
3838
3839   /* A TFmode or TImode memory access should be handled via an FP_REGS
3840      because AArch64 has richer addressing modes for LDR/STR instructions
3841      than LDP/STP instructions.  */
3842   if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
3843       && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
3844     return FP_REGS;
3845
3846   if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
3847       return CORE_REGS;
3848
3849   return NO_REGS;
3850 }
3851
3852 static bool
3853 aarch64_can_eliminate (const int from, const int to)
3854 {
3855   /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
3856      HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */
3857
3858   if (frame_pointer_needed)
3859     {
3860       if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3861         return true;
3862       if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3863         return false;
3864       if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3865           && !cfun->calls_alloca)
3866         return true;
3867       if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3868         return true;
3869     return false;
3870     }
3871   else
3872     {
3873       /* If we decided that we didn't need a frame pointer but then used
3874          LR in the function, then we do need a frame pointer after all, so
3875          prevent this elimination to ensure a frame pointer is used.  */
3876
3877       if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3878           && df_regs_ever_live_p (LR_REGNUM))
3879         return false;
3880     }
3881   return true;
3882 }
3883
3884 HOST_WIDE_INT
3885 aarch64_initial_elimination_offset (unsigned from, unsigned to)
3886 {
3887   HOST_WIDE_INT frame_size;
3888   HOST_WIDE_INT offset;
3889
3890   aarch64_layout_frame ();
3891   frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
3892                 + crtl->outgoing_args_size
3893                 + cfun->machine->saved_varargs_size);
3894
3895    frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
3896    offset = frame_size;
3897
3898    if (to == HARD_FRAME_POINTER_REGNUM)
3899      {
3900        if (from == ARG_POINTER_REGNUM)
3901          return offset - crtl->outgoing_args_size;
3902
3903        if (from == FRAME_POINTER_REGNUM)
3904          return cfun->machine->frame.saved_regs_size;
3905      }
3906
3907    if (to == STACK_POINTER_REGNUM)
3908      {
3909        if (from == FRAME_POINTER_REGNUM)
3910          {
3911            HOST_WIDE_INT elim = crtl->outgoing_args_size
3912                               + cfun->machine->frame.saved_regs_size
3913                               - cfun->machine->frame.fp_lr_offset;
3914            elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
3915            return elim;
3916          }
3917      }
3918
3919    return offset;
3920 }
3921
3922
3923 /* Implement RETURN_ADDR_RTX.  We do not support moving back to a
3924    previous frame.  */
3925
3926 rtx
3927 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
3928 {
3929   if (count != 0)
3930     return const0_rtx;
3931   return get_hard_reg_initial_val (Pmode, LR_REGNUM);
3932 }
3933
3934
3935 static void
3936 aarch64_asm_trampoline_template (FILE *f)
3937 {
3938   asm_fprintf (f, "\tldr\t%r, .+16\n", IP1_REGNUM);
3939   asm_fprintf (f, "\tldr\t%r, .+20\n", STATIC_CHAIN_REGNUM);
3940   asm_fprintf (f, "\tbr\t%r\n", IP1_REGNUM);
3941   assemble_aligned_integer (4, const0_rtx);
3942   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3943   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3944 }
3945
/* Return the size of a trampoline in bytes: 3 insns + padding +
   2 dwords.  */
unsigned
aarch64_trampoline_size (void)
{
  const unsigned insn_bytes = 3 * 4;
  const unsigned padding_bytes = 4;
  const unsigned dword_bytes = 2 * 8;

  return insn_bytes + padding_bytes + dword_bytes;
}
3951
3952 static void
3953 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3954 {
3955   rtx fnaddr, mem, a_tramp;
3956
3957   /* Don't need to copy the trailing D-words, we fill those in below.  */
3958   emit_block_move (m_tramp, assemble_trampoline_template (),
3959                    GEN_INT (TRAMPOLINE_SIZE - 16), BLOCK_OP_NORMAL);
3960   mem = adjust_address (m_tramp, DImode, 16);
3961   fnaddr = XEXP (DECL_RTL (fndecl), 0);
3962   emit_move_insn (mem, fnaddr);
3963
3964   mem = adjust_address (m_tramp, DImode, 24);
3965   emit_move_insn (mem, chain_value);
3966
3967   /* XXX We should really define a "clear_cache" pattern and use
3968      gen_clear_cache().  */
3969   a_tramp = XEXP (m_tramp, 0);
3970   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3971                      LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3972                      plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3973 }
3974
3975 static unsigned char
3976 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
3977 {
3978   switch (regclass)
3979     {
3980     case CORE_REGS:
3981     case POINTER_REGS:
3982     case GENERAL_REGS:
3983     case ALL_REGS:
3984     case FP_REGS:
3985     case FP_LO_REGS:
3986       return
3987         aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
3988                                        (GET_MODE_SIZE (mode) + 7) / 8;
3989     case STACK_REG:
3990       return 1;
3991
3992     case NO_REGS:
3993       return 0;
3994
3995     default:
3996       break;
3997     }
3998   gcc_unreachable ();
3999 }
4000
4001 static reg_class_t
4002 aarch64_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t regclass)
4003 {
4004   return ((regclass == POINTER_REGS || regclass == STACK_REG)
4005           ? GENERAL_REGS : regclass);
4006 }
4007
/* Write a reference to the label NAME to F, using the "%U" directive
   of asm_fprintf in front of the name.  */
void
aarch64_asm_output_labelref (FILE *f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}
4013
4014 static void
4015 aarch64_elf_asm_constructor (rtx symbol, int priority)
4016 {
4017   if (priority == DEFAULT_INIT_PRIORITY)
4018     default_ctor_section_asm_out_constructor (symbol, priority);
4019   else
4020     {
4021       section *s;
4022       char buf[18];
4023       snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4024       s = get_section (buf, SECTION_WRITE, NULL);
4025       switch_to_section (s);
4026       assemble_align (POINTER_SIZE);
4027       fputs ("\t.dword\t", asm_out_file);
4028       output_addr_const (asm_out_file, symbol);
4029       fputc ('\n', asm_out_file);
4030     }
4031 }
4032
4033 static void
4034 aarch64_elf_asm_destructor (rtx symbol, int priority)
4035 {
4036   if (priority == DEFAULT_INIT_PRIORITY)
4037     default_dtor_section_asm_out_destructor (symbol, priority);
4038   else
4039     {
4040       section *s;
4041       char buf[18];
4042       snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4043       s = get_section (buf, SECTION_WRITE, NULL);
4044       switch_to_section (s);
4045       assemble_align (POINTER_SIZE);
4046       fputs ("\t.dword\t", asm_out_file);
4047       output_addr_const (asm_out_file, symbol);
4048       fputc ('\n', asm_out_file);
4049     }
4050 }
4051
4052 const char*
4053 aarch64_output_casesi (rtx *operands)
4054 {
4055   char buf[100];
4056   char label[100];
4057   rtx diff_vec = PATTERN (next_real_insn (operands[2]));
4058   int index;
4059   static const char *const patterns[4][2] =
4060   {
4061     {
4062       "ldrb\t%w3, [%0,%w1,uxtw]",
4063       "add\t%3, %4, %w3, sxtb #2"
4064     },
4065     {
4066       "ldrh\t%w3, [%0,%w1,uxtw #1]",
4067       "add\t%3, %4, %w3, sxth #2"
4068     },
4069     {
4070       "ldr\t%w3, [%0,%w1,uxtw #2]",
4071       "add\t%3, %4, %w3, sxtw #2"
4072     },
4073     /* We assume that DImode is only generated when not optimizing and
4074        that we don't really need 64-bit address offsets.  That would
4075        imply an object file with 8GB of code in a single function!  */
4076     {
4077       "ldr\t%w3, [%0,%w1,uxtw #2]",
4078       "add\t%3, %4, %w3, sxtw #2"
4079     }
4080   };
4081
4082   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4083
4084   index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4085
4086   gcc_assert (index >= 0 && index <= 3);
4087
4088   /* Need to implement table size reduction, by chaning the code below.  */
4089   output_asm_insn (patterns[index][0], operands);
4090   ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4091   snprintf (buf, sizeof (buf),
4092             "adr\t%%4, %s", targetm.strip_name_encoding (label));
4093   output_asm_insn (buf, operands);
4094   output_asm_insn (patterns[index][1], operands);
4095   output_asm_insn ("br\t%3", operands);
4096   assemble_label (asm_out_file, label);
4097   return "";
4098 }
4099
4100
4101 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4102    masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4103    operator.  */
4104
4105 int
4106 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4107 {
4108   if (shift >= 0 && shift <= 3)
4109     {
4110       int size;
4111       for (size = 8; size <= 32; size *= 2)
4112         {
4113           HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4114           if (mask == bits << shift)
4115             return size;
4116         }
4117     }
4118   return 0;
4119 }
4120
4121 static bool
4122 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4123                                    const_rtx x ATTRIBUTE_UNUSED)
4124 {
4125   /* We can't use blocks for constants when we're using a per-function
4126      constant pool.  */
4127   return false;
4128 }
4129
4130 static section *
4131 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4132                             rtx x ATTRIBUTE_UNUSED,
4133                             unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4134 {
4135   /* Force all constant pool entries into the current function section.  */
4136   return function_section (current_function_decl);
4137 }
4138
4139
4140 /* Costs.  */
4141
4142 /* Helper function for rtx cost calculation.  Strip a shift expression
4143    from X.  Returns the inner operand if successful, or the original
4144    expression on failure.  */
4145 static rtx
4146 aarch64_strip_shift (rtx x)
4147 {
4148   rtx op = x;
4149
4150   if ((GET_CODE (op) == ASHIFT
4151        || GET_CODE (op) == ASHIFTRT
4152        || GET_CODE (op) == LSHIFTRT)
4153       && CONST_INT_P (XEXP (op, 1)))
4154     return XEXP (op, 0);
4155
4156   if (GET_CODE (op) == MULT
4157       && CONST_INT_P (XEXP (op, 1))
4158       && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4159     return XEXP (op, 0);
4160
4161   return x;
4162 }
4163
4164 /* Helper function for rtx cost calculation.  Strip a shift or extend
4165    expression from X.  Returns the inner operand if successful, or the
4166    original expression on failure.  We deal with a number of possible
4167    canonicalization variations here.  */
4168 static rtx
4169 aarch64_strip_shift_or_extend (rtx x)
4170 {
4171   rtx op = x;
4172
4173   /* Zero and sign extraction of a widened value.  */
4174   if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4175       && XEXP (op, 2) == const0_rtx
4176       && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4177                                          XEXP (op, 1)))
4178     return XEXP (XEXP (op, 0), 0);
4179
4180   /* It can also be represented (for zero-extend) as an AND with an
4181      immediate.  */
4182   if (GET_CODE (op) == AND
4183       && GET_CODE (XEXP (op, 0)) == MULT
4184       && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4185       && CONST_INT_P (XEXP (op, 1))
4186       && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4187                            INTVAL (XEXP (op, 1))) != 0)
4188     return XEXP (XEXP (op, 0), 0);
4189
4190   /* Now handle extended register, as this may also have an optional
4191      left shift by 1..4.  */
4192   if (GET_CODE (op) == ASHIFT
4193       && CONST_INT_P (XEXP (op, 1))
4194       && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4195     op = XEXP (op, 0);
4196
4197   if (GET_CODE (op) == ZERO_EXTEND
4198       || GET_CODE (op) == SIGN_EXTEND)
4199     op = XEXP (op, 0);
4200
4201   if (op != x)
4202     return op;
4203
4204   return aarch64_strip_shift (x);
4205 }
4206
4207 /* Calculate the cost of calculating X, storing it in *COST.  Result
4208    is true if the total cost of the operation has now been calculated.  */
4209 static bool
4210 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4211                    int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4212 {
4213   rtx op0, op1;
4214   const struct cpu_rtx_cost_table *extra_cost
4215     = aarch64_tune_params->insn_extra_cost;
4216
4217   switch (code)
4218     {
4219     case SET:
4220       op0 = SET_DEST (x);
4221       op1 = SET_SRC (x);
4222
4223       switch (GET_CODE (op0))
4224         {
4225         case MEM:
4226           if (speed)
4227             *cost += extra_cost->memory_store;
4228
4229           if (op1 != const0_rtx)
4230             *cost += rtx_cost (op1, SET, 1, speed);
4231           return true;
4232
4233         case SUBREG:
4234           if (! REG_P (SUBREG_REG (op0)))
4235             *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4236           /* Fall through.  */
4237         case REG:
4238           /* Cost is just the cost of the RHS of the set.  */
4239           *cost += rtx_cost (op1, SET, 1, true);
4240           return true;
4241
4242         case ZERO_EXTRACT:  /* Bit-field insertion.  */
4243         case SIGN_EXTRACT:
4244           /* Strip any redundant widening of the RHS to meet the width of
4245              the target.  */
4246           if (GET_CODE (op1) == SUBREG)
4247             op1 = SUBREG_REG (op1);
4248           if ((GET_CODE (op1) == ZERO_EXTEND
4249                || GET_CODE (op1) == SIGN_EXTEND)
4250               && GET_CODE (XEXP (op0, 1)) == CONST_INT
4251               && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4252                   >= INTVAL (XEXP (op0, 1))))
4253             op1 = XEXP (op1, 0);
4254           *cost += rtx_cost (op1, SET, 1, speed);
4255           return true;
4256
4257         default:
4258           break;
4259         }
4260       return false;
4261
4262     case MEM:
4263       if (speed)
4264         *cost += extra_cost->memory_load;
4265
4266       return true;
4267
4268     case NEG:
4269       op0 = CONST0_RTX (GET_MODE (x));
4270       op1 = XEXP (x, 0);
4271       goto cost_minus;
4272
4273     case COMPARE:
4274       op0 = XEXP (x, 0);
4275       op1 = XEXP (x, 1);
4276
4277       if (op1 == const0_rtx
4278           && GET_CODE (op0) == AND)
4279         {
4280           x = op0;
4281           goto cost_logic;
4282         }
4283
4284       /* Comparisons can work if the order is swapped.
4285          Canonicalization puts the more complex operation first, but
4286          we want it in op1.  */
4287       if (! (REG_P (op0)
4288              || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4289         {
4290           op0 = XEXP (x, 1);
4291           op1 = XEXP (x, 0);
4292         }
4293       goto cost_minus;
4294
4295     case MINUS:
4296       op0 = XEXP (x, 0);
4297       op1 = XEXP (x, 1);
4298
4299     cost_minus:
4300       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4301           || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4302               && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4303         {
4304           if (op0 != const0_rtx)
4305             *cost += rtx_cost (op0, MINUS, 0, speed);
4306
4307           if (CONST_INT_P (op1))
4308             {
4309               if (!aarch64_uimm12_shift (INTVAL (op1)))
4310                 *cost += rtx_cost (op1, MINUS, 1, speed);
4311             }
4312           else
4313             {
4314               op1 = aarch64_strip_shift_or_extend (op1);
4315               *cost += rtx_cost (op1, MINUS, 1, speed);
4316             }
4317           return true;
4318         }
4319
4320       return false;
4321
4322     case PLUS:
4323       op0 = XEXP (x, 0);
4324       op1 = XEXP (x, 1);
4325
4326       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4327         {
4328           if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4329             {
4330               *cost += rtx_cost (op0, PLUS, 0, speed);
4331             }
4332           else
4333             {
4334               rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4335
4336               if (new_op0 == op0
4337                   && GET_CODE (op0) == MULT)
4338                 {
4339                   if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4340                        && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4341                       || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4342                           && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4343                     {
4344                       *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4345                                           speed)
4346                                 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4347                                             speed)
4348                                 + rtx_cost (op1, PLUS, 1, speed));
4349                       if (speed)
4350                         *cost += extra_cost->int_multiply_extend_add;
4351                       return true;
4352                     }
4353                   *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4354                             + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4355                             + rtx_cost (op1, PLUS, 1, speed));
4356
4357                   if (speed)
4358                     *cost += extra_cost->int_multiply_add;
4359                 }
4360
4361               *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4362                         + rtx_cost (op1, PLUS, 1, speed));
4363             }
4364           return true;
4365         }
4366
4367       return false;
4368
4369     case IOR:
4370     case XOR:
4371     case AND:
4372     cost_logic:
4373       op0 = XEXP (x, 0);
4374       op1 = XEXP (x, 1);
4375
4376       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4377         {
4378           if (CONST_INT_P (op1)
4379               && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4380             {
4381               *cost += rtx_cost (op0, AND, 0, speed);
4382             }
4383           else
4384             {
4385               if (GET_CODE (op0) == NOT)
4386                 op0 = XEXP (op0, 0);
4387               op0 = aarch64_strip_shift (op0);
4388               *cost += (rtx_cost (op0, AND, 0, speed)
4389                         + rtx_cost (op1, AND, 1, speed));
4390             }
4391           return true;
4392         }
4393       return false;
4394
4395     case ZERO_EXTEND:
4396       if ((GET_MODE (x) == DImode
4397            && GET_MODE (XEXP (x, 0)) == SImode)
4398           || GET_CODE (XEXP (x, 0)) == MEM)
4399         {
4400           *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4401           return true;
4402         }
4403       return false;
4404
4405     case SIGN_EXTEND:
4406       if (GET_CODE (XEXP (x, 0)) == MEM)
4407         {
4408           *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4409           return true;
4410         }
4411       return false;
4412
4413     case ROTATE:
4414       if (!CONST_INT_P (XEXP (x, 1)))
4415         *cost += COSTS_N_INSNS (2);
4416       /* Fall through.  */
4417     case ROTATERT:
4418     case LSHIFTRT:
4419     case ASHIFT:
4420     case ASHIFTRT:
4421
4422       /* Shifting by a register often takes an extra cycle.  */
4423       if (speed && !CONST_INT_P (XEXP (x, 1)))
4424         *cost += extra_cost->register_shift;
4425
4426       *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4427       return true;
4428
4429     case HIGH:
4430       if (!CONSTANT_P (XEXP (x, 0)))
4431         *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4432       return true;
4433
4434     case LO_SUM:
4435       if (!CONSTANT_P (XEXP (x, 1)))
4436         *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4437       *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4438       return true;
4439
4440     case ZERO_EXTRACT:
4441     case SIGN_EXTRACT:
4442       *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4443       return true;
4444
4445     case MULT:
4446       op0 = XEXP (x, 0);
4447       op1 = XEXP (x, 1);
4448
4449       *cost = COSTS_N_INSNS (1);
4450       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4451         {
4452           if (CONST_INT_P (op1)
4453               && exact_log2 (INTVAL (op1)) > 0)
4454             {
4455               *cost += rtx_cost (op0, ASHIFT, 0, speed);
4456               return true;
4457             }
4458
4459           if ((GET_CODE (op0) == ZERO_EXTEND
4460                && GET_CODE (op1) == ZERO_EXTEND)
4461               || (GET_CODE (op0) == SIGN_EXTEND
4462                   && GET_CODE (op1) == SIGN_EXTEND))
4463             {
4464               *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4465                         + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4466               if (speed)
4467                 *cost += extra_cost->int_multiply_extend;
4468               return true;
4469             }
4470
4471           if (speed)
4472             *cost += extra_cost->int_multiply;
4473         }
4474       else if (speed)
4475         {
4476           if (GET_MODE (x) == DFmode)
4477             *cost += extra_cost->double_multiply;
4478           else if (GET_MODE (x) == SFmode)
4479             *cost += extra_cost->float_multiply;
4480         }
4481
4482       return false;  /* All arguments need to be in registers.  */
4483
4484     case MOD:
4485     case UMOD:
4486       *cost = COSTS_N_INSNS (2);
4487       if (speed)
4488         {
4489           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4490             *cost += (extra_cost->int_multiply_add
4491                       + extra_cost->int_divide);
4492           else if (GET_MODE (x) == DFmode)
4493             *cost += (extra_cost->double_multiply
4494                       + extra_cost->double_divide);
4495           else if (GET_MODE (x) == SFmode)
4496             *cost += (extra_cost->float_multiply
4497                       + extra_cost->float_divide);
4498         }
4499       return false;  /* All arguments need to be in registers.  */
4500
4501     case DIV:
4502     case UDIV:
4503       *cost = COSTS_N_INSNS (1);
4504       if (speed)
4505         {
4506           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4507             *cost += extra_cost->int_divide;
4508           else if (GET_MODE (x) == DFmode)
4509             *cost += extra_cost->double_divide;
4510           else if (GET_MODE (x) == SFmode)
4511             *cost += extra_cost->float_divide;
4512         }
4513       return false;  /* All arguments need to be in registers.  */
4514
4515     default:
4516       break;
4517     }
4518   return false;
4519 }
4520
4521 static int
4522 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4523                   enum machine_mode mode ATTRIBUTE_UNUSED,
4524                   addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4525 {
4526   enum rtx_code c  = GET_CODE (x);
4527   const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4528
4529   if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4530     return addr_cost->pre_modify;
4531
4532   if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4533     return addr_cost->post_modify;
4534
4535   if (c == PLUS)
4536     {
4537       if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4538         return addr_cost->imm_offset;
4539       else if (GET_CODE (XEXP (x, 0)) == MULT
4540                || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4541                || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4542         return addr_cost->register_extend;
4543
4544       return addr_cost->register_offset;
4545     }
4546   else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4547     return addr_cost->imm_offset;
4548
4549   return 0;
4550 }
4551
4552 static int
4553 aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4554                             reg_class_t from, reg_class_t to)
4555 {
4556   const struct cpu_regmove_cost *regmove_cost
4557     = aarch64_tune_params->regmove_cost;
4558
4559   if (from == GENERAL_REGS && to == GENERAL_REGS)
4560     return regmove_cost->GP2GP;
4561   else if (from == GENERAL_REGS)
4562     return regmove_cost->GP2FP;
4563   else if (to == GENERAL_REGS)
4564     return regmove_cost->FP2GP;
4565
4566   /* When AdvSIMD instructions are disabled it is not possible to move
4567      a 128-bit value directly between Q registers.  This is handled in
4568      secondary reload.  A general register is used as a scratch to move
4569      the upper DI value and the lower DI value is moved directly,
4570      hence the cost is the sum of three moves. */
4571
4572   if (! TARGET_SIMD && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128)
4573     return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4574
4575   return regmove_cost->FP2FP;
4576 }
4577
4578 static int
4579 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4580                           reg_class_t rclass ATTRIBUTE_UNUSED,
4581                           bool in ATTRIBUTE_UNUSED)
4582 {
4583   return aarch64_tune_params->memmov_cost;
4584 }
4585
4586 static void initialize_aarch64_code_model (void);
4587
4588 /* Parse the architecture extension string.  */
4589
4590 static void
4591 aarch64_parse_extension (char *str)
4592 {
4593   /* The extension string is parsed left to right.  */
4594   const struct aarch64_option_extension *opt = NULL;
4595
4596   /* Flag to say whether we are adding or removing an extension.  */
4597   int adding_ext = -1;
4598
4599   while (str != NULL && *str != 0)
4600     {
4601       char *ext;
4602       size_t len;
4603
4604       str++;
4605       ext = strchr (str, '+');
4606
4607       if (ext != NULL)
4608         len = ext - str;
4609       else
4610         len = strlen (str);
4611
4612       if (len >= 2 && strncmp (str, "no", 2) == 0)
4613         {
4614           adding_ext = 0;
4615           len -= 2;
4616           str += 2;
4617         }
4618       else if (len > 0)
4619         adding_ext = 1;
4620
4621       if (len == 0)
4622         {
4623           error ("missing feature modifier after %qs", "+no");
4624           return;
4625         }
4626
4627       /* Scan over the extensions table trying to find an exact match.  */
4628       for (opt = all_extensions; opt->name != NULL; opt++)
4629         {
4630           if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
4631             {
4632               /* Add or remove the extension.  */
4633               if (adding_ext)
4634                 aarch64_isa_flags |= opt->flags_on;
4635               else
4636                 aarch64_isa_flags &= ~(opt->flags_off);
4637               break;
4638             }
4639         }
4640
4641       if (opt->name == NULL)
4642         {
4643           /* Extension not found in list.  */
4644           error ("unknown feature modifier %qs", str);
4645           return;
4646         }
4647
4648       str = ext;
4649     };
4650
4651   return;
4652 }
4653
4654 /* Parse the ARCH string.  */
4655
4656 static void
4657 aarch64_parse_arch (void)
4658 {
4659   char *ext;
4660   const struct processor *arch;
4661   char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
4662   size_t len;
4663
4664   strcpy (str, aarch64_arch_string);
4665
4666   ext = strchr (str, '+');
4667
4668   if (ext != NULL)
4669     len = ext - str;
4670   else
4671     len = strlen (str);
4672
4673   if (len == 0)
4674     {
4675       error ("missing arch name in -march=%qs", str);
4676       return;
4677     }
4678
4679   /* Loop through the list of supported ARCHs to find a match.  */
4680   for (arch = all_architectures; arch->name != NULL; arch++)
4681     {
4682       if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
4683         {
4684           selected_arch = arch;
4685           aarch64_isa_flags = selected_arch->flags;
4686           selected_cpu = &all_cores[selected_arch->core];
4687
4688           if (ext != NULL)
4689             {
4690               /* ARCH string contains at least one extension.  */
4691               aarch64_parse_extension (ext);
4692             }
4693
4694           return;
4695         }
4696     }
4697
4698   /* ARCH name not found in list.  */
4699   error ("unknown value %qs for -march", str);
4700   return;
4701 }
4702
4703 /* Parse the CPU string.  */
4704
4705 static void
4706 aarch64_parse_cpu (void)
4707 {
4708   char *ext;
4709   const struct processor *cpu;
4710   char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
4711   size_t len;
4712
4713   strcpy (str, aarch64_cpu_string);
4714
4715   ext = strchr (str, '+');
4716
4717   if (ext != NULL)
4718     len = ext - str;
4719   else
4720     len = strlen (str);
4721
4722   if (len == 0)
4723     {
4724       error ("missing cpu name in -mcpu=%qs", str);
4725       return;
4726     }
4727
4728   /* Loop through the list of supported CPUs to find a match.  */
4729   for (cpu = all_cores; cpu->name != NULL; cpu++)
4730     {
4731       if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
4732         {
4733           selected_cpu = cpu;
4734           aarch64_isa_flags = selected_cpu->flags;
4735
4736           if (ext != NULL)
4737             {
4738               /* CPU string contains at least one extension.  */
4739               aarch64_parse_extension (ext);
4740             }
4741
4742           return;
4743         }
4744     }
4745
4746   /* CPU name not found in list.  */
4747   error ("unknown value %qs for -mcpu", str);
4748   return;
4749 }
4750
4751 /* Parse the TUNE string.  */
4752
4753 static void
4754 aarch64_parse_tune (void)
4755 {
4756   const struct processor *cpu;
4757   char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
4758   strcpy (str, aarch64_tune_string);
4759
4760   /* Loop through the list of supported CPUs to find a match.  */
4761   for (cpu = all_cores; cpu->name != NULL; cpu++)
4762     {
4763       if (strcmp (cpu->name, str) == 0)
4764         {
4765           selected_tune = cpu;
4766           return;
4767         }
4768     }
4769
4770   /* CPU name not found in list.  */
4771   error ("unknown value %qs for -mtune", str);
4772   return;
4773 }
4774
4775
4776 /* Implement TARGET_OPTION_OVERRIDE.  */
4777
4778 static void
4779 aarch64_override_options (void)
4780 {
4781   /* march wins over mcpu, so when march is defined, mcpu takes the same value,
4782      otherwise march remains undefined.  mtune can be used with either march or
4783      mcpu.  */
4784
4785   if (aarch64_arch_string)
4786     {
4787       aarch64_parse_arch ();
4788       aarch64_cpu_string = NULL;
4789     }
4790
4791   if (aarch64_cpu_string)
4792     {
4793       aarch64_parse_cpu ();
4794       selected_arch = NULL;
4795     }
4796
4797   if (aarch64_tune_string)
4798     {
4799       aarch64_parse_tune ();
4800     }
4801
4802   initialize_aarch64_code_model ();
4803
4804   aarch64_build_bitmask_table ();
4805
4806   /* This target defaults to strict volatile bitfields.  */
4807   if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
4808     flag_strict_volatile_bitfields = 1;
4809
4810   /* If the user did not specify a processor, choose the default
4811      one for them.  This will be the CPU set during configuration using
4812      --with-cpu, otherwise it is "generic".  */
4813   if (!selected_cpu)
4814     {
4815       selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
4816       aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
4817     }
4818
4819   gcc_assert (selected_cpu);
4820
4821   /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
4822   if (!selected_tune)
4823     selected_tune = &all_cores[selected_cpu->core];
4824
4825   aarch64_tune_flags = selected_tune->flags;
4826   aarch64_tune = selected_tune->core;
4827   aarch64_tune_params = selected_tune->tune;
4828
4829   aarch64_override_options_after_change ();
4830 }
4831
4832 /* Implement targetm.override_options_after_change.  */
4833
4834 static void
4835 aarch64_override_options_after_change (void)
4836 {
4837   faked_omit_frame_pointer = false;
4838
4839   /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
4840      that aarch64_frame_pointer_required will be called.  We need to remember
4841      whether flag_omit_frame_pointer was turned on normally or just faked.  */
4842
4843   if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
4844     {
4845       flag_omit_frame_pointer = true;
4846       faked_omit_frame_pointer = true;
4847     }
4848 }
4849
/* Return a freshly allocated, zero-initialized machine_function
   obtained from ggc_alloc_cleared_machine_function.  */

static struct machine_function *
aarch64_init_machine_status (void)
{
  return ggc_alloc_cleared_machine_function ();
}
4857
4858 void
4859 aarch64_init_expanders (void)
4860 {
4861   init_machine_status = aarch64_init_machine_status;
4862 }
4863
4864 /* A checking mechanism for the implementation of the various code models.  */
4865 static void
4866 initialize_aarch64_code_model (void)
4867 {
4868    if (flag_pic)
4869      {
4870        switch (aarch64_cmodel_var)
4871          {
4872          case AARCH64_CMODEL_TINY:
4873            aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
4874            break;
4875          case AARCH64_CMODEL_SMALL:
4876            aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
4877            break;
4878          case AARCH64_CMODEL_LARGE:
4879            sorry ("code model %qs with -f%s", "large",
4880                   flag_pic > 1 ? "PIC" : "pic");
4881          default:
4882            gcc_unreachable ();
4883          }
4884      }
4885    else
4886      aarch64_cmodel = aarch64_cmodel_var;
4887 }
4888
4889 /* Return true if SYMBOL_REF X binds locally.  */
4890
4891 static bool
4892 aarch64_symbol_binds_local_p (const_rtx x)
4893 {
4894   return (SYMBOL_REF_DECL (x)
4895           ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
4896           : SYMBOL_REF_LOCAL_P (x));
4897 }
4898
4899 /* Return true if SYMBOL_REF X is thread local */
4900 static bool
4901 aarch64_tls_symbol_p (rtx x)
4902 {
4903   if (! TARGET_HAVE_TLS)
4904     return false;
4905
4906   if (GET_CODE (x) != SYMBOL_REF)
4907     return false;
4908
4909   return SYMBOL_REF_TLS_MODEL (x) != 0;
4910 }
4911
4912 /* Classify a TLS symbol into one of the TLS kinds.  */
4913 enum aarch64_symbol_type
4914 aarch64_classify_tls_symbol (rtx x)
4915 {
4916   enum tls_model tls_kind = tls_symbolic_operand_type (x);
4917
4918   switch (tls_kind)
4919     {
4920     case TLS_MODEL_GLOBAL_DYNAMIC:
4921     case TLS_MODEL_LOCAL_DYNAMIC:
4922       return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
4923
4924     case TLS_MODEL_INITIAL_EXEC:
4925       return SYMBOL_SMALL_GOTTPREL;
4926
4927     case TLS_MODEL_LOCAL_EXEC:
4928       return SYMBOL_SMALL_TPREL;
4929
4930     case TLS_MODEL_EMULATED:
4931     case TLS_MODEL_NONE:
4932       return SYMBOL_FORCE_TO_MEM;
4933
4934     default:
4935       gcc_unreachable ();
4936     }
4937 }
4938
4939 /* Return the method that should be used to access SYMBOL_REF or
4940    LABEL_REF X in context CONTEXT.  */
4941 enum aarch64_symbol_type
4942 aarch64_classify_symbol (rtx x,
4943                          enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
4944 {
4945   if (GET_CODE (x) == LABEL_REF)
4946     {
4947       switch (aarch64_cmodel)
4948         {
4949         case AARCH64_CMODEL_LARGE:
4950           return SYMBOL_FORCE_TO_MEM;
4951
4952         case AARCH64_CMODEL_TINY_PIC:
4953         case AARCH64_CMODEL_TINY:
4954         case AARCH64_CMODEL_SMALL_PIC:
4955         case AARCH64_CMODEL_SMALL:
4956           return SYMBOL_SMALL_ABSOLUTE;
4957
4958         default:
4959           gcc_unreachable ();
4960         }
4961     }
4962
4963   gcc_assert (GET_CODE (x) == SYMBOL_REF);
4964
4965   switch (aarch64_cmodel)
4966     {
4967     case AARCH64_CMODEL_LARGE:
4968       return SYMBOL_FORCE_TO_MEM;
4969
4970     case AARCH64_CMODEL_TINY:
4971     case AARCH64_CMODEL_SMALL:
4972
4973       /* This is needed to get DFmode, TImode constants to be loaded off
4974          the constant pool.  Is it necessary to dump TImode values into
4975          the constant pool.  We don't handle TImode constant loads properly
4976          yet and hence need to use the constant pool.  */
4977       if (CONSTANT_POOL_ADDRESS_P (x))
4978         return SYMBOL_FORCE_TO_MEM;
4979
4980       if (aarch64_tls_symbol_p (x))
4981         return aarch64_classify_tls_symbol (x);
4982
4983       if (SYMBOL_REF_WEAK (x))
4984         return SYMBOL_FORCE_TO_MEM;
4985
4986       return SYMBOL_SMALL_ABSOLUTE;
4987
4988     case AARCH64_CMODEL_TINY_PIC:
4989     case AARCH64_CMODEL_SMALL_PIC:
4990
4991       if (CONSTANT_POOL_ADDRESS_P (x))
4992         return SYMBOL_FORCE_TO_MEM;
4993
4994       if (aarch64_tls_symbol_p (x))
4995         return aarch64_classify_tls_symbol (x);
4996
4997       if (!aarch64_symbol_binds_local_p (x))
4998         return SYMBOL_SMALL_GOT;
4999
5000       return SYMBOL_SMALL_ABSOLUTE;
5001
5002     default:
5003       gcc_unreachable ();
5004     }
5005   /* By default push everything into the constant pool.  */
5006   return SYMBOL_FORCE_TO_MEM;
5007 }
5008
5009 /* Return true if X is a symbolic constant that can be used in context
5010    CONTEXT.  If it is, store the type of the symbol in *SYMBOL_TYPE.  */
5011
5012 bool
5013 aarch64_symbolic_constant_p (rtx x, enum aarch64_symbol_context context,
5014                              enum aarch64_symbol_type *symbol_type)
5015 {
5016   rtx offset;
5017   split_const (x, &x, &offset);
5018   if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
5019     *symbol_type = aarch64_classify_symbol (x, context);
5020   else
5021     return false;
5022
5023   /* No checking of offset at this point.  */
5024   return true;
5025 }
5026
5027 bool
5028 aarch64_constant_address_p (rtx x)
5029 {
5030   return (CONSTANT_P (x) && memory_address_p (DImode, x));
5031 }
5032
5033 bool
5034 aarch64_legitimate_pic_operand_p (rtx x)
5035 {
5036   if (GET_CODE (x) == SYMBOL_REF
5037       || (GET_CODE (x) == CONST
5038           && GET_CODE (XEXP (x, 0)) == PLUS
5039           && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5040      return false;
5041
5042   return true;
5043 }
5044
5045 /* Return true if X holds either a quarter-precision or
5046      floating-point +0.0 constant.  */
5047 static bool
5048 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5049 {
5050   if (!CONST_DOUBLE_P (x))
5051     return false;
5052
5053   /* TODO: We could handle moving 0.0 to a TFmode register,
5054      but first we would like to refactor the movtf_aarch64
5055      to be more amicable to split moves properly and
5056      correctly gate on TARGET_SIMD.  For now - reject all
5057      constants which are not to SFmode or DFmode registers.  */
5058   if (!(mode == SFmode || mode == DFmode))
5059     return false;
5060
5061   if (aarch64_float_const_zero_rtx_p (x))
5062     return true;
5063   return aarch64_float_const_representable_p (x);
5064 }
5065
5066 static bool
5067 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5068 {
5069   /* Do not allow vector struct mode constants.  We could support
5070      0 and -1 easily, but they need support in aarch64-simd.md.  */
5071   if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5072     return false;
5073
5074   /* This could probably go away because
5075      we now decompose CONST_INTs according to expand_mov_immediate.  */
5076   if ((GET_CODE (x) == CONST_VECTOR
5077        && aarch64_simd_valid_immediate (x, mode, false,
5078                                         NULL, NULL, NULL, NULL, NULL) != -1)
5079       || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5080         return !targetm.cannot_force_const_mem (mode, x);
5081
5082   if (GET_CODE (x) == HIGH
5083       && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5084     return true;
5085
5086   return aarch64_constant_address_p (x);
5087 }
5088
5089 rtx
5090 aarch64_load_tp (rtx target)
5091 {
5092   if (!target
5093       || GET_MODE (target) != Pmode
5094       || !register_operand (target, Pmode))
5095     target = gen_reg_rtx (Pmode);
5096
5097   /* Can return in any reg.  */
5098   emit_insn (gen_aarch64_load_tp_hard (target));
5099   return target;
5100 }
5101
5102 /* On AAPCS systems, this is the "struct __va_list".  */
5103 static GTY(()) tree va_list_type;
5104
5105 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5106    Return the type to use as __builtin_va_list.
5107
5108    AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5109
5110    struct __va_list
5111    {
5112      void *__stack;
5113      void *__gr_top;
5114      void *__vr_top;
5115      int   __gr_offs;
5116      int   __vr_offs;
5117    };  */
5118
5119 static tree
5120 aarch64_build_builtin_va_list (void)
5121 {
5122   tree va_list_name;
5123   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5124
5125   /* Create the type.  */
5126   va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5127   /* Give it the required name.  */
5128   va_list_name = build_decl (BUILTINS_LOCATION,
5129                              TYPE_DECL,
5130                              get_identifier ("__va_list"),
5131                              va_list_type);
5132   DECL_ARTIFICIAL (va_list_name) = 1;
5133   TYPE_NAME (va_list_type) = va_list_name;
5134   TYPE_STUB_DECL (va_list_type) = va_list_name;
5135
5136   /* Create the fields.  */
5137   f_stack = build_decl (BUILTINS_LOCATION,
5138                         FIELD_DECL, get_identifier ("__stack"),
5139                         ptr_type_node);
5140   f_grtop = build_decl (BUILTINS_LOCATION,
5141                         FIELD_DECL, get_identifier ("__gr_top"),
5142                         ptr_type_node);
5143   f_vrtop = build_decl (BUILTINS_LOCATION,
5144                         FIELD_DECL, get_identifier ("__vr_top"),
5145                         ptr_type_node);
5146   f_groff = build_decl (BUILTINS_LOCATION,
5147                         FIELD_DECL, get_identifier ("__gr_offs"),
5148                         integer_type_node);
5149   f_vroff = build_decl (BUILTINS_LOCATION,
5150                         FIELD_DECL, get_identifier ("__vr_offs"),
5151                         integer_type_node);
5152
5153   DECL_ARTIFICIAL (f_stack) = 1;
5154   DECL_ARTIFICIAL (f_grtop) = 1;
5155   DECL_ARTIFICIAL (f_vrtop) = 1;
5156   DECL_ARTIFICIAL (f_groff) = 1;
5157   DECL_ARTIFICIAL (f_vroff) = 1;
5158
5159   DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5160   DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5161   DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5162   DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5163   DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5164
5165   TYPE_FIELDS (va_list_type) = f_stack;
5166   DECL_CHAIN (f_stack) = f_grtop;
5167   DECL_CHAIN (f_grtop) = f_vrtop;
5168   DECL_CHAIN (f_vrtop) = f_groff;
5169   DECL_CHAIN (f_groff) = f_vroff;
5170
5171   /* Compute its layout.  */
5172   layout_type (va_list_type);
5173
5174   return va_list_type;
5175 }
5176
5177 /* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
5178 static void
5179 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5180 {
5181   const CUMULATIVE_ARGS *cum;
5182   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5183   tree stack, grtop, vrtop, groff, vroff;
5184   tree t;
5185   int gr_save_area_size;
5186   int vr_save_area_size;
5187   int vr_offset;
5188
5189   cum = &crtl->args.info;
5190   gr_save_area_size
5191     = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5192   vr_save_area_size
5193     = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5194
5195   if (TARGET_GENERAL_REGS_ONLY)
5196     {
5197       if (cum->aapcs_nvrn > 0)
5198         sorry ("%qs and floating point or vector arguments",
5199                "-mgeneral-regs-only");
5200       vr_save_area_size = 0;
5201     }
5202
5203   f_stack = TYPE_FIELDS (va_list_type_node);
5204   f_grtop = DECL_CHAIN (f_stack);
5205   f_vrtop = DECL_CHAIN (f_grtop);
5206   f_groff = DECL_CHAIN (f_vrtop);
5207   f_vroff = DECL_CHAIN (f_groff);
5208
5209   stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5210                   NULL_TREE);
5211   grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5212                   NULL_TREE);
5213   vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5214                   NULL_TREE);
5215   groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5216                   NULL_TREE);
5217   vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5218                   NULL_TREE);
5219
5220   /* Emit code to initialize STACK, which points to the next varargs stack
5221      argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
5222      by named arguments.  STACK is 8-byte aligned.  */
5223   t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5224   if (cum->aapcs_stack_size > 0)
5225     t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5226   t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5227   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5228
5229   /* Emit code to initialize GRTOP, the top of the GR save area.
5230      virtual_incoming_args_rtx should have been 16 byte aligned.  */
5231   t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5232   t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5233   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5234
5235   /* Emit code to initialize VRTOP, the top of the VR save area.
5236      This address is gr_save_area_bytes below GRTOP, rounded
5237      down to the next 16-byte boundary.  */
5238   t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5239   vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5240                              STACK_BOUNDARY / BITS_PER_UNIT);
5241
5242   if (vr_offset)
5243     t = fold_build_pointer_plus_hwi (t, -vr_offset);
5244   t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5245   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5246
5247   /* Emit code to initialize GROFF, the offset from GRTOP of the
5248      next GPR argument.  */
5249   t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5250               build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5251   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5252
5253   /* Likewise emit code to initialize VROFF, the offset from FTOP
5254      of the next VR argument.  */
5255   t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5256               build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5257   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5258 }
5259
5260 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
5261
5262 static tree
5263 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5264                               gimple_seq *post_p ATTRIBUTE_UNUSED)
5265 {
5266   tree addr;
5267   bool indirect_p;
5268   bool is_ha;           /* is HFA or HVA.  */
5269   bool dw_align;        /* double-word align.  */
5270   enum machine_mode ag_mode = VOIDmode;
5271   int nregs;
5272   enum machine_mode mode;
5273
5274   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5275   tree stack, f_top, f_off, off, arg, roundup, on_stack;
5276   HOST_WIDE_INT size, rsize, adjust, align;
5277   tree t, u, cond1, cond2;
5278
5279   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5280   if (indirect_p)
5281     type = build_pointer_type (type);
5282
5283   mode = TYPE_MODE (type);
5284
5285   f_stack = TYPE_FIELDS (va_list_type_node);
5286   f_grtop = DECL_CHAIN (f_stack);
5287   f_vrtop = DECL_CHAIN (f_grtop);
5288   f_groff = DECL_CHAIN (f_vrtop);
5289   f_vroff = DECL_CHAIN (f_groff);
5290
5291   stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5292                   f_stack, NULL_TREE);
5293   size = int_size_in_bytes (type);
5294   align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5295
5296   dw_align = false;
5297   adjust = 0;
5298   if (aarch64_vfp_is_call_or_return_candidate (mode,
5299                                                type,
5300                                                &ag_mode,
5301                                                &nregs,
5302                                                &is_ha))
5303     {
5304       /* TYPE passed in fp/simd registers.  */
5305       if (TARGET_GENERAL_REGS_ONLY)
5306         sorry ("%qs and floating point or vector arguments",
5307                "-mgeneral-regs-only");
5308
5309       f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5310                       unshare_expr (valist), f_vrtop, NULL_TREE);
5311       f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5312                       unshare_expr (valist), f_vroff, NULL_TREE);
5313
5314       rsize = nregs * UNITS_PER_VREG;
5315
5316       if (is_ha)
5317         {
5318           if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5319             adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5320         }
5321       else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5322                && size < UNITS_PER_VREG)
5323         {
5324           adjust = UNITS_PER_VREG - size;
5325         }
5326     }
5327   else
5328     {
5329       /* TYPE passed in general registers.  */
5330       f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5331                       unshare_expr (valist), f_grtop, NULL_TREE);
5332       f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5333                       unshare_expr (valist), f_groff, NULL_TREE);
5334       rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5335       nregs = rsize / UNITS_PER_WORD;
5336
5337       if (align > 8)
5338         dw_align = true;
5339
5340       if (BLOCK_REG_PADDING (mode, type, 1) == downward
5341           && size < UNITS_PER_WORD)
5342         {
5343           adjust = UNITS_PER_WORD  - size;
5344         }
5345     }
5346
5347   /* Get a local temporary for the field value.  */
5348   off = get_initialized_tmp_var (f_off, pre_p, NULL);
5349
5350   /* Emit code to branch if off >= 0.  */
5351   t = build2 (GE_EXPR, boolean_type_node, off,
5352               build_int_cst (TREE_TYPE (off), 0));
5353   cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5354
5355   if (dw_align)
5356     {
5357       /* Emit: offs = (offs + 15) & -16.  */
5358       t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5359                   build_int_cst (TREE_TYPE (off), 15));
5360       t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5361                   build_int_cst (TREE_TYPE (off), -16));
5362       roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5363     }
5364   else
5365     roundup = NULL;
5366
5367   /* Update ap.__[g|v]r_offs  */
5368   t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5369               build_int_cst (TREE_TYPE (off), rsize));
5370   t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5371
5372   /* String up.  */
5373   if (roundup)
5374     t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5375
5376   /* [cond2] if (ap.__[g|v]r_offs > 0)  */
5377   u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5378               build_int_cst (TREE_TYPE (f_off), 0));
5379   cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5380
5381   /* String up: make sure the assignment happens before the use.  */
5382   t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5383   COND_EXPR_ELSE (cond1) = t;
5384
5385   /* Prepare the trees handling the argument that is passed on the stack;
5386      the top level node will store in ON_STACK.  */
5387   arg = get_initialized_tmp_var (stack, pre_p, NULL);
5388   if (align > 8)
5389     {
5390       /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
5391       t = fold_convert (intDI_type_node, arg);
5392       t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5393                   build_int_cst (TREE_TYPE (t), 15));
5394       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5395                   build_int_cst (TREE_TYPE (t), -16));
5396       t = fold_convert (TREE_TYPE (arg), t);
5397       roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5398     }
5399   else
5400     roundup = NULL;
5401   /* Advance ap.__stack  */
5402   t = fold_convert (intDI_type_node, arg);
5403   t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5404               build_int_cst (TREE_TYPE (t), size + 7));
5405   t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5406               build_int_cst (TREE_TYPE (t), -8));
5407   t = fold_convert (TREE_TYPE (arg), t);
5408   t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5409   /* String up roundup and advance.  */
5410   if (roundup)
5411     t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5412   /* String up with arg */
5413   on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5414   /* Big-endianness related address adjustment.  */
5415   if (BLOCK_REG_PADDING (mode, type, 1) == downward
5416       && size < UNITS_PER_WORD)
5417   {
5418     t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5419                 size_int (UNITS_PER_WORD - size));
5420     on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5421   }
5422
5423   COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5424   COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5425
5426   /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
5427   t = off;
5428   if (adjust)
5429     t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5430                 build_int_cst (TREE_TYPE (off), adjust));
5431
5432   t = fold_convert (sizetype, t);
5433   t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5434
5435   if (is_ha)
5436     {
5437       /* type ha; // treat as "struct {ftype field[n];}"
5438          ... [computing offs]
5439          for (i = 0; i <nregs; ++i, offs += 16)
5440            ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5441          return ha;  */
5442       int i;
5443       tree tmp_ha, field_t, field_ptr_t;
5444
5445       /* Declare a local variable.  */
5446       tmp_ha = create_tmp_var_raw (type, "ha");
5447       gimple_add_tmp_var (tmp_ha);
5448
5449       /* Establish the base type.  */
5450       switch (ag_mode)
5451         {
5452         case SFmode:
5453           field_t = float_type_node;
5454           field_ptr_t = float_ptr_type_node;
5455           break;
5456         case DFmode:
5457           field_t = double_type_node;
5458           field_ptr_t = double_ptr_type_node;
5459           break;
5460         case TFmode:
5461           field_t = long_double_type_node;
5462           field_ptr_t = long_double_ptr_type_node;
5463           break;
5464 /* The half precision and quad precision are not fully supported yet.  Enable
5465    the following code after the support is complete.  Need to find the correct
5466    type node for __fp16 *.  */
5467 #if 0
5468         case HFmode:
5469           field_t = float_type_node;
5470           field_ptr_t = float_ptr_type_node;
5471           break;
5472 #endif
5473         case V2SImode:
5474         case V4SImode:
5475             {
5476               tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5477               field_t = build_vector_type_for_mode (innertype, ag_mode);
5478               field_ptr_t = build_pointer_type (field_t);
5479             }
5480           break;
5481         default:
5482           gcc_assert (0);
5483         }
5484
5485       /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area  */
5486       tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5487       addr = t;
5488       t = fold_convert (field_ptr_t, addr);
5489       t = build2 (MODIFY_EXPR, field_t,
5490                   build1 (INDIRECT_REF, field_t, tmp_ha),
5491                   build1 (INDIRECT_REF, field_t, t));
5492
5493       /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
5494       for (i = 1; i < nregs; ++i)
5495         {
5496           addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5497           u = fold_convert (field_ptr_t, addr);
5498           u = build2 (MODIFY_EXPR, field_t,
5499                       build2 (MEM_REF, field_t, tmp_ha,
5500                               build_int_cst (field_ptr_t,
5501                                              (i *
5502                                               int_size_in_bytes (field_t)))),
5503                       build1 (INDIRECT_REF, field_t, u));
5504           t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5505         }
5506
5507       u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5508       t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5509     }
5510
5511   COND_EXPR_ELSE (cond2) = t;
5512   addr = fold_convert (build_pointer_type (type), cond1);
5513   addr = build_va_arg_indirect_ref (addr);
5514
5515   if (indirect_p)
5516     addr = build_va_arg_indirect_ref (addr);
5517
5518   return addr;
5519 }
5520
5521 /* Implement TARGET_SETUP_INCOMING_VARARGS.  */
5522
5523 static void
5524 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5525                                 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5526                                 int no_rtl)
5527 {
5528   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5529   CUMULATIVE_ARGS local_cum;
5530   int gr_saved, vr_saved;
5531
5532   /* The caller has advanced CUM up to, but not beyond, the last named
5533      argument.  Advance a local copy of CUM past the last "real" named
5534      argument, to find out how many registers are left over.  */
5535   local_cum = *cum;
5536   aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5537
5538   /* Found out how many registers we need to save.  */
5539   gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5540   vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5541
5542   if (TARGET_GENERAL_REGS_ONLY)
5543     {
5544       if (local_cum.aapcs_nvrn > 0)
5545         sorry ("%qs and floating point or vector arguments",
5546                "-mgeneral-regs-only");
5547       vr_saved = 0;
5548     }
5549
5550   if (!no_rtl)
5551     {
5552       if (gr_saved > 0)
5553         {
5554           rtx ptr, mem;
5555
5556           /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
5557           ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5558                                - gr_saved * UNITS_PER_WORD);
5559           mem = gen_frame_mem (BLKmode, ptr);
5560           set_mem_alias_set (mem, get_varargs_alias_set ());
5561
5562           move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5563                                mem, gr_saved);
5564         }
5565       if (vr_saved > 0)
5566         {
5567           /* We can't use move_block_from_reg, because it will use
5568              the wrong mode, storing D regs only.  */
5569           enum machine_mode mode = TImode;
5570           int off, i;
5571
5572           /* Set OFF to the offset from virtual_incoming_args_rtx of
5573              the first vector register.  The VR save area lies below
5574              the GR one, and is aligned to 16 bytes.  */
5575           off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5576                                    STACK_BOUNDARY / BITS_PER_UNIT);
5577           off -= vr_saved * UNITS_PER_VREG;
5578
5579           for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5580             {
5581               rtx ptr, mem;
5582
5583               ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5584               mem = gen_frame_mem (mode, ptr);
5585               set_mem_alias_set (mem, get_varargs_alias_set ());
5586               aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5587               off += UNITS_PER_VREG;
5588             }
5589         }
5590     }
5591
5592   /* We don't save the size into *PRETEND_SIZE because we want to avoid
5593      any complication of having crtl->args.pretend_args_size changed.  */
5594   cfun->machine->saved_varargs_size
5595     = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5596                       STACK_BOUNDARY / BITS_PER_UNIT)
5597        + vr_saved * UNITS_PER_VREG);
5598 }
5599
5600 static void
5601 aarch64_conditional_register_usage (void)
5602 {
5603   int i;
5604   if (!TARGET_FLOAT)
5605     {
5606       for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5607         {
5608           fixed_regs[i] = 1;
5609           call_used_regs[i] = 1;
5610         }
5611     }
5612 }
5613
5614 /* Walk down the type tree of TYPE counting consecutive base elements.
5615    If *MODEP is VOIDmode, then set it to the first valid floating point
5616    type.  If a non-floating point type is found, or if a floating point
5617    type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5618    otherwise return the count in the sub-tree.  */
5619 static int
5620 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5621 {
5622   enum machine_mode mode;
5623   HOST_WIDE_INT size;
5624
5625   switch (TREE_CODE (type))
5626     {
5627     case REAL_TYPE:
5628       mode = TYPE_MODE (type);
5629       if (mode != DFmode && mode != SFmode && mode != TFmode)
5630         return -1;
5631
5632       if (*modep == VOIDmode)
5633         *modep = mode;
5634
5635       if (*modep == mode)
5636         return 1;
5637
5638       break;
5639
5640     case COMPLEX_TYPE:
5641       mode = TYPE_MODE (TREE_TYPE (type));
5642       if (mode != DFmode && mode != SFmode && mode != TFmode)
5643         return -1;
5644
5645       if (*modep == VOIDmode)
5646         *modep = mode;
5647
5648       if (*modep == mode)
5649         return 2;
5650
5651       break;
5652
5653     case VECTOR_TYPE:
5654       /* Use V2SImode and V4SImode as representatives of all 64-bit
5655          and 128-bit vector types.  */
5656       size = int_size_in_bytes (type);
5657       switch (size)
5658         {
5659         case 8:
5660           mode = V2SImode;
5661           break;
5662         case 16:
5663           mode = V4SImode;
5664           break;
5665         default:
5666           return -1;
5667         }
5668
5669       if (*modep == VOIDmode)
5670         *modep = mode;
5671
5672       /* Vector modes are considered to be opaque: two vectors are
5673          equivalent for the purposes of being homogeneous aggregates
5674          if they are the same size.  */
5675       if (*modep == mode)
5676         return 1;
5677
5678       break;
5679
5680     case ARRAY_TYPE:
5681       {
5682         int count;
5683         tree index = TYPE_DOMAIN (type);
5684
5685         /* Can't handle incomplete types.  */
5686         if (!COMPLETE_TYPE_P (type))
5687           return -1;
5688
5689         count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5690         if (count == -1
5691             || !index
5692             || !TYPE_MAX_VALUE (index)
5693             || !host_integerp (TYPE_MAX_VALUE (index), 1)
5694             || !TYPE_MIN_VALUE (index)
5695             || !host_integerp (TYPE_MIN_VALUE (index), 1)
5696             || count < 0)
5697           return -1;
5698
5699         count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
5700                       - tree_low_cst (TYPE_MIN_VALUE (index), 1));
5701
5702         /* There must be no padding.  */
5703         if (!host_integerp (TYPE_SIZE (type), 1)
5704             || (tree_low_cst (TYPE_SIZE (type), 1)
5705                 != count * GET_MODE_BITSIZE (*modep)))
5706           return -1;
5707
5708         return count;
5709       }
5710
5711     case RECORD_TYPE:
5712       {
5713         int count = 0;
5714         int sub_count;
5715         tree field;
5716
5717         /* Can't handle incomplete types.  */
5718         if (!COMPLETE_TYPE_P (type))
5719           return -1;
5720
5721         for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5722           {
5723             if (TREE_CODE (field) != FIELD_DECL)
5724               continue;
5725
5726             sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5727             if (sub_count < 0)
5728               return -1;
5729             count += sub_count;
5730           }
5731
5732         /* There must be no padding.  */
5733         if (!host_integerp (TYPE_SIZE (type), 1)
5734             || (tree_low_cst (TYPE_SIZE (type), 1)
5735                 != count * GET_MODE_BITSIZE (*modep)))
5736           return -1;
5737
5738         return count;
5739       }
5740
5741     case UNION_TYPE:
5742     case QUAL_UNION_TYPE:
5743       {
5744         /* These aren't very interesting except in a degenerate case.  */
5745         int count = 0;
5746         int sub_count;
5747         tree field;
5748
5749         /* Can't handle incomplete types.  */
5750         if (!COMPLETE_TYPE_P (type))
5751           return -1;
5752
5753         for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5754           {
5755             if (TREE_CODE (field) != FIELD_DECL)
5756               continue;
5757
5758             sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5759             if (sub_count < 0)
5760               return -1;
5761             count = count > sub_count ? count : sub_count;
5762           }
5763
5764         /* There must be no padding.  */
5765         if (!host_integerp (TYPE_SIZE (type), 1)
5766             || (tree_low_cst (TYPE_SIZE (type), 1)
5767                 != count * GET_MODE_BITSIZE (*modep)))
5768           return -1;
5769
5770         return count;
5771       }
5772
5773     default:
5774       break;
5775     }
5776
5777   return -1;
5778 }
5779
5780 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
5781    type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
5782    array types.  The C99 floating-point complex types are also considered
5783    as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
5784    types, which are GCC extensions and out of the scope of AAPCS64, are
5785    treated as composite types here as well.
5786
5787    Note that MODE itself is not sufficient in determining whether a type
5788    is such a composite type or not.  This is because
5789    stor-layout.c:compute_record_mode may have already changed the MODE
5790    (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
5791    structure with only one field may have its MODE set to the mode of the
5792    field.  Also an integer mode whose size matches the size of the
5793    RECORD_TYPE type may be used to substitute the original mode
5794    (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
5795    solely relied on.  */
5796
5797 static bool
5798 aarch64_composite_type_p (const_tree type,
5799                           enum machine_mode mode)
5800 {
5801   if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
5802     return true;
5803
5804   if (mode == BLKmode
5805       || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
5806       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
5807     return true;
5808
5809   return false;
5810 }
5811
5812 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
5813    type as described in AAPCS64 \S 4.1.2.
5814
5815    See the comment above aarch64_composite_type_p for the notes on MODE.  */
5816
5817 static bool
5818 aarch64_short_vector_p (const_tree type,
5819                         enum machine_mode mode)
5820 {
5821   HOST_WIDE_INT size = -1;
5822
5823   if (type && TREE_CODE (type) == VECTOR_TYPE)
5824     size = int_size_in_bytes (type);
5825   else if (!aarch64_composite_type_p (type, mode)
5826            && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5827                || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
5828     size = GET_MODE_SIZE (mode);
5829
5830   return (size == 8 || size == 16) ? true : false;
5831 }
5832
5833 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
5834    shall be passed or returned in simd/fp register(s) (providing these
5835    parameter passing registers are available).
5836
5837    Upon successful return, *COUNT returns the number of needed registers,
5838    *BASE_MODE returns the mode of the individual register and when IS_HAF
5839    is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
5840    floating-point aggregate or a homogeneous short-vector aggregate.  */
5841
5842 static bool
5843 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
5844                                          const_tree type,
5845                                          enum machine_mode *base_mode,
5846                                          int *count,
5847                                          bool *is_ha)
5848 {
5849   enum machine_mode new_mode = VOIDmode;
5850   bool composite_p = aarch64_composite_type_p (type, mode);
5851
5852   if (is_ha != NULL) *is_ha = false;
5853
5854   if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
5855       || aarch64_short_vector_p (type, mode))
5856     {
5857       *count = 1;
5858       new_mode = mode;
5859     }
5860   else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5861     {
5862       if (is_ha != NULL) *is_ha = true;
5863       *count = 2;
5864       new_mode = GET_MODE_INNER (mode);
5865     }
5866   else if (type && composite_p)
5867     {
5868       int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5869
5870       if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
5871         {
5872           if (is_ha != NULL) *is_ha = true;
5873           *count = ag_count;
5874         }
5875       else
5876         return false;
5877     }
5878   else
5879     return false;
5880
5881   *base_mode = new_mode;
5882   return true;
5883 }
5884
5885 /* Implement TARGET_STRUCT_VALUE_RTX.  */
5886
5887 static rtx
5888 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
5889                           int incoming ATTRIBUTE_UNUSED)
5890 {
5891   return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
5892 }
5893
5894 /* Implements target hook vector_mode_supported_p.  */
5895 static bool
5896 aarch64_vector_mode_supported_p (enum machine_mode mode)
5897 {
5898   if (TARGET_SIMD
5899       && (mode == V4SImode  || mode == V8HImode
5900           || mode == V16QImode || mode == V2DImode
5901           || mode == V2SImode  || mode == V4HImode
5902           || mode == V8QImode || mode == V2SFmode
5903           || mode == V4SFmode || mode == V2DFmode))
5904     return true;
5905
5906   return false;
5907 }
5908
5909 /* Return quad mode as the preferred SIMD mode.  */
5910 static enum machine_mode
5911 aarch64_preferred_simd_mode (enum machine_mode mode)
5912 {
5913   if (TARGET_SIMD)
5914     switch (mode)
5915       {
5916       case DFmode:
5917         return V2DFmode;
5918       case SFmode:
5919         return V4SFmode;
5920       case SImode:
5921         return V4SImode;
5922       case HImode:
5923         return V8HImode;
5924       case QImode:
5925         return V16QImode;
5926       case DImode:
5927           return V2DImode;
5928         break;
5929
5930       default:;
5931       }
5932   return word_mode;
5933 }
5934
/* Return the bitmask of possible vector sizes, in bytes, for the
   vectorizer to iterate over: 16-byte and 8-byte vectors.  */
static unsigned int
aarch64_autovectorize_vector_sizes (void)
{
  const unsigned int quad_bytes = 16;
  const unsigned int double_bytes = 8;

  return quad_bytes | double_bytes;
}
5942
5943 /* A table to help perform AArch64-specific name mangling for AdvSIMD
5944    vector types in order to conform to the AAPCS64 (see "Procedure
5945    Call Standard for the ARM 64-bit Architecture", Appendix A).  To
5946    qualify for emission with the mangled names defined in that document,
5947    a vector type must not only be of the correct mode but also be
5948    composed of AdvSIMD vector element types (e.g.
5949    _builtin_aarch64_simd_qi); these types are registered by
5950    aarch64_init_simd_builtins ().  In other words, vector types defined
5951    in other ways e.g. via vector_size attribute will get default
5952    mangled names.  */
5953 typedef struct
5954 {
5955   enum machine_mode mode;
5956   const char *element_type_name;
5957   const char *mangled_name;
5958 } aarch64_simd_mangle_map_entry;
5959
5960 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
5961   /* 64-bit containerized types.  */
5962   { V8QImode,  "__builtin_aarch64_simd_qi",     "10__Int8x8_t" },
5963   { V8QImode,  "__builtin_aarch64_simd_uqi",    "11__Uint8x8_t" },
5964   { V4HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x4_t" },
5965   { V4HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x4_t" },
5966   { V2SImode,  "__builtin_aarch64_simd_si",     "11__Int32x2_t" },
5967   { V2SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x2_t" },
5968   { V2SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x2_t" },
5969   { V8QImode,  "__builtin_aarch64_simd_poly8",  "11__Poly8x8_t" },
5970   { V4HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
5971   /* 128-bit containerized types.  */
5972   { V16QImode, "__builtin_aarch64_simd_qi",     "11__Int8x16_t" },
5973   { V16QImode, "__builtin_aarch64_simd_uqi",    "12__Uint8x16_t" },
5974   { V8HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x8_t" },
5975   { V8HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x8_t" },
5976   { V4SImode,  "__builtin_aarch64_simd_si",     "11__Int32x4_t" },
5977   { V4SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x4_t" },
5978   { V2DImode,  "__builtin_aarch64_simd_di",     "11__Int64x2_t" },
5979   { V2DImode,  "__builtin_aarch64_simd_udi",    "12__Uint64x2_t" },
5980   { V4SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x4_t" },
5981   { V2DFmode,  "__builtin_aarch64_simd_df",     "13__Float64x2_t" },
5982   { V16QImode, "__builtin_aarch64_simd_poly8",  "12__Poly8x16_t" },
5983   { V8HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
5984   { VOIDmode, NULL, NULL }
5985 };
5986
5987 /* Implement TARGET_MANGLE_TYPE.  */
5988
5989 const char *
5990 aarch64_mangle_type (const_tree type)
5991 {
5992   /* The AArch64 ABI documents say that "__va_list" has to be
5993      managled as if it is in the "std" namespace.  */
5994   if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
5995     return "St9__va_list";
5996
5997   /* Check the mode of the vector type, and the name of the vector
5998      element type, against the table.  */
5999   if (TREE_CODE (type) == VECTOR_TYPE)
6000     {
6001       aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6002
6003       while (pos->mode != VOIDmode)
6004         {
6005           tree elt_type = TREE_TYPE (type);
6006
6007           if (pos->mode == TYPE_MODE (type)
6008               && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6009               && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6010                           pos->element_type_name))
6011             return pos->mangled_name;
6012
6013           pos++;
6014         }
6015     }
6016
6017   /* Use the default mangling.  */
6018   return NULL;
6019 }
6020
/* Return the letter that denotes an element of SIZE bits: 'b' for 8,
   'h' for 16, 's' for 32 and 'd' for 64.  Any other SIZE is a
   programming error and hits gcc_unreachable.  */
static unsigned char
sizetochar (int size)
{
  if (size == 8)
    return 'b';
  if (size == 16)
    return 'h';
  if (size == 32)
    return 's';
  if (size == 64)
    return 'd';

  gcc_unreachable ();
}
6034
6035 /* Return true iff x is a uniform vector of floating-point
6036    constants, and the constant can be represented in
6037    quarter-precision form.  Note, as aarch64_float_const_representable
6038    rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
6039 static bool
6040 aarch64_vect_float_const_representable_p (rtx x)
6041 {
6042   int i = 0;
6043   REAL_VALUE_TYPE r0, ri;
6044   rtx x0, xi;
6045
6046   if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6047     return false;
6048
6049   x0 = CONST_VECTOR_ELT (x, 0);
6050   if (!CONST_DOUBLE_P (x0))
6051     return false;
6052
6053   REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6054
6055   for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6056     {
6057       xi = CONST_VECTOR_ELT (x, i);
6058       if (!CONST_DOUBLE_P (xi))
6059         return false;
6060
6061       REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6062       if (!REAL_VALUES_EQUAL (r0, ri))
6063         return false;
6064     }
6065
6066   return aarch64_float_const_representable_p (x0);
6067 }
6068
6069 /* TODO: This function returns values similar to those
6070    returned by neon_valid_immediate in gcc/config/arm/arm.c
6071    but the API here is different enough that these magic numbers
6072    are not used.  It should be sufficient to return true or false.  */
6073 static int
6074 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
6075                               rtx *modconst, int *elementwidth,
6076                               unsigned char *elementchar,
6077                               int *mvn, int *shift)
6078 {
6079 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)  \
6080   matches = 1;                                          \
6081   for (i = 0; i < idx; i += (STRIDE))                   \
6082     if (!(TEST))                                        \
6083       matches = 0;                                      \
6084   if (matches)                                          \
6085     {                                                   \
6086       immtype = (CLASS);                                \
6087       elsize = (ELSIZE);                                \
6088       elchar = sizetochar (elsize);                     \
6089       eshift = (SHIFT);                                 \
6090       emvn = (NEG);                                     \
6091       break;                                            \
6092     }
6093
6094   unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6095   unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6096   unsigned char bytes[16];
6097   unsigned char elchar = 0;
6098   int immtype = -1, matches;
6099   unsigned int invmask = inverse ? 0xff : 0;
6100   int eshift, emvn;
6101
6102   if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6103     {
6104       bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode);
6105       int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0)));
6106
6107       if (!(simd_imm_zero
6108             || aarch64_vect_float_const_representable_p (op)))
6109         return -1;
6110
6111         if (modconst)
6112           *modconst = CONST_VECTOR_ELT (op, 0);
6113
6114         if (elementwidth)
6115           *elementwidth = elem_width;
6116
6117         if (elementchar)
6118           *elementchar = sizetochar (elem_width);
6119
6120         if (shift)
6121           *shift = 0;
6122
6123         if (simd_imm_zero)
6124           return 19;
6125         else
6126           return 18;
6127     }
6128
6129   /* Splat vector constant out into a byte vector.  */
6130   for (i = 0; i < n_elts; i++)
6131     {
6132       rtx el = CONST_VECTOR_ELT (op, i);
6133       unsigned HOST_WIDE_INT elpart;
6134       unsigned int part, parts;
6135
6136       if (GET_CODE (el) == CONST_INT)
6137         {
6138           elpart = INTVAL (el);
6139           parts = 1;
6140         }
6141       else if (GET_CODE (el) == CONST_DOUBLE)
6142         {
6143           elpart = CONST_DOUBLE_LOW (el);
6144           parts = 2;
6145         }
6146       else
6147         gcc_unreachable ();
6148
6149       for (part = 0; part < parts; part++)
6150         {
6151           unsigned int byte;
6152           for (byte = 0; byte < innersize; byte++)
6153             {
6154               bytes[idx++] = (elpart & 0xff) ^ invmask;
6155               elpart >>= BITS_PER_UNIT;
6156             }
6157           if (GET_CODE (el) == CONST_DOUBLE)
6158             elpart = CONST_DOUBLE_HIGH (el);
6159         }
6160     }
6161
6162   /* Sanity check.  */
6163   gcc_assert (idx == GET_MODE_SIZE (mode));
6164
6165   do
6166     {
6167       CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6168              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6169
6170       CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6171              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6172
6173       CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6174              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6175
6176       CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6177              && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6178
6179       CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6180
6181       CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6182
6183       CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6184              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6185
6186       CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6187              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6188
6189       CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6190              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6191
6192       CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6193              && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6194
6195       CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6196
6197       CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6198
6199       CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6200              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6201
6202       CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6203              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6204
6205       CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6206              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 0, 0);
6207
6208       CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6209              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 0, 1);
6210
6211       CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6212
6213       CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6214              && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6215     }
6216   while (0);
6217
6218   /* TODO: Currently the assembler cannot handle types 12 to 15.
6219      And there is no way to specify cmode through the compiler.
6220      Disable them till there is support in the assembler.  */
6221   if (immtype == -1
6222       || (immtype >= 12 && immtype <= 15)
6223       || immtype == 18)
6224     return -1;
6225
6226
6227   if (elementwidth)
6228     *elementwidth = elsize;
6229
6230   if (elementchar)
6231     *elementchar = elchar;
6232
6233   if (mvn)
6234     *mvn = emvn;
6235
6236   if (shift)
6237     *shift = eshift;
6238
6239   if (modconst)
6240     {
6241       unsigned HOST_WIDE_INT imm = 0;
6242
6243       /* Un-invert bytes of recognized vector, if necessary.  */
6244       if (invmask != 0)
6245         for (i = 0; i < idx; i++)
6246           bytes[i] ^= invmask;
6247
6248       if (immtype == 17)
6249         {
6250           /* FIXME: Broken on 32-bit H_W_I hosts.  */
6251           gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6252
6253           for (i = 0; i < 8; i++)
6254             imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6255               << (i * BITS_PER_UNIT);
6256
6257           *modconst = GEN_INT (imm);
6258         }
6259       else
6260         {
6261           unsigned HOST_WIDE_INT imm = 0;
6262
6263           for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6264             imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6265
6266           /* Construct 'abcdefgh' because the assembler cannot handle
6267              generic constants.  */
6268           gcc_assert (shift != NULL && mvn != NULL);
6269           if (*mvn)
6270             imm = ~imm;
6271           imm = (imm >> *shift) & 0xff;
6272           *modconst = GEN_INT (imm);
6273         }
6274     }
6275
6276   return immtype;
6277 #undef CHECK
6278 }
6279
6280 /* Return TRUE if rtx X is legal for use as either a AdvSIMD MOVI instruction
6281    (or, implicitly, MVNI) immediate.  Write back width per element
6282    to *ELEMENTWIDTH, and a modified constant (whatever should be output
6283    for a MOVI instruction) in *MODCONST.  */
6284 int
6285 aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode,
6286                                        rtx *modconst, int *elementwidth,
6287                                        unsigned char *elementchar,
6288                                        int *mvn, int *shift)
6289 {
6290   rtx tmpconst;
6291   int tmpwidth;
6292   unsigned char tmpwidthc;
6293   int tmpmvn = 0, tmpshift = 0;
6294   int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst,
6295                                              &tmpwidth, &tmpwidthc,
6296                                              &tmpmvn, &tmpshift);
6297
6298   if (retval == -1)
6299     return 0;
6300
6301   if (modconst)
6302     *modconst = tmpconst;
6303
6304   if (elementwidth)
6305     *elementwidth = tmpwidth;
6306
6307   if (elementchar)
6308     *elementchar = tmpwidthc;
6309
6310   if (mvn)
6311     *mvn = tmpmvn;
6312
6313   if (shift)
6314     *shift = tmpshift;
6315
6316   return 1;
6317 }
6318
6319 static bool
6320 aarch64_const_vec_all_same_int_p (rtx x,
6321                                   HOST_WIDE_INT minval,
6322                                   HOST_WIDE_INT maxval)
6323 {
6324   HOST_WIDE_INT firstval;
6325   int count, i;
6326
6327   if (GET_CODE (x) != CONST_VECTOR
6328       || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6329     return false;
6330
6331   firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6332   if (firstval < minval || firstval > maxval)
6333     return false;
6334
6335   count = CONST_VECTOR_NUNITS (x);
6336   for (i = 1; i < count; i++)
6337     if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6338       return false;
6339
6340   return true;
6341 }
6342
6343 /* Check of immediate shift constants are within range.  */
6344 bool
6345 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6346 {
6347   int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6348   if (left)
6349     return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6350   else
6351     return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6352 }
6353
6354 /* Return true if X is a uniform vector where all elements
6355    are either the floating-point constant 0.0 or the
6356    integer constant 0.  */
6357 bool
6358 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6359 {
6360   return x == CONST0_RTX (mode);
6361 }
6362
6363 bool
6364 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6365 {
6366   HOST_WIDE_INT imm = INTVAL (x);
6367   int i;
6368
6369   for (i = 0; i < 8; i++)
6370     {
6371       unsigned int byte = imm & 0xff;
6372       if (byte != 0xff && byte != 0)
6373        return false;
6374       imm >>= 8;
6375     }
6376
6377   return true;
6378 }
6379
6380 /* Return a const_int vector of VAL.  */
6381 rtx
6382 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6383 {
6384   int nunits = GET_MODE_NUNITS (mode);
6385   rtvec v = rtvec_alloc (nunits);
6386   int i;
6387
6388   for (i=0; i < nunits; i++)
6389     RTVEC_ELT (v, i) = GEN_INT (val);
6390
6391   return gen_rtx_CONST_VECTOR (mode, v);
6392 }
6393
6394 /* Construct and return a PARALLEL RTX vector.  */
6395 rtx
6396 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6397 {
6398   int nunits = GET_MODE_NUNITS (mode);
6399   rtvec v = rtvec_alloc (nunits / 2);
6400   int base = high ? nunits / 2 : 0;
6401   rtx t1;
6402   int i;
6403
6404   for (i=0; i < nunits / 2; i++)
6405     RTVEC_ELT (v, i) = GEN_INT (base + i);
6406
6407   t1 = gen_rtx_PARALLEL (mode, v);
6408   return t1;
6409 }
6410
6411 /* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
6412    HIGH (exclusive).  */
6413 void
6414 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6415 {
6416   HOST_WIDE_INT lane;
6417   gcc_assert (GET_CODE (operand) == CONST_INT);
6418   lane = INTVAL (operand);
6419
6420   if (lane < low || lane >= high)
6421     error ("lane out of range");
6422 }
6423
6424 void
6425 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6426 {
6427   gcc_assert (GET_CODE (operand) == CONST_INT);
6428   HOST_WIDE_INT lane = INTVAL (operand);
6429
6430   if (lane < low || lane >= high)
6431     error ("constant out of range");
6432 }
6433
6434 /* Emit code to reinterpret one AdvSIMD type as another,
6435    without altering bits.  */
6436 void
6437 aarch64_simd_reinterpret (rtx dest, rtx src)
6438 {
6439   emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6440 }
6441
6442 /* Emit code to place a AdvSIMD pair result in memory locations (with equal
6443    registers).  */
6444 void
6445 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6446                             rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6447                             rtx op1)
6448 {
6449   rtx mem = gen_rtx_MEM (mode, destaddr);
6450   rtx tmp1 = gen_reg_rtx (mode);
6451   rtx tmp2 = gen_reg_rtx (mode);
6452
6453   emit_insn (intfn (tmp1, op1, tmp2));
6454
6455   emit_move_insn (mem, tmp1);
6456   mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6457   emit_move_insn (mem, tmp2);
6458 }
6459
6460 /* Return TRUE if OP is a valid vector addressing mode.  */
6461 bool
6462 aarch64_simd_mem_operand_p (rtx op)
6463 {
6464   return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6465                         || GET_CODE (XEXP (op, 0)) == REG);
6466 }
6467
6468 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6469    not to early-clobber SRC registers in the process.
6470
6471    We assume that the operands described by SRC and DEST represent a
6472    decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
6473    number of components into which the copy has been decomposed.  */
6474 void
6475 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6476                                 rtx *src, unsigned int count)
6477 {
6478   unsigned int i;
6479
6480   if (!reg_overlap_mentioned_p (operands[0], operands[1])
6481       || REGNO (operands[0]) < REGNO (operands[1]))
6482     {
6483       for (i = 0; i < count; i++)
6484         {
6485           operands[2 * i] = dest[i];
6486           operands[2 * i + 1] = src[i];
6487         }
6488     }
6489   else
6490     {
6491       for (i = 0; i < count; i++)
6492         {
6493           operands[2 * i] = dest[count - i - 1];
6494           operands[2 * i + 1] = src[count - i - 1];
6495         }
6496     }
6497 }
6498
6499 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6500    one of VSTRUCT modes: OI, CI or XI.  */
6501 int
6502 aarch64_simd_attr_length_move (rtx insn)
6503 {
6504   enum machine_mode mode;
6505
6506   extract_insn_cached (insn);
6507
6508   if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6509     {
6510       mode = GET_MODE (recog_data.operand[0]);
6511       switch (mode)
6512         {
6513         case OImode:
6514           return 8;
6515         case CImode:
6516           return 12;
6517         case XImode:
6518           return 16;
6519         default:
6520           gcc_unreachable ();
6521         }
6522     }
6523   return 4;
6524 }
6525
6526 /* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
6527    alignment of a vector to 128 bits.  */
6528 static HOST_WIDE_INT
6529 aarch64_simd_vector_alignment (const_tree type)
6530 {
6531   HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
6532   return MIN (align, 128);
6533 }
6534
6535 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
6536 static bool
6537 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6538 {
6539   if (is_packed)
6540     return false;
6541
6542   /* We guarantee alignment for vectors up to 128-bits.  */
6543   if (tree_int_cst_compare (TYPE_SIZE (type),
6544                             bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6545     return false;
6546
6547   /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
6548   return true;
6549 }
6550
6551 static unsigned HOST_WIDE_INT
6552 aarch64_shift_truncation_mask (enum machine_mode mode)
6553 {
6554   return
6555     (aarch64_vector_mode_supported_p (mode)
6556      || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
6557 }
6558
6559 #ifndef TLS_SECTION_ASM_FLAG
6560 #define TLS_SECTION_ASM_FLAG 'T'
6561 #endif
6562
6563 void
6564 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
6565                                tree decl ATTRIBUTE_UNUSED)
6566 {
6567   char flagchars[10], *f = flagchars;
6568
6569   /* If we have already declared this section, we can use an
6570      abbreviated form to switch back to it -- unless this section is
6571      part of a COMDAT groups, in which case GAS requires the full
6572      declaration every time.  */
6573   if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6574       && (flags & SECTION_DECLARED))
6575     {
6576       fprintf (asm_out_file, "\t.section\t%s\n", name);
6577       return;
6578     }
6579
6580   if (!(flags & SECTION_DEBUG))
6581     *f++ = 'a';
6582   if (flags & SECTION_WRITE)
6583     *f++ = 'w';
6584   if (flags & SECTION_CODE)
6585     *f++ = 'x';
6586   if (flags & SECTION_SMALL)
6587     *f++ = 's';
6588   if (flags & SECTION_MERGE)
6589     *f++ = 'M';
6590   if (flags & SECTION_STRINGS)
6591     *f++ = 'S';
6592   if (flags & SECTION_TLS)
6593     *f++ = TLS_SECTION_ASM_FLAG;
6594   if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6595     *f++ = 'G';
6596   *f = '\0';
6597
6598   fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
6599
6600   if (!(flags & SECTION_NOTYPE))
6601     {
6602       const char *type;
6603       const char *format;
6604
6605       if (flags & SECTION_BSS)
6606         type = "nobits";
6607       else
6608         type = "progbits";
6609
6610 #ifdef TYPE_OPERAND_FMT
6611       format = "," TYPE_OPERAND_FMT;
6612 #else
6613       format = ",@%s";
6614 #endif
6615
6616       fprintf (asm_out_file, format, type);
6617
6618       if (flags & SECTION_ENTSIZE)
6619         fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
6620       if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6621         {
6622           if (TREE_CODE (decl) == IDENTIFIER_NODE)
6623             fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
6624           else
6625             fprintf (asm_out_file, ",%s,comdat",
6626                      IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
6627         }
6628     }
6629
6630   putc ('\n', asm_out_file);
6631 }
6632
6633 /* Select a format to encode pointers in exception handling data.  */
6634 int
6635 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
6636 {
6637    int type;
6638    switch (aarch64_cmodel)
6639      {
6640      case AARCH64_CMODEL_TINY:
6641      case AARCH64_CMODEL_TINY_PIC:
6642      case AARCH64_CMODEL_SMALL:
6643      case AARCH64_CMODEL_SMALL_PIC:
6644        /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
6645           for everything.  */
6646        type = DW_EH_PE_sdata4;
6647        break;
6648      default:
6649        /* No assumptions here.  8-byte relocs required.  */
6650        type = DW_EH_PE_sdata8;
6651        break;
6652      }
6653    return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
6654 }
6655
6656 /* Emit load exclusive.  */
6657
6658 static void
6659 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
6660                              rtx mem, rtx model_rtx)
6661 {
6662   rtx (*gen) (rtx, rtx, rtx);
6663
6664   switch (mode)
6665     {
6666     case QImode: gen = gen_aarch64_load_exclusiveqi; break;
6667     case HImode: gen = gen_aarch64_load_exclusivehi; break;
6668     case SImode: gen = gen_aarch64_load_exclusivesi; break;
6669     case DImode: gen = gen_aarch64_load_exclusivedi; break;
6670     default:
6671       gcc_unreachable ();
6672     }
6673
6674   emit_insn (gen (rval, mem, model_rtx));
6675 }
6676
6677 /* Emit store exclusive.  */
6678
6679 static void
6680 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
6681                               rtx rval, rtx mem, rtx model_rtx)
6682 {
6683   rtx (*gen) (rtx, rtx, rtx, rtx);
6684
6685   switch (mode)
6686     {
6687     case QImode: gen = gen_aarch64_store_exclusiveqi; break;
6688     case HImode: gen = gen_aarch64_store_exclusivehi; break;
6689     case SImode: gen = gen_aarch64_store_exclusivesi; break;
6690     case DImode: gen = gen_aarch64_store_exclusivedi; break;
6691     default:
6692       gcc_unreachable ();
6693     }
6694
6695   emit_insn (gen (bval, rval, mem, model_rtx));
6696 }
6697
6698 /* Mark the previous jump instruction as unlikely.  */
6699
6700 static void
6701 aarch64_emit_unlikely_jump (rtx insn)
6702 {
6703   rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
6704
6705   insn = emit_jump_insn (insn);
6706   add_reg_note (insn, REG_BR_PROB, very_unlikely);
6707 }
6708
6709 /* Expand a compare and swap pattern.  */
6710
6711 void
6712 aarch64_expand_compare_and_swap (rtx operands[])
6713 {
6714   rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
6715   enum machine_mode mode, cmp_mode;
6716   rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
6717
6718   bval = operands[0];
6719   rval = operands[1];
6720   mem = operands[2];
6721   oldval = operands[3];
6722   newval = operands[4];
6723   is_weak = operands[5];
6724   mod_s = operands[6];
6725   mod_f = operands[7];
6726   mode = GET_MODE (mem);
6727   cmp_mode = mode;
6728
6729   /* Normally the succ memory model must be stronger than fail, but in the
6730      unlikely event of fail being ACQUIRE and succ being RELEASE we need to
6731      promote succ to ACQ_REL so that we don't lose the acquire semantics.  */
6732
6733   if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
6734       && INTVAL (mod_s) == MEMMODEL_RELEASE)
6735     mod_s = GEN_INT (MEMMODEL_ACQ_REL);
6736
6737   switch (mode)
6738     {
6739     case QImode:
6740     case HImode:
6741       /* For short modes, we're going to perform the comparison in SImode,
6742          so do the zero-extension now.  */
6743       cmp_mode = SImode;
6744       rval = gen_reg_rtx (SImode);
6745       oldval = convert_modes (SImode, mode, oldval, true);
6746       /* Fall through.  */
6747
6748     case SImode:
6749     case DImode:
6750       /* Force the value into a register if needed.  */
6751       if (!aarch64_plus_operand (oldval, mode))
6752         oldval = force_reg (cmp_mode, oldval);
6753       break;
6754
6755     default:
6756       gcc_unreachable ();
6757     }
6758
6759   switch (mode)
6760     {
6761     case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
6762     case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
6763     case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
6764     case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
6765     default:
6766       gcc_unreachable ();
6767     }
6768
6769   emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
6770
6771   if (mode == QImode || mode == HImode)
6772     emit_move_insn (operands[1], gen_lowpart (mode, rval));
6773
6774   x = gen_rtx_REG (CCmode, CC_REGNUM);
6775   x = gen_rtx_EQ (SImode, x, const0_rtx);
6776   emit_insn (gen_rtx_SET (VOIDmode, bval, x));
6777 }
6778
6779 /* Split a compare and swap pattern.  */
6780
6781 void
6782 aarch64_split_compare_and_swap (rtx operands[])
6783 {
6784   rtx rval, mem, oldval, newval, scratch;
6785   enum machine_mode mode;
6786   bool is_weak;
6787   rtx label1, label2, x, cond;
6788
6789   rval = operands[0];
6790   mem = operands[1];
6791   oldval = operands[2];
6792   newval = operands[3];
6793   is_weak = (operands[4] != const0_rtx);
6794   scratch = operands[7];
6795   mode = GET_MODE (mem);
6796
6797   label1 = NULL_RTX;
6798   if (!is_weak)
6799     {
6800       label1 = gen_label_rtx ();
6801       emit_label (label1);
6802     }
6803   label2 = gen_label_rtx ();
6804
6805   aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
6806
6807   cond = aarch64_gen_compare_reg (NE, rval, oldval);
6808   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
6809   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
6810                             gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
6811   aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
6812
6813   aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
6814
6815   if (!is_weak)
6816     {
6817       x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
6818       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
6819                                 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
6820       aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
6821     }
6822   else
6823     {
6824       cond = gen_rtx_REG (CCmode, CC_REGNUM);
6825       x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
6826       emit_insn (gen_rtx_SET (VOIDmode, cond, x));
6827     }
6828
6829   emit_label (label2);
6830 }
6831
6832 /* Split an atomic operation.  */
6833
6834 void
6835 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
6836                      rtx value, rtx model_rtx, rtx cond)
6837 {
6838   enum machine_mode mode = GET_MODE (mem);
6839   enum machine_mode wmode = (mode == DImode ? DImode : SImode);
6840   rtx label, x;
6841
6842   label = gen_label_rtx ();
6843   emit_label (label);
6844
6845   if (new_out)
6846     new_out = gen_lowpart (wmode, new_out);
6847   if (old_out)
6848     old_out = gen_lowpart (wmode, old_out);
6849   else
6850     old_out = new_out;
6851   value = simplify_gen_subreg (wmode, value, mode, 0);
6852
6853   aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
6854
6855   switch (code)
6856     {
6857     case SET:
6858       new_out = value;
6859       break;
6860
6861     case NOT:
6862       x = gen_rtx_AND (wmode, old_out, value);
6863       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
6864       x = gen_rtx_NOT (wmode, new_out);
6865       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
6866       break;
6867
6868     case MINUS:
6869       if (CONST_INT_P (value))
6870         {
6871           value = GEN_INT (-INTVAL (value));
6872           code = PLUS;
6873         }
6874       /* Fall through.  */
6875
6876     default:
6877       x = gen_rtx_fmt_ee (code, wmode, old_out, value);
6878       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
6879       break;
6880     }
6881
6882   aarch64_emit_store_exclusive (mode, cond, mem,
6883                                 gen_lowpart (mode, new_out), model_rtx);
6884
6885   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
6886   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
6887                             gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
6888   aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
6889 }
6890
6891 static void
6892 aarch64_start_file (void)
6893 {
6894   if (selected_arch)
6895     asm_fprintf (asm_out_file, "\t.arch %s\n", selected_arch->name);
6896   else if (selected_cpu)
6897     asm_fprintf (asm_out_file, "\t.cpu %s\n", selected_cpu->name);
6898   default_file_start();
6899 }
6900
6901 /* Target hook for c_mode_for_suffix.  */
6902 static enum machine_mode
6903 aarch64_c_mode_for_suffix (char suffix)
6904 {
6905   if (suffix == 'q')
6906     return TFmode;
6907
6908   return VOIDmode;
6909 }
6910
6911 /* We can only represent floating point constants which will fit in
6912    "quarter-precision" values.  These values are characterised by
6913    a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
6914    by:
6915
6916    (-1)^s * (n/16) * 2^r
6917
6918    Where:
6919      's' is the sign bit.
6920      'n' is an integer in the range 16 <= n <= 31.
6921      'r' is an integer in the range -3 <= r <= 4.  */
6922
6923 /* Return true iff X can be represented by a quarter-precision
6924    floating point immediate operand X.  Note, we cannot represent 0.0.  */
6925 bool
6926 aarch64_float_const_representable_p (rtx x)
6927 {
6928   /* This represents our current view of how many bits
6929      make up the mantissa.  */
6930   int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
6931   int sign, exponent;
6932   unsigned HOST_WIDE_INT mantissa, mask;
6933   HOST_WIDE_INT m1, m2;
6934   REAL_VALUE_TYPE r, m;
6935
6936   if (!CONST_DOUBLE_P (x))
6937     return false;
6938
6939   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6940
6941   /* We cannot represent infinities, NaNs or +/-zero.  We won't
6942      know if we have +zero until we analyse the mantissa, but we
6943      can reject the other invalid values.  */
6944   if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
6945       || REAL_VALUE_MINUS_ZERO (r))
6946     return false;
6947
6948   /* Extract sign and exponent.  */
6949   sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
6950   r = real_value_abs (&r);
6951   exponent = REAL_EXP (&r);
6952
6953   /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
6954      highest (sign) bit, with a fixed binary point at bit point_pos.
6955      m1 holds the low part of the mantissa, m2 the high part.
6956      WARNING: If we ever have a representation using more than 2 * H_W_I - 1
6957      bits for the mantissa, this can fail (low bits will be lost).  */
6958   real_ldexp (&m, &r, point_pos - exponent);
6959   REAL_VALUE_TO_INT (&m1, &m2, m);
6960
6961   /* If the low part of the mantissa has bits set we cannot represent
6962      the value.  */
6963   if (m1 != 0)
6964     return false;
6965   /* We have rejected the lower HOST_WIDE_INT, so update our
6966      understanding of how many bits lie in the mantissa and
6967      look only at the high HOST_WIDE_INT.  */
6968   mantissa = m2;
6969   point_pos -= HOST_BITS_PER_WIDE_INT;
6970
6971   /* We can only represent values with a mantissa of the form 1.xxxx.  */
6972   mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
6973   if ((mantissa & mask) != 0)
6974     return false;
6975
6976   /* Having filtered unrepresentable values, we may now remove all
6977      but the highest 5 bits.  */
6978   mantissa >>= point_pos - 5;
6979
6980   /* We cannot represent the value 0.0, so reject it.  This is handled
6981      elsewhere.  */
6982   if (mantissa == 0)
6983     return false;
6984
6985   /* Then, as bit 4 is always set, we can mask it off, leaving
6986      the mantissa in the range [0, 15].  */
6987   mantissa &= ~(1 << 4);
6988   gcc_assert (mantissa <= 15);
6989
6990   /* GCC internally does not use IEEE754-like encoding (where normalized
6991      significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
6992      Our mantissa values are shifted 4 places to the left relative to
6993      normalized IEEE754 so we must modify the exponent returned by REAL_EXP
6994      by 5 places to correct for GCC's representation.  */
6995   exponent = 5 - exponent;
6996
6997   return (exponent >= 0 && exponent <= 7);
6998 }
6999
7000 char*
7001 aarch64_output_simd_mov_immediate (rtx *const_vector,
7002                                    enum machine_mode mode,
7003                                    unsigned width)
7004 {
7005   int is_valid;
7006   unsigned char widthc;
7007   int lane_width_bits;
7008   static char templ[40];
7009   int shift = 0, mvn = 0;
7010   const char *mnemonic;
7011   unsigned int lane_count = 0;
7012
7013   is_valid =
7014     aarch64_simd_immediate_valid_for_move (*const_vector, mode,
7015                                            const_vector, &lane_width_bits,
7016                                            &widthc, &mvn, &shift);
7017   gcc_assert (is_valid);
7018
7019   mode = GET_MODE_INNER (mode);
7020   if (mode == SFmode || mode == DFmode)
7021     {
7022       bool zero_p =
7023         aarch64_float_const_zero_rtx_p (*const_vector);
7024       gcc_assert (shift == 0);
7025       mnemonic = zero_p ? "movi" : "fmov";
7026     }
7027   else
7028     mnemonic = mvn ? "mvni" : "movi";
7029
7030   gcc_assert (lane_width_bits != 0);
7031   lane_count = width / lane_width_bits;
7032
7033   if (lane_count == 1)
7034     snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic);
7035   else if (shift)
7036     snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d",
7037               mnemonic, lane_count, widthc, shift);
7038   else
7039     snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1",
7040               mnemonic, lane_count, widthc);
7041   return templ;
7042 }
7043
7044 /* Split operands into moves from op[1] + op[2] into op[0].  */
7045
7046 void
7047 aarch64_split_combinev16qi (rtx operands[3])
7048 {
7049   unsigned int dest = REGNO (operands[0]);
7050   unsigned int src1 = REGNO (operands[1]);
7051   unsigned int src2 = REGNO (operands[2]);
7052   enum machine_mode halfmode = GET_MODE (operands[1]);
7053   unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7054   rtx destlo, desthi;
7055
7056   gcc_assert (halfmode == V16QImode);
7057
7058   if (src1 == dest && src2 == dest + halfregs)
7059     {
7060       /* No-op move.  Can't split to nothing; emit something.  */
7061       emit_note (NOTE_INSN_DELETED);
7062       return;
7063     }
7064
7065   /* Preserve register attributes for variable tracking.  */
7066   destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7067   desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7068                                GET_MODE_SIZE (halfmode));
7069
7070   /* Special case of reversed high/low parts.  */
7071   if (reg_overlap_mentioned_p (operands[2], destlo)
7072       && reg_overlap_mentioned_p (operands[1], desthi))
7073     {
7074       emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7075       emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7076       emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7077     }
7078   else if (!reg_overlap_mentioned_p (operands[2], destlo))
7079     {
7080       /* Try to avoid unnecessary moves if part of the result
7081          is in the right place already.  */
7082       if (src1 != dest)
7083         emit_move_insn (destlo, operands[1]);
7084       if (src2 != dest + halfregs)
7085         emit_move_insn (desthi, operands[2]);
7086     }
7087   else
7088     {
7089       if (src2 != dest + halfregs)
7090         emit_move_insn (desthi, operands[2]);
7091       if (src1 != dest)
7092         emit_move_insn (destlo, operands[1]);
7093     }
7094 }
7095
/* vec_perm support.  */

/* Upper bound on the number of elements in a permutation; it sizes the
   selector arrays used by the vec_perm code.  */
#define MAX_VECT_LEN 16

/* Description of one constant permutation handed to the aarch64_evpc_*
   recognizers.  */
struct expand_vec_perm_d
{
  /* Destination and the two input vectors.  */
  rtx target, op0, op1;
  /* Selector indices; the recognizers read the first NELT entries.  */
  unsigned char perm[MAX_VECT_LEN];
  /* Vector mode of the permutation.  */
  enum machine_mode vmode;
  /* Number of elements being permuted.  */
  unsigned char nelt;
  /* True when selector indices are reduced modulo NELT rather than
     modulo 2 * NELT.  */
  bool one_vector_p;
  /* True when a recognizer should only report whether the permutation
     matches, without emitting any insns.  */
  bool testing_p;
};
7109
7110 /* Generate a variable permutation.  */
7111
7112 static void
7113 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7114 {
7115   enum machine_mode vmode = GET_MODE (target);
7116   bool one_vector_p = rtx_equal_p (op0, op1);
7117
7118   gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7119   gcc_checking_assert (GET_MODE (op0) == vmode);
7120   gcc_checking_assert (GET_MODE (op1) == vmode);
7121   gcc_checking_assert (GET_MODE (sel) == vmode);
7122   gcc_checking_assert (TARGET_SIMD);
7123
7124   if (one_vector_p)
7125     {
7126       if (vmode == V8QImode)
7127         {
7128           /* Expand the argument to a V16QI mode by duplicating it.  */
7129           rtx pair = gen_reg_rtx (V16QImode);
7130           emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7131           emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7132         }
7133       else
7134         {
7135           emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7136         }
7137     }
7138   else
7139     {
7140       rtx pair;
7141
7142       if (vmode == V8QImode)
7143         {
7144           pair = gen_reg_rtx (V16QImode);
7145           emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7146           emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7147         }
7148       else
7149         {
7150           pair = gen_reg_rtx (OImode);
7151           emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7152           emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7153         }
7154     }
7155 }
7156
7157 void
7158 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7159 {
7160   enum machine_mode vmode = GET_MODE (target);
7161   unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7162   bool one_vector_p = rtx_equal_p (op0, op1);
7163   rtx rmask[MAX_VECT_LEN], mask;
7164
7165   gcc_checking_assert (!BYTES_BIG_ENDIAN);
7166
7167   /* The TBL instruction does not use a modulo index, so we must take care
7168      of that ourselves.  */
7169   mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7170   for (i = 0; i < nelt; ++i)
7171     rmask[i] = mask;
7172   mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7173   sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7174
7175   aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7176 }
7177
7178 /* Recognize patterns suitable for the TRN instructions.  */
7179 static bool
7180 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7181 {
7182   unsigned int i, odd, mask, nelt = d->nelt;
7183   rtx out, in0, in1, x;
7184   rtx (*gen) (rtx, rtx, rtx);
7185   enum machine_mode vmode = d->vmode;
7186
7187   if (GET_MODE_UNIT_SIZE (vmode) > 8)
7188     return false;
7189
7190   /* Note that these are little-endian tests.
7191      We correct for big-endian later.  */
7192   if (d->perm[0] == 0)
7193     odd = 0;
7194   else if (d->perm[0] == 1)
7195     odd = 1;
7196   else
7197     return false;
7198   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7199
7200   for (i = 0; i < nelt; i += 2)
7201     {
7202       if (d->perm[i] != i + odd)
7203         return false;
7204       if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7205         return false;
7206     }
7207
7208   /* Success!  */
7209   if (d->testing_p)
7210     return true;
7211
7212   in0 = d->op0;
7213   in1 = d->op1;
7214   if (BYTES_BIG_ENDIAN)
7215     {
7216       x = in0, in0 = in1, in1 = x;
7217       odd = !odd;
7218     }
7219   out = d->target;
7220
7221   if (odd)
7222     {
7223       switch (vmode)
7224         {
7225         case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7226         case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7227         case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7228         case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7229         case V4SImode: gen = gen_aarch64_trn2v4si; break;
7230         case V2SImode: gen = gen_aarch64_trn2v2si; break;
7231         case V2DImode: gen = gen_aarch64_trn2v2di; break;
7232         case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7233         case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7234         case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7235         default:
7236           return false;
7237         }
7238     }
7239   else
7240     {
7241       switch (vmode)
7242         {
7243         case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7244         case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7245         case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7246         case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7247         case V4SImode: gen = gen_aarch64_trn1v4si; break;
7248         case V2SImode: gen = gen_aarch64_trn1v2si; break;
7249         case V2DImode: gen = gen_aarch64_trn1v2di; break;
7250         case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7251         case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7252         case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7253         default:
7254           return false;
7255         }
7256     }
7257
7258   emit_insn (gen (out, in0, in1));
7259   return true;
7260 }
7261
7262 /* Recognize patterns suitable for the UZP instructions.  */
7263 static bool
7264 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7265 {
7266   unsigned int i, odd, mask, nelt = d->nelt;
7267   rtx out, in0, in1, x;
7268   rtx (*gen) (rtx, rtx, rtx);
7269   enum machine_mode vmode = d->vmode;
7270
7271   if (GET_MODE_UNIT_SIZE (vmode) > 8)
7272     return false;
7273
7274   /* Note that these are little-endian tests.
7275      We correct for big-endian later.  */
7276   if (d->perm[0] == 0)
7277     odd = 0;
7278   else if (d->perm[0] == 1)
7279     odd = 1;
7280   else
7281     return false;
7282   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7283
7284   for (i = 0; i < nelt; i++)
7285     {
7286       unsigned elt = (i * 2 + odd) & mask;
7287       if (d->perm[i] != elt)
7288         return false;
7289     }
7290
7291   /* Success!  */
7292   if (d->testing_p)
7293     return true;
7294
7295   in0 = d->op0;
7296   in1 = d->op1;
7297   if (BYTES_BIG_ENDIAN)
7298     {
7299       x = in0, in0 = in1, in1 = x;
7300       odd = !odd;
7301     }
7302   out = d->target;
7303
7304   if (odd)
7305     {
7306       switch (vmode)
7307         {
7308         case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7309         case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7310         case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7311         case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7312         case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7313         case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7314         case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7315         case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7316         case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7317         case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7318         default:
7319           return false;
7320         }
7321     }
7322   else
7323     {
7324       switch (vmode)
7325         {
7326         case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7327         case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7328         case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7329         case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7330         case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7331         case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7332         case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7333         case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7334         case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7335         case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7336         default:
7337           return false;
7338         }
7339     }
7340
7341   emit_insn (gen (out, in0, in1));
7342   return true;
7343 }
7344
7345 /* Recognize patterns suitable for the ZIP instructions.  */
7346 static bool
7347 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7348 {
7349   unsigned int i, high, mask, nelt = d->nelt;
7350   rtx out, in0, in1, x;
7351   rtx (*gen) (rtx, rtx, rtx);
7352   enum machine_mode vmode = d->vmode;
7353
7354   if (GET_MODE_UNIT_SIZE (vmode) > 8)
7355     return false;
7356
7357   /* Note that these are little-endian tests.
7358      We correct for big-endian later.  */
7359   high = nelt / 2;
7360   if (d->perm[0] == high)
7361     /* Do Nothing.  */
7362     ;
7363   else if (d->perm[0] == 0)
7364     high = 0;
7365   else
7366     return false;
7367   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7368
7369   for (i = 0; i < nelt / 2; i++)
7370     {
7371       unsigned elt = (i + high) & mask;
7372       if (d->perm[i * 2] != elt)
7373         return false;
7374       elt = (elt + nelt) & mask;
7375       if (d->perm[i * 2 + 1] != elt)
7376         return false;
7377     }
7378
7379   /* Success!  */
7380   if (d->testing_p)
7381     return true;
7382
7383   in0 = d->op0;
7384   in1 = d->op1;
7385   if (BYTES_BIG_ENDIAN)
7386     {
7387       x = in0, in0 = in1, in1 = x;
7388       high = !high;
7389     }
7390   out = d->target;
7391
7392   if (high)
7393     {
7394       switch (vmode)
7395         {
7396         case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7397         case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7398         case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7399         case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7400         case V4SImode: gen = gen_aarch64_zip2v4si; break;
7401         case V2SImode: gen = gen_aarch64_zip2v2si; break;
7402         case V2DImode: gen = gen_aarch64_zip2v2di; break;
7403         case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7404         case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7405         case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7406         default:
7407           return false;
7408         }
7409     }
7410   else
7411     {
7412       switch (vmode)
7413         {
7414         case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7415         case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7416         case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7417         case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7418         case V4SImode: gen = gen_aarch64_zip1v4si; break;
7419         case V2SImode: gen = gen_aarch64_zip1v2si; break;
7420         case V2DImode: gen = gen_aarch64_zip1v2di; break;
7421         case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7422         case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7423         case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7424         default:
7425           return false;
7426         }
7427     }
7428
7429   emit_insn (gen (out, in0, in1));
7430   return true;
7431 }
7432
7433 static bool
7434 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
7435 {
7436   rtx rperm[MAX_VECT_LEN], sel;
7437   enum machine_mode vmode = d->vmode;
7438   unsigned int i, nelt = d->nelt;
7439
7440   /* TODO: ARM's TBL indexing is little-endian.  In order to handle GCC's
7441      numbering of elements for big-endian, we must reverse the order.  */
7442   if (BYTES_BIG_ENDIAN)
7443     return false;
7444
7445   if (d->testing_p)
7446     return true;
7447
7448   /* Generic code will try constant permutation twice.  Once with the
7449      original mode and again with the elements lowered to QImode.
7450      So wait and don't do the selector expansion ourselves.  */
7451   if (vmode != V8QImode && vmode != V16QImode)
7452     return false;
7453
7454   for (i = 0; i < nelt; ++i)
7455     rperm[i] = GEN_INT (d->perm[i]);
7456   sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
7457   sel = force_reg (vmode, sel);
7458
7459   aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
7460   return true;
7461 }
7462
7463 static bool
7464 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
7465 {
7466   /* The pattern matching functions above are written to look for a small
7467      number to begin the sequence (0, 1, N/2).  If we begin with an index
7468      from the second operand, we can swap the operands.  */
7469   if (d->perm[0] >= d->nelt)
7470     {
7471       unsigned i, nelt = d->nelt;
7472       rtx x;
7473
7474       for (i = 0; i < nelt; ++i)
7475         d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
7476
7477       x = d->op0;
7478       d->op0 = d->op1;
7479       d->op1 = x;
7480     }
7481
7482   if (TARGET_SIMD)
7483     {
7484       if (aarch64_evpc_zip (d))
7485         return true;
7486       else if (aarch64_evpc_uzp (d))
7487         return true;
7488       else if (aarch64_evpc_trn (d))
7489         return true;
7490       return aarch64_evpc_tbl (d);
7491     }
7492   return false;
7493 }
7494
7495 /* Expand a vec_perm_const pattern.  */
7496
7497 bool
7498 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
7499 {
7500   struct expand_vec_perm_d d;
7501   int i, nelt, which;
7502
7503   d.target = target;
7504   d.op0 = op0;
7505   d.op1 = op1;
7506
7507   d.vmode = GET_MODE (target);
7508   gcc_assert (VECTOR_MODE_P (d.vmode));
7509   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7510   d.testing_p = false;
7511
7512   for (i = which = 0; i < nelt; ++i)
7513     {
7514       rtx e = XVECEXP (sel, 0, i);
7515       int ei = INTVAL (e) & (2 * nelt - 1);
7516       which |= (ei < nelt ? 1 : 2);
7517       d.perm[i] = ei;
7518     }
7519
7520   switch (which)
7521     {
7522     default:
7523       gcc_unreachable ();
7524
7525     case 3:
7526       d.one_vector_p = false;
7527       if (!rtx_equal_p (op0, op1))
7528         break;
7529
7530       /* The elements of PERM do not suggest that only the first operand
7531          is used, but both operands are identical.  Allow easier matching
7532          of the permutation by folding the permutation into the single
7533          input vector.  */
7534       /* Fall Through.  */
7535     case 2:
7536       for (i = 0; i < nelt; ++i)
7537         d.perm[i] &= nelt - 1;
7538       d.op0 = op1;
7539       d.one_vector_p = true;
7540       break;
7541
7542     case 1:
7543       d.op1 = op0;
7544       d.one_vector_p = true;
7545       break;
7546     }
7547
7548   return aarch64_expand_vec_perm_const_1 (&d);
7549 }
7550
7551 static bool
7552 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
7553                                      const unsigned char *sel)
7554 {
7555   struct expand_vec_perm_d d;
7556   unsigned int i, nelt, which;
7557   bool ret;
7558
7559   d.vmode = vmode;
7560   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7561   d.testing_p = true;
7562   memcpy (d.perm, sel, nelt);
7563
7564   /* Calculate whether all elements are in one vector.  */
7565   for (i = which = 0; i < nelt; ++i)
7566     {
7567       unsigned char e = d.perm[i];
7568       gcc_assert (e < 2 * nelt);
7569       which |= (e < nelt ? 1 : 2);
7570     }
7571
7572   /* If all elements are from the second vector, reindex as if from the
7573      first vector.  */
7574   if (which == 2)
7575     for (i = 0; i < nelt; ++i)
7576       d.perm[i] -= nelt;
7577
7578   /* Check whether the mask can be applied to a single vector.  */
7579   d.one_vector_p = (which != 3);
7580
7581   d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
7582   d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
7583   if (!d.one_vector_p)
7584     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
7585
7586   start_sequence ();
7587   ret = aarch64_expand_vec_perm_const_1 (&d);
7588   end_sequence ();
7589
7590   return ret;
7591 }
7592
7593 #undef TARGET_ADDRESS_COST
7594 #define TARGET_ADDRESS_COST aarch64_address_cost
7595
7596 /* This hook will determines whether unnamed bitfields affect the alignment
7597    of the containing structure.  The hook returns true if the structure
7598    should inherit the alignment requirements of an unnamed bitfield's
7599    type.  */
7600 #undef TARGET_ALIGN_ANON_BITFIELD
7601 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
7602
7603 #undef TARGET_ASM_ALIGNED_DI_OP
7604 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
7605
7606 #undef TARGET_ASM_ALIGNED_HI_OP
7607 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
7608
7609 #undef TARGET_ASM_ALIGNED_SI_OP
7610 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
7611
7612 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7613 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
7614   hook_bool_const_tree_hwi_hwi_const_tree_true
7615
7616 #undef TARGET_ASM_FILE_START
7617 #define TARGET_ASM_FILE_START aarch64_start_file
7618
7619 #undef TARGET_ASM_OUTPUT_MI_THUNK
7620 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
7621
7622 #undef TARGET_ASM_SELECT_RTX_SECTION
7623 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
7624
7625 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
7626 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
7627
7628 #undef TARGET_BUILD_BUILTIN_VA_LIST
7629 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
7630
7631 #undef TARGET_CALLEE_COPIES
7632 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
7633
7634 #undef TARGET_CAN_ELIMINATE
7635 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
7636
7637 #undef TARGET_CANNOT_FORCE_CONST_MEM
7638 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
7639
7640 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7641 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
7642
7643 /* Only the least significant bit is used for initialization guard
7644    variables.  */
7645 #undef TARGET_CXX_GUARD_MASK_BIT
7646 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
7647
7648 #undef TARGET_C_MODE_FOR_SUFFIX
7649 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
7650
7651 #ifdef TARGET_BIG_ENDIAN_DEFAULT
7652 #undef  TARGET_DEFAULT_TARGET_FLAGS
7653 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
7654 #endif
7655
7656 #undef TARGET_CLASS_MAX_NREGS
7657 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
7658
7659 #undef TARGET_BUILTIN_DECL
7660 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
7661
7662 #undef  TARGET_EXPAND_BUILTIN
7663 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
7664
7665 #undef TARGET_EXPAND_BUILTIN_VA_START
7666 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
7667
7668 #undef TARGET_FUNCTION_ARG
7669 #define TARGET_FUNCTION_ARG aarch64_function_arg
7670
7671 #undef TARGET_FUNCTION_ARG_ADVANCE
7672 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
7673
7674 #undef TARGET_FUNCTION_ARG_BOUNDARY
7675 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
7676
7677 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7678 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
7679
7680 #undef TARGET_FUNCTION_VALUE
7681 #define TARGET_FUNCTION_VALUE aarch64_function_value
7682
7683 #undef TARGET_FUNCTION_VALUE_REGNO_P
7684 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
7685
7686 #undef TARGET_FRAME_POINTER_REQUIRED
7687 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
7688
7689 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7690 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
7691
7692 #undef  TARGET_INIT_BUILTINS
7693 #define TARGET_INIT_BUILTINS  aarch64_init_builtins
7694
7695 #undef TARGET_LEGITIMATE_ADDRESS_P
7696 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
7697
7698 #undef TARGET_LEGITIMATE_CONSTANT_P
7699 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
7700
7701 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7702 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
7703
7704 #undef TARGET_MANGLE_TYPE
7705 #define TARGET_MANGLE_TYPE aarch64_mangle_type
7706
7707 #undef TARGET_MEMORY_MOVE_COST
7708 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
7709
7710 #undef TARGET_MUST_PASS_IN_STACK
7711 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7712
7713 /* This target hook should return true if accesses to volatile bitfields
7714    should use the narrowest mode possible.  It should return false if these
7715    accesses should use the bitfield container type.  */
7716 #undef TARGET_NARROW_VOLATILE_BITFIELD
7717 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
7718
7719 #undef  TARGET_OPTION_OVERRIDE
7720 #define TARGET_OPTION_OVERRIDE aarch64_override_options
7721
7722 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
7723 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
7724   aarch64_override_options_after_change
7725
7726 #undef TARGET_PASS_BY_REFERENCE
7727 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
7728
7729 #undef TARGET_PREFERRED_RELOAD_CLASS
7730 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
7731
7732 #undef TARGET_SECONDARY_RELOAD
7733 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
7734
7735 #undef TARGET_SHIFT_TRUNCATION_MASK
7736 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
7737
7738 #undef TARGET_SETUP_INCOMING_VARARGS
7739 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
7740
7741 #undef TARGET_STRUCT_VALUE_RTX
7742 #define TARGET_STRUCT_VALUE_RTX   aarch64_struct_value_rtx
7743
7744 #undef TARGET_REGISTER_MOVE_COST
7745 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
7746
7747 #undef TARGET_RETURN_IN_MEMORY
7748 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
7749
7750 #undef TARGET_RETURN_IN_MSB
7751 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
7752
7753 #undef TARGET_RTX_COSTS
7754 #define TARGET_RTX_COSTS aarch64_rtx_costs
7755
7756 #undef TARGET_TRAMPOLINE_INIT
7757 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
7758
7759 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
7760 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
7761
7762 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7763 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
7764
7765 #undef TARGET_ARRAY_MODE_SUPPORTED_P
7766 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
7767
7768 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
7769 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
7770
7771 #undef TARGET_VECTORIZE_BUILTINS
7772 #define TARGET_VECTORIZE_BUILTINS
7773
7774 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
7775 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
7776   aarch64_builtin_vectorized_function
7777
7778 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
7779 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
7780   aarch64_autovectorize_vector_sizes
7781
7782 /* Section anchor support.  */
7783
7784 #undef TARGET_MIN_ANCHOR_OFFSET
7785 #define TARGET_MIN_ANCHOR_OFFSET -256
7786
7787 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
7788    byte offset; we can do much more for larger data types, but have no way
7789    to determine the size of the access.  We assume accesses are aligned.  */
7790 #undef TARGET_MAX_ANCHOR_OFFSET
7791 #define TARGET_MAX_ANCHOR_OFFSET 4095
7792
7793 #undef TARGET_VECTOR_ALIGNMENT
7794 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
7795
7796 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7797 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
7798   aarch64_simd_vector_alignment_reachable
7799
7800 /* vec_perm support.  */
7801
7802 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
7803 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
7804   aarch64_vectorize_vec_perm_const_ok
7805
7806
7807 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
7808
7809 struct gcc_target targetm = TARGET_INITIALIZER;
7810
7811 #include "gt-aarch64.h"