gcc/config/aarch64/aarch64.c

   1 /* Machine description for AArch64 architecture.
   2    Copyright (C) 2009-2014 Free Software Foundation, Inc.
   3    Contributed by ARM Ltd.
   4
   5    This file is part of GCC.
   6
   7    GCC is free software; you can redistribute it and/or modify it
   8    under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3, or (at your option)
  10    any later version.
  11
  12    GCC is distributed in the hope that it will be useful, but
  13    WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15    General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with GCC; see the file COPYING3.  If not see
  19    <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "insn-codes.h"
  26 #include "rtl.h"
  27 #include "insn-attr.h"
  28 #include "tree.h"
  29 #include "stringpool.h"
  30 #include "stor-layout.h"
  31 #include "calls.h"
  32 #include "varasm.h"
  33 #include "regs.h"
  34 #include "df.h"
  35 #include "hard-reg-set.h"
  36 #include "output.h"
  37 #include "expr.h"
  38 #include "reload.h"
  39 #include "toplev.h"
  40 #include "target.h"
  41 #include "target-def.h"
  42 #include "targhooks.h"
  43 #include "ggc.h"
  44 #include "function.h"
  45 #include "tm_p.h"
  46 #include "recog.h"
  47 #include "langhooks.h"
  48 #include "diagnostic-core.h"
  49 #include "pointer-set.h"
  50 #include "hash-table.h"
  51 #include "vec.h"
  52 #include "basic-block.h"
  53 #include "tree-ssa-alias.h"
  54 #include "internal-fn.h"
  55 #include "gimple-fold.h"
  56 #include "tree-eh.h"
  57 #include "gimple-expr.h"
  58 #include "is-a.h"
  59 #include "gimple.h"
  60 #include "gimplify.h"
  61 #include "optabs.h"
  62 #include "dwarf2.h"
  63 #include "cfgloop.h"
  64 #include "tree-vectorizer.h"
  65 #include "config/arm/aarch-cost-tables.h"
  66 #include "dumpfile.h"
  67
   68 /* Pointer size in bytes: POINTER_SIZE divided by BITS_PER_UNIT.  */
   69 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
  70
   71 /* Classifies an address.
   72
   73    ADDRESS_REG_IMM
   74        A simple base register plus immediate offset.
   75
   76    ADDRESS_REG_WB
   77        A base register indexed by immediate offset with writeback.
   78
   79    ADDRESS_REG_REG
   80        A base register indexed by (optionally scaled) register.
   81
   82    ADDRESS_REG_UXTW
   83        A base register indexed by (optionally scaled) zero-extended register.
   84
   85    ADDRESS_REG_SXTW
   86        A base register indexed by (optionally scaled) sign-extended register.
   87
   88    ADDRESS_LO_SUM
   89        A LO_SUM rtx with a base register and "LO12" symbol relocation.
   90
   91    ADDRESS_SYMBOLIC
   92        A constant symbolic address, in the pc-relative literal pool.  */
   93
   94 enum aarch64_address_type {
   95   ADDRESS_REG_IMM,
   96   ADDRESS_REG_WB,
   97   ADDRESS_REG_REG,
   98   ADDRESS_REG_UXTW,
   99   ADDRESS_REG_SXTW,
  100   ADDRESS_LO_SUM,
  101   ADDRESS_SYMBOLIC
  102 };
 103
      /* Describes one address: its aarch64_address_type classification plus
         the base, offset, shift and symbol type that go with it.
         NOTE(review): the code that fills this structure in is not visible
         in this part of the file; confirm the exact role of each field
         against its users.  */
  104 struct aarch64_address_info {
  105   enum aarch64_address_type type;
  106   rtx base;
  107   rtx offset;
  108   int shift;
  109   enum aarch64_symbol_type symbol_type;
  110 };
 111
      /* Describes a SIMD immediate.  NOTE(review): the producers and
         consumers of this structure lie outside the visible part of the
         file, so the precise meaning of each field (in particular MVN and
         MSL) should be confirmed there.  */
  112 struct simd_immediate_info
  113 {
  114   rtx value;
  115   int shift;
  116   int element_width;
  117   bool mvn;
  118   bool msl;
  119 };
 120
  121 /* The current code model.  */
  122 enum aarch64_code_model aarch64_cmodel;
  123
      /* Force TARGET_HAVE_TLS to 1 whenever HAVE_AS_TLS is defined
         (presumably set by configure when the assembler supports TLS --
         confirm).  */
  124 #ifdef HAVE_AS_TLS
  125 #undef TARGET_HAVE_TLS
  126 #define TARGET_HAVE_TLS 1
  127 #endif
 128
      /* Forward declarations of file-local functions, so that they can be
         referenced before their definitions.  */
  129 static bool aarch64_lra_p (void);
  130 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
  131 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
  132                                                      const_tree,
  133                                                      enum machine_mode *, int *,
  134                                                      bool *);
  135 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
  136 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
  137 static void aarch64_override_options_after_change (void);
  138 static bool aarch64_vector_mode_supported_p (enum machine_mode);
  139 static unsigned bit_count (unsigned HOST_WIDE_INT);
  140 static bool aarch64_const_vec_all_same_int_p (rtx,
  141                                               HOST_WIDE_INT, HOST_WIDE_INT);
  142
  143 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
  144                                                  const unsigned char *sel);
  145 static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
 146
  147 /* The processor for which instructions should be scheduled (initially cortexa53).  */
  148 enum aarch64_processor aarch64_tune = cortexa53;
  149
  150 /* The tuning parameter set currently in use.  */
  151 const struct tune_params *aarch64_tune_params;
  152
  153 /* Mask to specify which instructions we are allowed to generate (initially empty).  */
  154 unsigned long aarch64_isa_flags = 0;
  155
  156 /* Mask to specify which instruction scheduling options should be used (initially empty).  */
  157 unsigned long aarch64_tune_flags = 0;
 158
  159 /* Tuning parameters.  */
  160
      /* NAMED_PARAM (NAME, VAL) expands to the designated initializer
         ".NAME = (VAL)" when HAVE_DESIGNATED_INITIALIZERS is true and to the
         bare value "(VAL)" otherwise.  Because of the second form, table
         entries must always be written in field order.  */
  161 #if HAVE_DESIGNATED_INITIALIZERS
  162 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
  163 #else
  164 #define NAMED_PARAM(NAME, VAL) (VAL)
  165 #endif
 166
 167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 168 __extension__
 169 #endif
 170
 171 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 172 __extension__
 173 #endif
 174 static const struct cpu_addrcost_table generic_addrcost_table =
 175 {
 176 #if HAVE_DESIGNATED_INITIALIZERS
 177   .addr_scale_costs =
 178 #endif
 179     {
 180       NAMED_PARAM (qi, 0),
 181       NAMED_PARAM (hi, 0),
 182       NAMED_PARAM (si, 0),
 183       NAMED_PARAM (ti, 0),
 184     },
 185   NAMED_PARAM (pre_modify, 0),
 186   NAMED_PARAM (post_modify, 0),
 187   NAMED_PARAM (register_offset, 0),
 188   NAMED_PARAM (register_extend, 0),
 189   NAMED_PARAM (imm_offset, 0)
 190 };
 191
      /* Address cost table referenced by cortexa57_tunings.  It differs from
         the generic table only in the HI and TI scale costs, which are 1.  */
  192 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
  193 __extension__
  194 #endif
  195 static const struct cpu_addrcost_table cortexa57_addrcost_table =
  196 {
  197 #if HAVE_DESIGNATED_INITIALIZERS
  198   .addr_scale_costs =
  199 #endif
  200     {
  201       NAMED_PARAM (qi, 0),
  202       NAMED_PARAM (hi, 1),
  203       NAMED_PARAM (si, 0),
  204       NAMED_PARAM (ti, 1),
  205     },
  206   NAMED_PARAM (pre_modify, 0),
  207   NAMED_PARAM (post_modify, 0),
  208   NAMED_PARAM (register_offset, 0),
  209   NAMED_PARAM (register_extend, 0),
  210   NAMED_PARAM (imm_offset, 0),
  211 };
 212
      /* Register move costs between general-purpose (GP) and FP register
         files; this one table is shared by all tunings defined in this
         part of the file.  */
  213 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
  214 __extension__
  215 #endif
  216 static const struct cpu_regmove_cost generic_regmove_cost =
  217 {
  218   NAMED_PARAM (GP2GP, 1),
  219   NAMED_PARAM (GP2FP, 2),
  220   NAMED_PARAM (FP2GP, 2),
  221   /* We currently do not provide direct support for TFmode Q->Q move.
  222      Therefore we need to raise the cost above 2 in order to have
  223      reload handle the situation.  */
  224   NAMED_PARAM (FP2FP, 4)
  225 };
 226
  227 /* Generic costs for vector insn classes (used by the generic and cortexa53 tunings).  */
  228 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
  229 __extension__
  230 #endif
  231 static const struct cpu_vector_cost generic_vector_cost =
  232 {
  233   NAMED_PARAM (scalar_stmt_cost, 1),
  234   NAMED_PARAM (scalar_load_cost, 1),
  235   NAMED_PARAM (scalar_store_cost, 1),
  236   NAMED_PARAM (vec_stmt_cost, 1),
  237   NAMED_PARAM (vec_to_scalar_cost, 1),
  238   NAMED_PARAM (scalar_to_vec_cost, 1),
  239   NAMED_PARAM (vec_align_load_cost, 1),
  240   NAMED_PARAM (vec_unalign_load_cost, 1),
  241   NAMED_PARAM (vec_unalign_store_cost, 1),
  242   NAMED_PARAM (vec_store_cost, 1),
  243   NAMED_PARAM (cond_taken_branch_cost, 3),
  244   NAMED_PARAM (cond_not_taken_branch_cost, 1)
  245 };
 246
  247 /* Cortex-A57 costs for vector insn classes (used by cortexa57_tunings).  */
  248 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
  249 __extension__
  250 #endif
  251 static const struct cpu_vector_cost cortexa57_vector_cost =
  252 {
  253   NAMED_PARAM (scalar_stmt_cost, 1),
  254   NAMED_PARAM (scalar_load_cost, 4),
  255   NAMED_PARAM (scalar_store_cost, 1),
  256   NAMED_PARAM (vec_stmt_cost, 3),
  257   NAMED_PARAM (vec_to_scalar_cost, 8),
  258   NAMED_PARAM (scalar_to_vec_cost, 8),
  259   NAMED_PARAM (vec_align_load_cost, 5),
  260   NAMED_PARAM (vec_unalign_load_cost, 5),
  261   NAMED_PARAM (vec_unalign_store_cost, 1),
  262   NAMED_PARAM (vec_store_cost, 1),
  263   NAMED_PARAM (cond_taken_branch_cost, 1),
  264   NAMED_PARAM (cond_not_taken_branch_cost, 1)
  265 };
 266
      /* Tuning parameters used by the "generic" entry of all_cores.  The
         first four initializers are positional (extra costs, address costs,
         register move costs, vector costs); note that the extra-cost table
         is the Cortex-A57 one.  */
  267 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
  268 __extension__
  269 #endif
  270 static const struct tune_params generic_tunings =
  271 {
  272   &cortexa57_extra_costs,
  273   &generic_addrcost_table,
  274   &generic_regmove_cost,
  275   &generic_vector_cost,
  276   NAMED_PARAM (memmov_cost, 4),
  277   NAMED_PARAM (issue_rate, 2)
  278 };
 279
 280 static const struct tune_params cortexa53_tunings =
 281 {
 282   &cortexa53_extra_costs,
 283   &generic_addrcost_table,
 284   &generic_regmove_cost,
 285   &generic_vector_cost,
 286   NAMED_PARAM (memmov_cost, 4),
 287   NAMED_PARAM (issue_rate, 2)
 288 };
 289
 290 static const struct tune_params cortexa57_tunings =
 291 {
 292   &cortexa57_extra_costs,
 293   &cortexa57_addrcost_table,
 294   &generic_regmove_cost,
 295   &cortexa57_vector_cost,
 296   NAMED_PARAM (memmov_cost, 4),
 297   NAMED_PARAM (issue_rate, 3)
 298 };
 299
  300 /* A processor implementing AArch64; also used for architecture entries.  */
  301 struct processor
  302 {
  303   const char *const name;  /* Name; NULL marks the end of a table.  */
  304   enum aarch64_processor core;  /* Core identifier.  */
  305   const char *arch;  /* Architecture as a string, e.g. "8".  */
  306   const unsigned long flags;  /* Feature flag mask for this entry.  */
  307   const struct tune_params *const tune;  /* Tuning set; NULL in all_architectures.  */
  308 };
 309
  310 /* Processor cores implementing AArch64.
      The entries are generated from aarch64-cores.def through AARCH64_CORE,
      which ORs AARCH64_FL_FOR_ARCH<ARCH> into FLAGS and selects the
      <COSTS>_tunings table.  They are followed by a "generic" entry and a
      terminator whose name is NULL.  */
  311 static const struct processor all_cores[] =
  312 {
  313 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
  314   {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
  315 #include "aarch64-cores.def"
  316 #undef AARCH64_CORE
  317   {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
  318   {NULL, aarch64_none, NULL, 0, NULL}
  319 };
 320
  321 /* Architectures implementing AArch64.
      The entries are generated from aarch64-arches.def through
      AARCH64_ARCH and carry no tuning table (the tune field is NULL).  The
      list ends with a terminator whose name is NULL.  */
  322 static const struct processor all_architectures[] =
  323 {
  324 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  325   {NAME, CORE, #ARCH, FLAGS, NULL},
  326 #include "aarch64-arches.def"
  327 #undef AARCH64_ARCH
  328   {NULL, aarch64_none, NULL, 0, NULL}
  329 };
 330
  331 /* Target specification.  These are populated as command-line arguments
  332    are processed, or NULL if not specified.  */
  333 static const struct processor *selected_arch;
  334 static const struct processor *selected_cpu;
  335 static const struct processor *selected_tune;
  336
      /* Feature flags of the selected CPU, or 0 when no CPU is selected.  */
  337 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
 338
  339 /* An ISA extension in the co-processor and main instruction set space.
      NOTE(review): flags_on / flags_off are presumably the feature bits to
      set when the extension is enabled / disabled; the code that consumes
      them is not visible here -- confirm.  */
  340 struct aarch64_option_extension
  341 {
  342   const char *const name;
  343   const unsigned long flags_on;
  344   const unsigned long flags_off;
  345 };
 346
  347 /* ISA extensions in AArch64.
      The entries are generated from aarch64-option-extensions.def through
      AARCH64_OPT_EXTENSION; the list ends with a terminator whose name is
      NULL.  */
  348 static const struct aarch64_option_extension all_extensions[] =
  349 {
  350 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
  351   {NAME, FLAGS_ON, FLAGS_OFF},
  352 #include "aarch64-option-extensions.def"
  353 #undef AARCH64_OPT_EXTENSION
  354   {NULL, 0, 0}
  355 };
 356
  357 /* Used to track the size of an address when generating a pre/post
  358    increment address.  */
  359 static enum machine_mode aarch64_memory_reference_mode;
  360
  361 /* Used to force GTY into this file.  */
  362 static GTY(()) int gty_dummy;
  363
  364 /* A table of valid AArch64 "bitmask immediate" values for
  365    logical instructions.  The table holds AARCH64_NUM_BITMASKS entries;
      the code that fills it in is not in this part of the file.  */
  366
  367 #define AARCH64_NUM_BITMASKS  5334
  368 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
 369
 370 typedef enum aarch64_cond_code
 371 {
 372   AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
 373   AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
 374   AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
 375 }
 376 aarch64_cc;
 377
 378 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
 379
  380 /* Textual names of the condition codes, in the same order as the aarch64_cc enumerators.  */
  381 static const char * const aarch64_condition_codes[] =
  382 {
  383   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  384   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
  385 };
 386
 387 /* Provide a mapping from gcc register numbers to dwarf register numbers.  */
 388 unsigned
 389 aarch64_dbx_register_number (unsigned regno)
 390 {
 391    if (GP_REGNUM_P (regno))
 392      return AARCH64_DWARF_R0 + regno - R0_REGNUM;
 393    else if (regno == SP_REGNUM)
 394      return AARCH64_DWARF_SP;
 395    else if (FP_REGNUM_P (regno))
 396      return AARCH64_DWARF_V0 + regno - V0_REGNUM;
 397
 398    /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
 399       equivalent DWARF register.  */
 400    return DWARF_FRAME_REGISTERS;
 401 }
 402
 403 /* Return TRUE if MODE is any of the large INT modes.  */
 404 static bool
 405 aarch64_vect_struct_mode_p (enum machine_mode mode)
 406 {
 407   return mode == OImode || mode == CImode || mode == XImode;
 408 }
 409
 410 /* Return TRUE if MODE is any of the vector modes.  */
 411 static bool
 412 aarch64_vector_mode_p (enum machine_mode mode)
 413 {
 414   return aarch64_vector_mode_supported_p (mode)
 415          || aarch64_vect_struct_mode_p (mode);
 416 }
 417
 418 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
 419 static bool
 420 aarch64_array_mode_supported_p (enum machine_mode mode,
 421                                 unsigned HOST_WIDE_INT nelems)
 422 {
 423   if (TARGET_SIMD
 424       && AARCH64_VALID_SIMD_QREG_MODE (mode)
 425       && (nelems >= 2 && nelems <= 4))
 426     return true;
 427
 428   return false;
 429 }
 430
 431 /* Implement HARD_REGNO_NREGS.  */
 432
 433 int
 434 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
 435 {
 436   switch (aarch64_regno_regclass (regno))
 437     {
 438     case FP_REGS:
 439     case FP_LO_REGS:
 440       return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
 441     default:
 442       return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
 443     }
 444   gcc_unreachable ();
 445 }
 446
 447 /* Implement HARD_REGNO_MODE_OK.  */
 448
 449 int
 450 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
 451 {
 452   if (GET_MODE_CLASS (mode) == MODE_CC)
 453     return regno == CC_REGNUM;
 454
 455   if (regno == SP_REGNUM)
 456     /* The purpose of comparing with ptr_mode is to support the
 457        global register variable associated with the stack pointer
 458        register via the syntax of asm ("wsp") in ILP32.  */
 459     return mode == Pmode || mode == ptr_mode;
 460
 461   if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
 462     return mode == Pmode;
 463
 464   if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
 465     return 1;
 466
 467   if (FP_REGNUM_P (regno))
 468     {
 469       if (aarch64_vect_struct_mode_p (mode))
 470         return
 471           (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
 472       else
 473         return 1;
 474     }
 475
 476   return 0;
 477 }
 478
 479 /* Return true if calls to DECL should be treated as
 480    long-calls (ie called via a register).  */
 481 static bool
 482 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
 483 {
 484   return false;
 485 }
 486
 487 /* Return true if calls to symbol-ref SYM should be treated as
 488    long-calls (ie called via a register).  */
 489 bool
 490 aarch64_is_long_call_p (rtx sym)
 491 {
 492   return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
 493 }
 494
 495 /* Return true if the offsets to a zero/sign-extract operation
 496    represent an expression that matches an extend operation.  The
 497    operands represent the paramters from
 498
 499    (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
 500 bool
 501 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
 502                                 rtx extract_imm)
 503 {
 504   HOST_WIDE_INT mult_val, extract_val;
 505
 506   if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
 507     return false;
 508
 509   mult_val = INTVAL (mult_imm);
 510   extract_val = INTVAL (extract_imm);
 511
 512   if (extract_val > 8
 513       && extract_val < GET_MODE_BITSIZE (mode)
 514       && exact_log2 (extract_val & ~7) > 0
 515       && (extract_val & 7) <= 4
 516       && mult_val == (1 << (extract_val & 7)))
 517     return true;
 518
 519   return false;
 520 }
 521
 522 /* Emit an insn that's a simple single-set.  Both the operands must be
 523    known to be valid.  */
 524 inline static rtx
 525 emit_set_insn (rtx x, rtx y)
 526 {
 527   return emit_insn (gen_rtx_SET (VOIDmode, x, y));
 528 }
 529
 530 /* X and Y are two things to compare using CODE.  Emit the compare insn and
 531    return the rtx for register 0 in the proper mode.  */
 532 rtx
 533 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
 534 {
 535   enum machine_mode mode = SELECT_CC_MODE (code, x, y);
 536   rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
 537
 538   emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
 539   return cc_reg;
 540 }
 541
 542 /* Build the SYMBOL_REF for __tls_get_addr.  */
 543
 544 static GTY(()) rtx tls_get_addr_libfunc;
 545
 546 rtx
 547 aarch64_tls_get_addr (void)
 548 {
 549   if (!tls_get_addr_libfunc)
 550     tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
 551   return tls_get_addr_libfunc;
 552 }
 553
 554 /* Return the TLS model to use for ADDR.  */
 555
 556 static enum tls_model
 557 tls_symbolic_operand_type (rtx addr)
 558 {
 559   enum tls_model tls_kind = TLS_MODEL_NONE;
 560   rtx sym, addend;
 561
 562   if (GET_CODE (addr) == CONST)
 563     {
 564       split_const (addr, &sym, &addend);
 565       if (GET_CODE (sym) == SYMBOL_REF)
 566         tls_kind = SYMBOL_REF_TLS_MODEL (sym);
 567     }
 568   else if (GET_CODE (addr) == SYMBOL_REF)
 569     tls_kind = SYMBOL_REF_TLS_MODEL (addr);
 570
 571   return tls_kind;
 572 }
 573
 574 /* We'll allow lo_sum's in addresses in our legitimate addresses
 575    so that combine would take care of combining addresses where
 576    necessary, but for generation purposes, we'll generate the address
 577    as :
 578    RTL                               Absolute
 579    tmp = hi (symbol_ref);            adrp  x1, foo
 580    dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo_12:foo
 581                                      nop
 582
 583    PIC                               TLS
 584    adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
 585    ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
 586                                      bl   __tls_get_addr
 587                                      nop
 588
 589    Load TLS symbol, depending on TLS mechanism and TLS access model.
 590
 591    Global Dynamic - Traditional TLS:
 592    adrp tmp, :tlsgd:imm
 593    add  dest, tmp, #:tlsgd_lo12:imm
 594    bl   __tls_get_addr
 595
 596    Global Dynamic - TLS Descriptors:
 597    adrp dest, :tlsdesc:imm
 598    ldr  tmp, [dest, #:tlsdesc_lo12:imm]
 599    add  dest, dest, #:tlsdesc_lo12:imm
 600    blr  tmp
 601    mrs  tp, tpidr_el0
 602    add  dest, dest, tp
 603
 604    Initial Exec:
 605    mrs  tp, tpidr_el0
 606    adrp tmp, :gottprel:imm
 607    ldr  dest, [tmp, #:gottprel_lo12:imm]
 608    add  dest, dest, tp
 609
 610    Local Exec:
 611    mrs  tp, tpidr_el0
 612    add  t0, tp, #:tprel_hi12:imm
 613    add  t0, #:tprel_lo12_nc:imm
 614 */
 615
 616 static void
 617 aarch64_load_symref_appropriately (rtx dest, rtx imm,
 618                                    enum aarch64_symbol_type type)
 619 {
 620   switch (type)
 621     {
 622     case SYMBOL_SMALL_ABSOLUTE:
 623       {
 624         /* In ILP32, the mode of dest can be either SImode or DImode.  */
 625         rtx tmp_reg = dest;
 626         enum machine_mode mode = GET_MODE (dest);
 627
 628         gcc_assert (mode == Pmode || mode == ptr_mode);
 629
 630         if (can_create_pseudo_p ())
 631           tmp_reg = gen_reg_rtx (mode);
 632
 633         emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
 634         emit_insn (gen_add_losym (dest, tmp_reg, imm));
 635         return;
 636       }
 637
 638     case SYMBOL_TINY_ABSOLUTE:
 639       emit_insn (gen_rtx_SET (Pmode, dest, imm));
 640       return;
 641
 642     case SYMBOL_SMALL_GOT:
 643       {
 644         /* In ILP32, the mode of dest can be either SImode or DImode,
 645            while the got entry is always of SImode size.  The mode of
 646            dest depends on how dest is used: if dest is assigned to a
 647            pointer (e.g. in the memory), it has SImode; it may have
 648            DImode if dest is dereferenced to access the memeory.
 649            This is why we have to handle three different ldr_got_small
 650            patterns here (two patterns for ILP32).  */
 651         rtx tmp_reg = dest;
 652         enum machine_mode mode = GET_MODE (dest);
 653
 654         if (can_create_pseudo_p ())
 655           tmp_reg = gen_reg_rtx (mode);
 656
 657         emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
 658         if (mode == ptr_mode)
 659           {
 660             if (mode == DImode)
 661               emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
 662             else
 663               emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
 664           }
 665         else
 666           {
 667             gcc_assert (mode == Pmode);
 668             emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
 669           }
 670
 671         return;
 672       }
 673
 674     case SYMBOL_SMALL_TLSGD:
 675       {
 676         rtx insns;
 677         rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
 678
 679         start_sequence ();
 680         emit_call_insn (gen_tlsgd_small (result, imm));
 681         insns = get_insns ();
 682         end_sequence ();
 683
 684         RTL_CONST_CALL_P (insns) = 1;
 685         emit_libcall_block (insns, dest, result, imm);
 686         return;
 687       }
 688
 689     case SYMBOL_SMALL_TLSDESC:
 690       {
 691         rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
 692         rtx tp;
 693
 694         emit_insn (gen_tlsdesc_small (imm));
 695         tp = aarch64_load_tp (NULL);
 696         emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
 697         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 698         return;
 699       }
 700
 701     case SYMBOL_SMALL_GOTTPREL:
 702       {
 703         rtx tmp_reg = gen_reg_rtx (Pmode);
 704         rtx tp = aarch64_load_tp (NULL);
 705         emit_insn (gen_tlsie_small (tmp_reg, imm));
 706         emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
 707         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 708         return;
 709       }
 710
 711     case SYMBOL_SMALL_TPREL:
 712       {
 713         rtx tp = aarch64_load_tp (NULL);
 714
 715         if (GET_MODE (dest) != Pmode)
 716           tp = gen_lowpart (GET_MODE (dest), tp);
 717
 718         emit_insn (gen_tlsle_small (dest, tp, imm));
 719         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 720         return;
 721       }
 722
 723     case SYMBOL_TINY_GOT:
 724       emit_insn (gen_ldr_got_tiny (dest, imm));
 725       return;
 726
 727     default:
 728       gcc_unreachable ();
 729     }
 730 }
 731
 732 /* Emit a move from SRC to DEST.  Assume that the move expanders can
 733    handle all moves if !can_create_pseudo_p ().  The distinction is
 734    important because, unlike emit_move_insn, the move expanders know
 735    how to force Pmode objects into the constant pool even when the
 736    constant pool address is not itself legitimate.  */
 737 static rtx
 738 aarch64_emit_move (rtx dest, rtx src)
 739 {
 740   return (can_create_pseudo_p ()
 741           ? emit_move_insn (dest, src)
 742           : emit_move_insn_1 (dest, src));
 743 }
 744
 745 /* Split a 128-bit move operation into two 64-bit move operations,
 746    taking care to handle partial overlap of register to register
 747    copies.  Special cases are needed when moving between GP regs and
 748    FP regs.  SRC can be a register, constant or memory; DST a register
 749    or memory.  If either operand is memory it must not have any side
 750    effects.  */
 751 void
 752 aarch64_split_128bit_move (rtx dst, rtx src)
 753 {
 754   rtx dst_lo, dst_hi;
 755   rtx src_lo, src_hi;
 756
 757   enum machine_mode mode = GET_MODE (dst);
 758
 759   gcc_assert (mode == TImode || mode == TFmode);
 760   gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
 761   gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
 762
 763   if (REG_P (dst) && REG_P (src))
 764     {
 765       int src_regno = REGNO (src);
 766       int dst_regno = REGNO (dst);
 767
 768       /* Handle FP <-> GP regs.  */
 769       if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
 770         {
 771           src_lo = gen_lowpart (word_mode, src);
 772           src_hi = gen_highpart (word_mode, src);
 773
 774           if (mode == TImode)
 775             {
 776               emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
 777               emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
 778             }
 779           else
 780             {
 781               emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
 782               emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
 783             }
 784           return;
 785         }
 786       else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
 787         {
 788           dst_lo = gen_lowpart (word_mode, dst);
 789           dst_hi = gen_highpart (word_mode, dst);
 790
 791           if (mode == TImode)
 792             {
 793               emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
 794               emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
 795             }
 796           else
 797             {
 798               emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
 799               emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
 800             }
 801           return;
 802         }
 803     }
 804
 805   dst_lo = gen_lowpart (word_mode, dst);
 806   dst_hi = gen_highpart (word_mode, dst);
 807   src_lo = gen_lowpart (word_mode, src);
 808   src_hi = gen_highpart_mode (word_mode, mode, src);
 809
 810   /* At most one pairing may overlap.  */
 811   if (reg_overlap_mentioned_p (dst_lo, src_hi))
 812     {
 813       aarch64_emit_move (dst_hi, src_hi);
 814       aarch64_emit_move (dst_lo, src_lo);
 815     }
 816   else
 817     {
 818       aarch64_emit_move (dst_lo, src_lo);
 819       aarch64_emit_move (dst_hi, src_hi);
 820     }
 821 }
 822
 823 bool
 824 aarch64_split_128bit_move_p (rtx dst, rtx src)
 825 {
 826   return (! REG_P (src)
 827           || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
 828 }
 829
 830 /* Split a complex SIMD combine.  */
 831
 832 void
 833 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
 834 {
 835   enum machine_mode src_mode = GET_MODE (src1);
 836   enum machine_mode dst_mode = GET_MODE (dst);
 837
 838   gcc_assert (VECTOR_MODE_P (dst_mode));
 839
 840   if (REG_P (dst) && REG_P (src1) && REG_P (src2))
 841     {
 842       rtx (*gen) (rtx, rtx, rtx);
 843
 844       switch (src_mode)
 845         {
 846         case V8QImode:
 847           gen = gen_aarch64_simd_combinev8qi;
 848           break;
 849         case V4HImode:
 850           gen = gen_aarch64_simd_combinev4hi;
 851           break;
 852         case V2SImode:
 853           gen = gen_aarch64_simd_combinev2si;
 854           break;
 855         case V2SFmode:
 856           gen = gen_aarch64_simd_combinev2sf;
 857           break;
 858         case DImode:
 859           gen = gen_aarch64_simd_combinedi;
 860           break;
 861         case DFmode:
 862           gen = gen_aarch64_simd_combinedf;
 863           break;
 864         default:
 865           gcc_unreachable ();
 866         }
 867
 868       emit_insn (gen (dst, src1, src2));
 869       return;
 870     }
 871 }
 872
 873 /* Split a complex SIMD move.  */
 874
 875 void
 876 aarch64_split_simd_move (rtx dst, rtx src)
 877 {
 878   enum machine_mode src_mode = GET_MODE (src);
 879   enum machine_mode dst_mode = GET_MODE (dst);
 880
 881   gcc_assert (VECTOR_MODE_P (dst_mode));
 882
 883   if (REG_P (dst) && REG_P (src))
 884     {
 885       rtx (*gen) (rtx, rtx);
 886
 887       gcc_assert (VECTOR_MODE_P (src_mode));
 888
 889       switch (src_mode)
 890         {
 891         case V16QImode:
 892           gen = gen_aarch64_split_simd_movv16qi;
 893           break;
 894         case V8HImode:
 895           gen = gen_aarch64_split_simd_movv8hi;
 896           break;
 897         case V4SImode:
 898           gen = gen_aarch64_split_simd_movv4si;
 899           break;
 900         case V2DImode:
 901           gen = gen_aarch64_split_simd_movv2di;
 902           break;
 903         case V4SFmode:
 904           gen = gen_aarch64_split_simd_movv4sf;
 905           break;
 906         case V2DFmode:
 907           gen = gen_aarch64_split_simd_movv2df;
 908           break;
 909         default:
 910           gcc_unreachable ();
 911         }
 912
 913       emit_insn (gen (dst, src));
 914       return;
 915     }
 916 }
 917
 918 static rtx
 919 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
 920 {
 921   if (can_create_pseudo_p ())
 922     return force_reg (mode, value);
 923   else
 924     {
 925       x = aarch64_emit_move (x, value);
 926       return x;
 927     }
 928 }
 929
 930
 931 static rtx
 932 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
 933 {
 934   if (!aarch64_plus_immediate (GEN_INT (offset), mode))
 935     {
 936       rtx high;
 937       /* Load the full offset into a register.  This
 938          might be improvable in the future.  */
 939       high = GEN_INT (offset);
 940       offset = 0;
 941       high = aarch64_force_temporary (mode, temp, high);
 942       reg = aarch64_force_temporary (mode, temp,
 943                                      gen_rtx_PLUS (mode, high, reg));
 944     }
 945   return plus_constant (mode, reg, offset);
 946 }
 947
     /* Emit the insns that load the constant IMM into DEST.  The mode of
        DEST must be SImode or DImode (asserted below).

        Symbolic operands (SYMBOL_REF, LABEL_REF, CONST) are dispatched on
        the classification returned by aarch64_classify_symbol.  A CONST_INT
        accepted by aarch64_move_imm is emitted as a single SET.  Any other
        non-CONST_INT operand is either set directly (HIGH) or loaded from
        the constant pool.  The remaining integers are synthesised: SImode
        with a move plus one 16-bit insert, DImode by trying a series of
        two-insn combinations before falling back to a move followed by one
        16-bit insert per further non-zero chunk.  */
 948 void
 949 aarch64_expand_mov_immediate (rtx dest, rtx imm)
 950 {
 951   enum machine_mode mode = GET_MODE (dest);
 952   unsigned HOST_WIDE_INT mask;
 953   int i;
 954   bool first;
 955   unsigned HOST_WIDE_INT val;
 956   bool subtargets;
 957   rtx subtarget;
 958   int one_match, zero_match;
 959
 960   gcc_assert (mode == SImode || mode == DImode);
 961
 962   /* Check on what type of symbol it is.  */
 963   if (GET_CODE (imm) == SYMBOL_REF
 964       || GET_CODE (imm) == LABEL_REF
 965       || GET_CODE (imm) == CONST)
 966     {
 967       rtx mem, base, offset;
 968       enum aarch64_symbol_type sty;
 969
 970       /* If we have (const (plus symbol offset)), separate out the offset
 971          before we start classifying the symbol.  */
 972       split_const (imm, &base, &offset);
 973
 974       sty = aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR);
 975       switch (sty)
 976         {
 977         case SYMBOL_FORCE_TO_MEM:
               /* When symbol+offset may not be placed in the constant pool
                  as a whole, load the bare symbol into a temporary and add
                  the offset separately.  This path needs fresh pseudos.  */
 978           if (offset != const0_rtx
 979               && targetm.cannot_force_const_mem (mode, imm))
 980             {
 981               gcc_assert (can_create_pseudo_p ());
 982               base = aarch64_force_temporary (mode, dest, base);
 983               base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
 984               aarch64_emit_move (dest, base);
 985               return;
 986             }
               /* Otherwise load IMM from a ptr_mode constant-pool entry,
                  zero-extending it when MODE differs from ptr_mode.  */
 987           mem = force_const_mem (ptr_mode, imm);
 988           gcc_assert (mem);
 989           if (mode != ptr_mode)
 990             mem = gen_rtx_ZERO_EXTEND (mode, mem);
 991           emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
 992           return;
 993
 994         case SYMBOL_SMALL_TLSGD:
 995         case SYMBOL_SMALL_TLSDESC:
 996         case SYMBOL_SMALL_GOTTPREL:
 997         case SYMBOL_SMALL_GOT:
 998         case SYMBOL_TINY_GOT:
               /* For these symbol kinds a non-zero offset is applied after
                  the bare symbol has been loaded; with a zero offset they
                  share the direct load below.  */
 999           if (offset != const0_rtx)
1000             {
1001               gcc_assert(can_create_pseudo_p ());
1002               base = aarch64_force_temporary (mode, dest, base);
1003               base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1004               aarch64_emit_move (dest, base);
1005               return;
1006             }
1007           /* FALLTHRU */
1008
1009         case SYMBOL_SMALL_TPREL:
1010         case SYMBOL_SMALL_ABSOLUTE:
1011         case SYMBOL_TINY_ABSOLUTE:
1012           aarch64_load_symref_appropriately (dest, imm, sty);
1013           return;
1014
1015         default:
1016           gcc_unreachable ();
1017         }
1018     }
1019
       /* An integer that aarch64_move_imm accepts needs just one SET.  */
1020   if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1021     {
1022       emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1023       return;
1024     }
1025
       /* Anything that is not an integer at this point is either a HIGH,
          which is set directly, or is loaded from the constant pool.  */
1026   if (!CONST_INT_P (imm))
1027     {
1028       if (GET_CODE (imm) == HIGH)
1029         emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1030       else
1031         {
1032           rtx mem = force_const_mem (mode, imm);
1033           gcc_assert (mem);
1034           emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1035         }
1036
1037       return;
1038     }
1039
1040   if (mode == SImode)
1041     {
1042       /* We know we can't do this in 1 insn, and we must be able to do it
1043          in two; so don't mess around looking for sequences that don't buy
1044          us anything.  */
           /* Set the low 16 bits, then insert bits 16..31.  */
1045       emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1046       emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1047                                  GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1048       return;
1049     }
1050
1051   /* Remaining cases are all for DImode.  */
1052
1053   val = INTVAL (imm);
       /* Intermediate values go to a fresh pseudo only when optimising
          and pseudos can still be created; otherwise DEST is reused.  */
1054   subtargets = optimize && can_create_pseudo_p ();
1055
       /* Count how many of the four 16-bit chunks of VAL are all-zero
          (zero_match) and how many are all-one (one_match).  */
1056   one_match = 0;
1057   zero_match = 0;
1058   mask = 0xffff;
1059
1060   for (i = 0; i < 64; i += 16, mask <<= 16)
1061     {
1062       if ((val & mask) == 0)
1063         zero_match++;
1064       else if ((val & mask) == mask)
1065         one_match++;
1066     }
1067
       /* Two all-one chunks: locate the lowest chunk that is not all-one,
          move VAL with that chunk forced to ones, then insert the chunk's
          real value.  */
1068   if (one_match == 2)
1069     {
1070       mask = 0xffff;
1071       for (i = 0; i < 64; i += 16, mask <<= 16)
1072         {
1073           if ((val & mask) != mask)
1074             {
1075               emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1076               emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1077                                          GEN_INT ((val >> i) & 0xffff)));
1078               return;
1079             }
1080         }
1081       gcc_unreachable ();
1082     }
1083
       /* Two all-zero chunks: the plain move-and-insert sequence at the
          end of the function handles this directly.  */
1084   if (zero_match == 2)
1085     goto simple_sequence;
1086
       /* For each of the upper three 16-bit chunk positions, try to split
          VAL into a base constant (moved first) plus a difference that
          aarch64_uimm12_shift accepts (added second).  COMP is the lowest
          set bit of MASK.
          NOTE(review): aarch64_uimm12_shift presumably tests for a 12-bit
          add immediate, optionally shifted -- confirm against its
          definition.  */
1087   mask = 0x0ffff0000UL;
1088   for (i = 16; i < 64; i += 16, mask <<= 16)
1089     {
1090       HOST_WIDE_INT comp = mask & ~(mask - 1);
1091
           /* Base is VAL restricted to the chunk under MASK.  */
1092       if (aarch64_uimm12_shift (val - (val & mask)))
1093         {
1094           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1095
1096           emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1097           emit_insn (gen_adddi3 (dest, subtarget,
1098                                  GEN_INT (val - (val & mask))));
1099           return;
1100         }
           /* Base is (VAL + COMP) restricted to the chunk; the test is on
              the negated difference.  */
1101       else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1102         {
1103           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1104
1105           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1106                                   GEN_INT ((val + comp) & mask)));
1107           emit_insn (gen_adddi3 (dest, subtarget,
1108                                  GEN_INT (val - ((val + comp) & mask))));
1109           return;
1110         }
           /* Base is (VAL - COMP) with every bit outside the chunk set.  */
1111       else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1112         {
1113           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1114
1115           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1116                                   GEN_INT ((val - comp) | ~mask)));
1117           emit_insn (gen_adddi3 (dest, subtarget,
1118                                  GEN_INT (val - ((val - comp) | ~mask))));
1119           return;
1120         }
           /* Base is VAL with every bit outside the chunk set; the test is
              on the negated difference.  */
1121       else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1122         {
1123           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1124
1125           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1126                                   GEN_INT (val | ~mask)));
1127           emit_insn (gen_adddi3 (dest, subtarget,
1128                                  GEN_INT (val - (val | ~mask))));
1129           return;
1130         }
1131     }
1132
1133   /* See if we can do it by arithmetically combining two
1134      immediates.  */
       /* NOTE(review): aarch64_bitmasks is presumably the table of
          bitmask immediates that a single move can load -- confirm.  For
          each entry, first try entry + add immediate, then entry + one
          16-bit insert where all other chunks already agree with VAL.  */
1135   for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1136     {
1137       int j;
1138       mask = 0xffff;
1139
1140       if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1141           || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1142         {
1143           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1144           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1145                                   GEN_INT (aarch64_bitmasks[i])));
1146           emit_insn (gen_adddi3 (dest, subtarget,
1147                                  GEN_INT (val - aarch64_bitmasks[i])));
1148           return;
1149         }
1150
1151       for (j = 0; j < 64; j += 16, mask <<= 16)
1152         {
1153           if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1154             {
1155               emit_insn (gen_rtx_SET (VOIDmode, dest,
1156                                       GEN_INT (aarch64_bitmasks[i])));
1157               emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1158                                          GEN_INT ((val >> j) & 0xffff)));
1159               return;
1160             }
1161         }
1162     }
1163
1164   /* See if we can do it by logically combining two immediates.  */
1165   for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1166     {
           /* Entry I is a subset of VAL's bits: look for a later entry J
              such that I | J == VAL.  */
1167       if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1168         {
1169           int j;
1170
1171           for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1172             if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1173               {
1174                 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1175                 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1176                                         GEN_INT (aarch64_bitmasks[i])));
1177                 emit_insn (gen_iordi3 (dest, subtarget,
1178                                        GEN_INT (aarch64_bitmasks[j])));
1179                 return;
1180               }
1181         }
           /* Entry I is a superset of VAL's bits: look for a later entry J
              such that I & J == VAL.  */
1182       else if ((val & aarch64_bitmasks[i]) == val)
1183         {
1184           int j;
1185
1186           for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1187             if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1188               {
1189
1190                 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1191                 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1192                                         GEN_INT (aarch64_bitmasks[j])));
1193                 emit_insn (gen_anddi3 (dest, subtarget,
1194                                        GEN_INT (aarch64_bitmasks[i])));
1195                 return;
1196               }
1197         }
1198     }
1199
       /* Fallback: move the lowest non-zero 16-bit chunk into DEST, then
          insert every further non-zero chunk at its bit position.  */
1200  simple_sequence:
1201   first = true;
1202   mask = 0xffff;
1203   for (i = 0; i < 64; i += 16, mask <<= 16)
1204     {
1205       if ((val & mask) != 0)
1206         {
1207           if (first)
1208             {
1209               emit_insn (gen_rtx_SET (VOIDmode, dest,
1210                                       GEN_INT (val & mask)));
1211               first = false;
1212             }
1213           else
1214             emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1215                                        GEN_INT ((val >> i) & 0xffff)));
1216         }
1217     }
1218 }
1219
1220 static bool
1221 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1222 {
1223   /* Indirect calls are not currently supported.  */
1224   if (decl == NULL)
1225     return false;
1226
1227   /* Cannot tail-call to long-calls, since these are outside of the
1228      range of a branch instruction (we could handle this if we added
1229      support for indirect tail-calls.  */
1230   if (aarch64_decl_is_long_call_p (decl))
1231     return false;
1232
1233   return true;
1234 }
1235
1236 /* Implement TARGET_PASS_BY_REFERENCE.  */
1237
1238 static bool
1239 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1240                            enum machine_mode mode,
1241                            const_tree type,
1242                            bool named ATTRIBUTE_UNUSED)
1243 {
1244   HOST_WIDE_INT size;
1245   enum machine_mode dummymode;
1246   int nregs;
1247
1248   /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
1249   size = (mode == BLKmode && type)
1250     ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1251
1252   /* Aggregates are passed by reference based on their size.  */
1253   if (type && AGGREGATE_TYPE_P (type))
1254     {
1255       size = int_size_in_bytes (type);
1256     }
1257
1258   /* Variable sized arguments are always returned by reference.  */
1259   if (size < 0)
1260     return true;
1261
1262   /* Can this be a candidate to be passed in fp/simd register(s)?  */
1263   if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1264                                                &dummymode, &nregs,
1265                                                NULL))
1266     return false;
1267
1268   /* Arguments which are variable sized or larger than 2 registers are
1269      passed by reference unless they are a homogenous floating point
1270      aggregate.  */
1271   return size > 2 * UNITS_PER_WORD;
1272 }
1273
1274 /* Return TRUE if VALTYPE is padded to its least significant bits.  */
1275 static bool
1276 aarch64_return_in_msb (const_tree valtype)
1277 {
1278   enum machine_mode dummy_mode;
1279   int dummy_int;
1280
1281   /* Never happens in little-endian mode.  */
1282   if (!BYTES_BIG_ENDIAN)
1283     return false;
1284
1285   /* Only composite types smaller than or equal to 16 bytes can
1286      be potentially returned in registers.  */
1287   if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1288       || int_size_in_bytes (valtype) <= 0
1289       || int_size_in_bytes (valtype) > 16)
1290     return false;
1291
1292   /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1293      or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1294      is always passed/returned in the least significant bits of fp/simd
1295      register(s).  */
1296   if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1297                                                &dummy_mode, &dummy_int, NULL))
1298     return false;
1299
1300   return true;
1301 }
1302
1303 /* Implement TARGET_FUNCTION_VALUE.
1304    Define how to find the value returned by a function.  */
1305
1306 static rtx
1307 aarch64_function_value (const_tree type, const_tree func,
1308                         bool outgoing ATTRIBUTE_UNUSED)
1309 {
1310   enum machine_mode mode;
1311   int unsignedp;
1312   int count;
1313   enum machine_mode ag_mode;
1314
1315   mode = TYPE_MODE (type);
1316   if (INTEGRAL_TYPE_P (type))
1317     mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1318
1319   if (aarch64_return_in_msb (type))
1320     {
1321       HOST_WIDE_INT size = int_size_in_bytes (type);
1322
1323       if (size % UNITS_PER_WORD != 0)
1324         {
1325           size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1326           mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1327         }
1328     }
1329
1330   if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1331                                                &ag_mode, &count, NULL))
1332     {
1333       if (!aarch64_composite_type_p (type, mode))
1334         {
1335           gcc_assert (count == 1 && mode == ag_mode);
1336           return gen_rtx_REG (mode, V0_REGNUM);
1337         }
1338       else
1339         {
1340           int i;
1341           rtx par;
1342
1343           par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1344           for (i = 0; i < count; i++)
1345             {
1346               rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1347               tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1348                                        GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1349               XVECEXP (par, 0, i) = tmp;
1350             }
1351           return par;
1352         }
1353     }
1354   else
1355     return gen_rtx_REG (mode, R0_REGNUM);
1356 }
1357
1358 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1359    Return true if REGNO is the number of a hard register in which the values
1360    of called function may come back.  */
1361
1362 static bool
1363 aarch64_function_value_regno_p (const unsigned int regno)
1364 {
1365   /* Maximum of 16 bytes can be returned in the general registers.  Examples
1366      of 16-byte return values are: 128-bit integers and 16-byte small
1367      structures (excluding homogeneous floating-point aggregates).  */
1368   if (regno == R0_REGNUM || regno == R1_REGNUM)
1369     return true;
1370
1371   /* Up to four fp/simd registers can return a function value, e.g. a
1372      homogeneous floating-point aggregate having four members.  */
1373   if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1374     return !TARGET_GENERAL_REGS_ONLY;
1375
1376   return false;
1377 }
1378
1379 /* Implement TARGET_RETURN_IN_MEMORY.
1380
1381    If the type T of the result of a function is such that
1382      void func (T arg)
1383    would require that arg be passed as a value in a register (or set of
1384    registers) according to the parameter passing rules, then the result
1385    is returned in the same registers as would be used for such an
1386    argument.  */
1387
1388 static bool
1389 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1390 {
1391   HOST_WIDE_INT size;
1392   enum machine_mode ag_mode;
1393   int count;
1394
1395   if (!AGGREGATE_TYPE_P (type)
1396       && TREE_CODE (type) != COMPLEX_TYPE
1397       && TREE_CODE (type) != VECTOR_TYPE)
1398     /* Simple scalar types always returned in registers.  */
1399     return false;
1400
1401   if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1402                                                type,
1403                                                &ag_mode,
1404                                                &count,
1405                                                NULL))
1406     return false;
1407
1408   /* Types larger than 2 registers returned in memory.  */
1409   size = int_size_in_bytes (type);
1410   return (size < 0 || size > 2 * UNITS_PER_WORD);
1411 }
1412
1413 static bool
1414 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1415                                const_tree type, int *nregs)
1416 {
1417   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1418   return aarch64_vfp_is_call_or_return_candidate (mode,
1419                                                   type,
1420                                                   &pcum->aapcs_vfp_rmode,
1421                                                   nregs,
1422                                                   NULL);
1423 }
1424
1425 /* Given MODE and TYPE of a function argument, return the alignment in
1426    bits.  The idea is to suppress any stronger alignment requested by
1427    the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1428    This is a helper function for local use only.  */
1429
1430 static unsigned int
1431 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1432 {
1433   unsigned int alignment;
1434
1435   if (type)
1436     {
1437       if (!integer_zerop (TYPE_SIZE (type)))
1438         {
1439           if (TYPE_MODE (type) == mode)
1440             alignment = TYPE_ALIGN (type);
1441           else
1442             alignment = GET_MODE_ALIGNMENT (mode);
1443         }
1444       else
1445         alignment = 0;
1446     }
1447   else
1448     alignment = GET_MODE_ALIGNMENT (mode);
1449
1450   return alignment;
1451 }
1452
1453 /* Layout a function argument according to the AAPCS64 rules.  The rule
1454    numbers refer to the rule numbers in the AAPCS64.  */
1455
1456 static void
1457 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1458                     const_tree type,
1459                     bool named ATTRIBUTE_UNUSED)
1460 {
1461   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1462   int ncrn, nvrn, nregs;
1463   bool allocate_ncrn, allocate_nvrn;
1464   HOST_WIDE_INT size;
1465
1466   /* We need to do this once per argument.  */
1467   if (pcum->aapcs_arg_processed)
1468     return;
1469
1470   pcum->aapcs_arg_processed = true;
1471
1472   /* Size in bytes, rounded to the nearest multiple of 8 bytes.  */
1473   size
1474     = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1475                         UNITS_PER_WORD);
1476
1477   allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1478   allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1479                                                  mode,
1480                                                  type,
1481                                                  &nregs);
1482
1483   /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1484      The following code thus handles passing by SIMD/FP registers first.  */
1485
1486   nvrn = pcum->aapcs_nvrn;
1487
1488   /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
1489      and homogenous short-vector aggregates (HVA).  */
1490   if (allocate_nvrn)
1491     {
1492       if (nvrn + nregs <= NUM_FP_ARG_REGS)
1493         {
1494           pcum->aapcs_nextnvrn = nvrn + nregs;
1495           if (!aarch64_composite_type_p (type, mode))
1496             {
1497               gcc_assert (nregs == 1);
1498               pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1499             }
1500           else
1501             {
1502               rtx par;
1503               int i;
1504               par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1505               for (i = 0; i < nregs; i++)
1506                 {
1507                   rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1508                                          V0_REGNUM + nvrn + i);
1509                   tmp = gen_rtx_EXPR_LIST
1510                     (VOIDmode, tmp,
1511                      GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1512                   XVECEXP (par, 0, i) = tmp;
1513                 }
1514               pcum->aapcs_reg = par;
1515             }
1516           return;
1517         }
1518       else
1519         {
1520           /* C.3 NSRN is set to 8.  */
1521           pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1522           goto on_stack;
1523         }
1524     }
1525
1526   ncrn = pcum->aapcs_ncrn;
1527   nregs = size / UNITS_PER_WORD;
1528
1529   /* C6 - C9.  though the sign and zero extension semantics are
1530      handled elsewhere.  This is the case where the argument fits
1531      entirely general registers.  */
1532   if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1533     {
1534       unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1535
1536       gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1537
1538       /* C.8 if the argument has an alignment of 16 then the NGRN is
1539          rounded up to the next even number.  */
1540       if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1541         {
1542           ++ncrn;
1543           gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1544         }
1545       /* NREGS can be 0 when e.g. an empty structure is to be passed.
1546          A reg is still generated for it, but the caller should be smart
1547          enough not to use it.  */
1548       if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1549         {
1550           pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1551         }
1552       else
1553         {
1554           rtx par;
1555           int i;
1556
1557           par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1558           for (i = 0; i < nregs; i++)
1559             {
1560               rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1561               tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1562                                        GEN_INT (i * UNITS_PER_WORD));
1563               XVECEXP (par, 0, i) = tmp;
1564             }
1565           pcum->aapcs_reg = par;
1566         }
1567
1568       pcum->aapcs_nextncrn = ncrn + nregs;
1569       return;
1570     }
1571
1572   /* C.11  */
1573   pcum->aapcs_nextncrn = NUM_ARG_REGS;
1574
1575   /* The argument is passed on stack; record the needed number of words for
1576      this argument and align the total size if necessary.  */
1577 on_stack:
1578   pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1579   if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1580     pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1581                                                16 / UNITS_PER_WORD);
1582   return;
1583 }
1584
1585 /* Implement TARGET_FUNCTION_ARG.  */
1586
1587 static rtx
1588 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1589                       const_tree type, bool named)
1590 {
1591   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1592   gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1593
1594   if (mode == VOIDmode)
1595     return NULL_RTX;
1596
1597   aarch64_layout_arg (pcum_v, mode, type, named);
1598   return pcum->aapcs_reg;
1599 }
1600
1601 void
1602 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1603                            const_tree fntype ATTRIBUTE_UNUSED,
1604                            rtx libname ATTRIBUTE_UNUSED,
1605                            const_tree fndecl ATTRIBUTE_UNUSED,
1606                            unsigned n_named ATTRIBUTE_UNUSED)
1607 {
1608   pcum->aapcs_ncrn = 0;
1609   pcum->aapcs_nvrn = 0;
1610   pcum->aapcs_nextncrn = 0;
1611   pcum->aapcs_nextnvrn = 0;
1612   pcum->pcs_variant = ARM_PCS_AAPCS64;
1613   pcum->aapcs_reg = NULL_RTX;
1614   pcum->aapcs_arg_processed = false;
1615   pcum->aapcs_stack_words = 0;
1616   pcum->aapcs_stack_size = 0;
1617
1618   return;
1619 }
1620
1621 static void
1622 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1623                               enum machine_mode mode,
1624                               const_tree type,
1625                               bool named)
1626 {
1627   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1628   if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1629     {
1630       aarch64_layout_arg (pcum_v, mode, type, named);
1631       gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1632                   != (pcum->aapcs_stack_words != 0));
1633       pcum->aapcs_arg_processed = false;
1634       pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1635       pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1636       pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1637       pcum->aapcs_stack_words = 0;
1638       pcum->aapcs_reg = NULL_RTX;
1639     }
1640 }
1641
1642 bool
1643 aarch64_function_arg_regno_p (unsigned regno)
1644 {
1645   return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1646           || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1647 }
1648
1649 /* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
1650    PARM_BOUNDARY bits of alignment, but will be given anything up
1651    to STACK_BOUNDARY bits if the type requires it.  This makes sure
1652    that both before and after the layout of each argument, the Next
1653    Stacked Argument Address (NSAA) will have a minimum alignment of
1654    8 bytes.  */
1655
1656 static unsigned int
1657 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1658 {
1659   unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1660
1661   if (alignment < PARM_BOUNDARY)
1662     alignment = PARM_BOUNDARY;
1663   if (alignment > STACK_BOUNDARY)
1664     alignment = STACK_BOUNDARY;
1665   return alignment;
1666 }
1667
1668 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1669
1670    Return true if an argument passed on the stack should be padded upwards,
1671    i.e. if the least-significant byte of the stack slot has useful data.
1672
1673    Small aggregate types are placed in the lowest memory address.
1674
1675    The related parameter passing rules are B.4, C.3, C.5 and C.14.  */
1676
1677 bool
1678 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1679 {
1680   /* On little-endian targets, the least significant byte of every stack
1681      argument is passed at the lowest byte address of the stack slot.  */
1682   if (!BYTES_BIG_ENDIAN)
1683     return true;
1684
1685   /* Otherwise, integral, floating-point and pointer types are padded downward:
1686      the least significant byte of a stack argument is passed at the highest
1687      byte address of the stack slot.  */
1688   if (type
1689       ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1690          || POINTER_TYPE_P (type))
1691       : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1692     return false;
1693
1694   /* Everything else padded upward, i.e. data in first byte of stack slot.  */
1695   return true;
1696 }
1697
1698 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1699
1700    It specifies padding for the last (may also be the only)
1701    element of a block move between registers and memory.  If
1702    assuming the block is in the memory, padding upward means that
1703    the last element is padded after its highest significant byte,
1704    while in downward padding, the last element is padded at the
1705    its least significant byte side.
1706
1707    Small aggregates and small complex types are always padded
1708    upwards.
1709
1710    We don't need to worry about homogeneous floating-point or
1711    short-vector aggregates; their move is not affected by the
1712    padding direction determined here.  Regardless of endianness,
1713    each element of such an aggregate is put in the least
1714    significant bits of a fp/simd register.
1715
1716    Return !BYTES_BIG_ENDIAN if the least significant byte of the
1717    register has useful data, and return the opposite if the most
1718    significant byte does.  */
1719
1720 bool
1721 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1722                      bool first ATTRIBUTE_UNUSED)
1723 {
1724
1725   /* Small composite types are always padded upward.  */
1726   if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1727     {
1728       HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1729                             : GET_MODE_SIZE (mode));
1730       if (size < 2 * UNITS_PER_WORD)
1731         return true;
1732     }
1733
1734   /* Otherwise, use the default padding.  */
1735   return !BYTES_BIG_ENDIAN;
1736 }
1737
1738 static enum machine_mode
1739 aarch64_libgcc_cmp_return_mode (void)
1740 {
1741   return SImode;
1742 }
1743
1744 static bool
1745 aarch64_frame_pointer_required (void)
1746 {
1747   /* If the function contains dynamic stack allocations, we need to
1748      use the frame pointer to access the static parts of the frame.  */
1749   if (cfun->calls_alloca)
1750     return true;
1751
1752   /* In aarch64_override_options_after_change
1753      flag_omit_leaf_frame_pointer turns off the frame pointer by
1754      default.  Turn it back on now if we've not got a leaf
1755      function.  */
1756   if (flag_omit_leaf_frame_pointer
1757       && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1758     return true;
1759
1760   return false;
1761 }
1762
1763 /* Mark the registers that need to be saved by the callee and calculate
1764    the size of the callee-saved registers area and frame record (both FP
1765    and LR may be omitted).  */
1766 static void
1767 aarch64_layout_frame (void)
1768 {
1769   HOST_WIDE_INT offset = 0;
1770   int regno;
1771
1772   if (reload_completed && cfun->machine->frame.laid_out)
1773     return;
1774
1775   cfun->machine->frame.fp_lr_offset = 0;
1776
1777   /* First mark all the registers that really need to be saved...  */
1778   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1779     cfun->machine->frame.reg_offset[regno] = -1;
1780
1781   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1782     cfun->machine->frame.reg_offset[regno] = -1;
1783
1784   /* ... that includes the eh data registers (if needed)...  */
1785   if (crtl->calls_eh_return)
1786     for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1787       cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1788
1789   /* ... and any callee saved register that dataflow says is live.  */
1790   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1791     if (df_regs_ever_live_p (regno)
1792         && !call_used_regs[regno])
1793       cfun->machine->frame.reg_offset[regno] = 0;
1794
1795   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1796     if (df_regs_ever_live_p (regno)
1797         && !call_used_regs[regno])
1798       cfun->machine->frame.reg_offset[regno] = 0;
1799
1800   if (frame_pointer_needed)
1801     {
1802       cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1803       cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1804       cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1805     }
1806
1807   /* Now assign stack slots for them.  */
1808   for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1809     if (cfun->machine->frame.reg_offset[regno] != -1)
1810       {
1811         cfun->machine->frame.reg_offset[regno] = offset;
1812         offset += UNITS_PER_WORD;
1813       }
1814
1815   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1816     if (cfun->machine->frame.reg_offset[regno] != -1)
1817       {
1818         cfun->machine->frame.reg_offset[regno] = offset;
1819         offset += UNITS_PER_WORD;
1820       }
1821
1822   if (frame_pointer_needed)
1823     {
1824       cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1825       offset += UNITS_PER_WORD;
1826       cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1827     }
1828
1829   if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1830     {
1831       cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1832       offset += UNITS_PER_WORD;
1833       cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1834     }
1835
1836   cfun->machine->frame.padding0 =
1837     (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1838   offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1839
1840   cfun->machine->frame.saved_regs_size = offset;
1841   cfun->machine->frame.laid_out = true;
1842 }
1843
1844 /* Make the last instruction frame-related and note that it performs
1845    the operation described by FRAME_PATTERN.  */
1846
1847 static void
1848 aarch64_set_frame_expr (rtx frame_pattern)
1849 {
1850   rtx insn;
1851
1852   insn = get_last_insn ();
1853   RTX_FRAME_RELATED_P (insn) = 1;
1854   RTX_FRAME_RELATED_P (frame_pattern) = 1;
1855   REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1856                                       frame_pattern,
1857                                       REG_NOTES (insn));
1858 }
1859
1860 static bool
1861 aarch64_register_saved_on_entry (int regno)
1862 {
1863   return cfun->machine->frame.reg_offset[regno] != -1;
1864 }
1865
1866
1867 static void
1868 aarch64_save_or_restore_fprs (int start_offset, int increment,
1869                               bool restore, rtx base_rtx)
1870
1871 {
1872   unsigned regno;
1873   unsigned regno2;
1874   rtx insn;
1875   rtx (*gen_mem_ref)(enum machine_mode, rtx)
1876     = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1877
1878
1879   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1880     {
1881       if (aarch64_register_saved_on_entry (regno))
1882         {
1883           rtx mem;
1884           mem = gen_mem_ref (DFmode,
1885                              plus_constant (Pmode,
1886                                             base_rtx,
1887                                             start_offset));
1888
1889           for (regno2 = regno + 1;
1890                regno2 <= V31_REGNUM
1891                  && !aarch64_register_saved_on_entry (regno2);
1892                regno2++)
1893             {
1894               /* Empty loop.  */
1895             }
1896           if (regno2 <= V31_REGNUM &&
1897               aarch64_register_saved_on_entry (regno2))
1898             {
1899               rtx mem2;
1900               /* Next highest register to be saved.  */
1901               mem2 = gen_mem_ref (DFmode,
1902                                   plus_constant
1903                                   (Pmode,
1904                                    base_rtx,
1905                                    start_offset + increment));
1906               if (restore == false)
1907                 {
1908                   insn = emit_insn
1909                     ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1910                                         mem2, gen_rtx_REG (DFmode, regno2)));
1911
1912                 }
1913               else
1914                 {
1915                   insn = emit_insn
1916                     ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1917                                        gen_rtx_REG (DFmode, regno2), mem2));
1918
1919                   add_reg_note (insn, REG_CFA_RESTORE,
1920                                 gen_rtx_REG (DFmode, regno));
1921                   add_reg_note (insn, REG_CFA_RESTORE,
1922                                 gen_rtx_REG (DFmode, regno2));
1923                 }
1924
1925                   /* The first part of a frame-related parallel insn
1926                      is always assumed to be relevant to the frame
1927                      calculations; subsequent parts, are only
1928                      frame-related if explicitly marked.  */
1929               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1930               regno = regno2;
1931               start_offset += increment * 2;
1932             }
1933           else
1934             {
1935               if (restore == false)
1936                 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1937               else
1938                 {
1939                   insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1940                   add_reg_note (insn, REG_CFA_RESTORE,
1941                                 gen_rtx_REG (DImode, regno));
1942                 }
1943               start_offset += increment;
1944             }
1945           RTX_FRAME_RELATED_P (insn) = 1;
1946         }
1947     }
1948
1949 }
1950
1951
1952 /* offset from the stack pointer of where the saves and
1953    restore's have to happen.  */
1954 static void
1955 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1956                                             bool restore)
1957 {
1958   rtx insn;
1959   rtx base_rtx = stack_pointer_rtx;
1960   HOST_WIDE_INT start_offset = offset;
1961   HOST_WIDE_INT increment = UNITS_PER_WORD;
1962   rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1963   unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1964   unsigned regno;
1965   unsigned regno2;
1966
1967   for (regno = R0_REGNUM; regno <= limit; regno++)
1968     {
1969       if (aarch64_register_saved_on_entry (regno))
1970         {
1971           rtx mem;
1972           mem = gen_mem_ref (Pmode,
1973                              plus_constant (Pmode,
1974                                             base_rtx,
1975                                             start_offset));
1976
1977           for (regno2 = regno + 1;
1978                regno2 <= limit
1979                  && !aarch64_register_saved_on_entry (regno2);
1980                regno2++)
1981             {
1982               /* Empty loop.  */
1983             }
1984           if (regno2 <= limit &&
1985               aarch64_register_saved_on_entry (regno2))
1986             {
1987               rtx mem2;
1988               /* Next highest register to be saved.  */
1989               mem2 = gen_mem_ref (Pmode,
1990                                   plus_constant
1991                                   (Pmode,
1992                                    base_rtx,
1993                                    start_offset + increment));
1994               if (restore == false)
1995                 {
1996                   insn = emit_insn
1997                     ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1998                                         mem2, gen_rtx_REG (DImode, regno2)));
1999
2000                 }
2001               else
2002                 {
2003                   insn = emit_insn
2004                     ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
2005                                      gen_rtx_REG (DImode, regno2), mem2));
2006
2007                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2008                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
2009                 }
2010
2011                   /* The first part of a frame-related parallel insn
2012                      is always assumed to be relevant to the frame
2013                      calculations; subsequent parts, are only
2014                      frame-related if explicitly marked.  */
2015               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
2016                                             1)) = 1;
2017               regno = regno2;
2018               start_offset += increment * 2;
2019             }
2020           else
2021             {
2022               if (restore == false)
2023                 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
2024               else
2025                 {
2026                   insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
2027                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2028                 }
2029               start_offset += increment;
2030             }
2031           RTX_FRAME_RELATED_P (insn) = 1;
2032         }
2033     }
2034
2035   aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
2036
2037 }
2038
2039 /* AArch64 stack frames generated by this compiler look like:
2040
2041         +-------------------------------+
2042         |                               |
2043         |  incoming stack arguments     |
2044         |                               |
2045         +-------------------------------+ <-- arg_pointer_rtx
2046         |                               |
2047         |  callee-allocated save area   |
2048         |  for register varargs         |
2049         |                               |
2050         +-------------------------------+ <-- frame_pointer_rtx
2051         |                               |
2052         |  local variables              |
2053         |                               |
2054         +-------------------------------+
2055         |  padding0                     | \
2056         +-------------------------------+  |
2057         |                               |  |
2058         |                               |  |
2059         |  callee-saved registers       |  | frame.saved_regs_size
2060         |                               |  |
2061         +-------------------------------+  |
2062         |  LR'                          |  |
2063         +-------------------------------+  |
2064         |  FP'                          | /
2065       P +-------------------------------+ <-- hard_frame_pointer_rtx
2066         |  dynamic allocation           |
2067         +-------------------------------+
2068         |                               |
2069         |  outgoing stack arguments     |
2070         |                               |
2071         +-------------------------------+ <-- stack_pointer_rtx
2072
2073    Dynamic stack allocations such as alloca insert data at point P.
2074    They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2075    hard_frame_pointer_rtx unchanged.  */
2076
2077 /* Generate the prologue instructions for entry into a function.
2078    Establish the stack frame by decreasing the stack pointer with a
2079    properly calculated size and, if necessary, create a frame record
2080    filled with the values of LR and previous frame pointer.  The
2081    current FP is also set up if it is in use.  */
2082
2083 void
2084 aarch64_expand_prologue (void)
2085 {
2086   /* sub sp, sp, #<frame_size>
2087      stp {fp, lr}, [sp, #<frame_size> - 16]
2088      add fp, sp, #<frame_size> - hardfp_offset
2089      stp {cs_reg}, [fp, #-16] etc.
2090
2091      sub sp, sp, <final_adjustment_if_any>
2092   */
2093   HOST_WIDE_INT original_frame_size;    /* local variables + vararg save */
2094   HOST_WIDE_INT frame_size, offset;
2095   HOST_WIDE_INT fp_offset;              /* FP offset from SP */
2096   rtx insn;
2097
2098   aarch64_layout_frame ();
2099   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2100   gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2101               && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2102   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2103                 + crtl->outgoing_args_size);
2104   offset = frame_size = AARCH64_ROUND_UP (frame_size,
2105                                           STACK_BOUNDARY / BITS_PER_UNIT);
2106
2107   if (flag_stack_usage_info)
2108     current_function_static_stack_size = frame_size;
2109
2110   fp_offset = (offset
2111                - original_frame_size
2112                - cfun->machine->frame.saved_regs_size);
2113
2114   /* Store pairs and load pairs have a range only -512 to 504.  */
2115   if (offset >= 512)
2116     {
2117       /* When the frame has a large size, an initial decrease is done on
2118          the stack pointer to jump over the callee-allocated save area for
2119          register varargs, the local variable area and/or the callee-saved
2120          register area.  This will allow the pre-index write-back
2121          store pair instructions to be used for setting up the stack frame
2122          efficiently.  */
2123       offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2124       if (offset >= 512)
2125         offset = cfun->machine->frame.saved_regs_size;
2126
2127       frame_size -= (offset + crtl->outgoing_args_size);
2128       fp_offset = 0;
2129
2130       if (frame_size >= 0x1000000)
2131         {
2132           rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2133           emit_move_insn (op0, GEN_INT (-frame_size));
2134           emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2135           aarch64_set_frame_expr (gen_rtx_SET
2136                                   (Pmode, stack_pointer_rtx,
2137                                    plus_constant (Pmode,
2138                                                   stack_pointer_rtx,
2139                                                   -frame_size)));
2140         }
2141       else if (frame_size > 0)
2142         {
2143           if ((frame_size & 0xfff) != frame_size)
2144             {
2145               insn = emit_insn (gen_add2_insn
2146                                 (stack_pointer_rtx,
2147                                  GEN_INT (-(frame_size
2148                                             & ~(HOST_WIDE_INT)0xfff))));
2149               RTX_FRAME_RELATED_P (insn) = 1;
2150             }
2151           if ((frame_size & 0xfff) != 0)
2152             {
2153               insn = emit_insn (gen_add2_insn
2154                                 (stack_pointer_rtx,
2155                                  GEN_INT (-(frame_size
2156                                             & (HOST_WIDE_INT)0xfff))));
2157               RTX_FRAME_RELATED_P (insn) = 1;
2158             }
2159         }
2160     }
2161   else
2162     frame_size = -1;
2163
2164   if (offset > 0)
2165     {
2166       /* Save the frame pointer and lr if the frame pointer is needed
2167          first.  Make the frame pointer point to the location of the
2168          old frame pointer on the stack.  */
2169       if (frame_pointer_needed)
2170         {
2171           rtx mem_fp, mem_lr;
2172
2173           if (fp_offset)
2174             {
2175               insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2176                                                GEN_INT (-offset)));
2177               RTX_FRAME_RELATED_P (insn) = 1;
2178               aarch64_set_frame_expr (gen_rtx_SET
2179                                       (Pmode, stack_pointer_rtx,
2180                                        gen_rtx_MINUS (Pmode,
2181                                                       stack_pointer_rtx,
2182                                                       GEN_INT (offset))));
2183               mem_fp = gen_frame_mem (DImode,
2184                                       plus_constant (Pmode,
2185                                                      stack_pointer_rtx,
2186                                                      fp_offset));
2187               mem_lr = gen_frame_mem (DImode,
2188                                       plus_constant (Pmode,
2189                                                      stack_pointer_rtx,
2190                                                      fp_offset
2191                                                      + UNITS_PER_WORD));
2192               insn = emit_insn (gen_store_pairdi (mem_fp,
2193                                                   hard_frame_pointer_rtx,
2194                                                   mem_lr,
2195                                                   gen_rtx_REG (DImode,
2196                                                                LR_REGNUM)));
2197             }
2198           else
2199             {
2200               insn = emit_insn (gen_storewb_pairdi_di
2201                                 (stack_pointer_rtx, stack_pointer_rtx,
2202                                  hard_frame_pointer_rtx,
2203                                  gen_rtx_REG (DImode, LR_REGNUM),
2204                                  GEN_INT (-offset),
2205                                  GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2206               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2207             }
2208
2209           /* The first part of a frame-related parallel insn is always
2210              assumed to be relevant to the frame calculations;
2211              subsequent parts, are only frame-related if explicitly
2212              marked.  */
2213           RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2214           RTX_FRAME_RELATED_P (insn) = 1;
2215
2216           /* Set up frame pointer to point to the location of the
2217              previous frame pointer on the stack.  */
2218           insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2219                                            stack_pointer_rtx,
2220                                            GEN_INT (fp_offset)));
2221           aarch64_set_frame_expr (gen_rtx_SET
2222                                   (Pmode, hard_frame_pointer_rtx,
2223                                    plus_constant (Pmode,
2224                                                   stack_pointer_rtx,
2225                                                   fp_offset)));
2226           RTX_FRAME_RELATED_P (insn) = 1;
2227           insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2228                                            hard_frame_pointer_rtx));
2229         }
2230       else
2231         {
2232           insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2233                                            GEN_INT (-offset)));
2234           RTX_FRAME_RELATED_P (insn) = 1;
2235         }
2236
2237       aarch64_save_or_restore_callee_save_registers
2238         (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2239     }
2240
2241   /* when offset >= 512,
2242      sub sp, sp, #<outgoing_args_size> */
2243   if (frame_size > -1)
2244     {
2245       if (crtl->outgoing_args_size > 0)
2246         {
2247           insn = emit_insn (gen_add2_insn
2248                             (stack_pointer_rtx,
2249                              GEN_INT (- crtl->outgoing_args_size)));
2250           RTX_FRAME_RELATED_P (insn) = 1;
2251         }
2252     }
2253 }
2254
2255 /* Generate the epilogue instructions for returning from a function.
        When FOR_SIBCALL is true the final return jump is not emitted;
        everything else is generated either way.  */
2256 void
2257 aarch64_expand_epilogue (bool for_sibcall)
2258 {
2259   HOST_WIDE_INT original_frame_size, frame_size, offset;
2260   HOST_WIDE_INT fp_offset;
2261   rtx insn;
2262   rtx cfa_reg;
2263
2264   aarch64_layout_frame ();
2265   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2266   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2267                 + crtl->outgoing_args_size);
2268   offset = frame_size = AARCH64_ROUND_UP (frame_size,
2269                                           STACK_BOUNDARY / BITS_PER_UNIT);
2270
       /* What lies below the register save area: the outgoing arguments
          plus whatever the rounding above added.  */
2271   fp_offset = (offset
2272                - original_frame_size
2273                - cfun->machine->frame.saved_regs_size);
2274
       /* Register against which the CFA adjustment note on the FP/LR
          reload below is expressed.  */
2275   cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2276
2277   /* Store pairs and load pairs have a range only -512 to 504.  */
2278   if (offset >= 512)
2279     {
           /* OFFSET becomes the part of the frame released around the
              FP/LR reload below; FRAME_SIZE keeps the remainder, which is
              released at the end of this function.  */
2280       offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2281       if (offset >= 512)
2282         offset = cfun->machine->frame.saved_regs_size;
2283
2284       frame_size -= (offset + crtl->outgoing_args_size);
2285       fp_offset = 0;
           /* Pop the outgoing argument area now, unless a frame pointer is
              in use, in which case SP is recomputed from FP just below.  */
2286       if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2287         {
2288           insn = emit_insn (gen_add2_insn
2289                             (stack_pointer_rtx,
2290                              GEN_INT (crtl->outgoing_args_size)));
2291           RTX_FRAME_RELATED_P (insn) = 1;
2292         }
2293     }
2294   else
         /* -1 disables the trailing stack adjustment at the end.  */
2295     frame_size = -1;
2296
2297   /* If there were outgoing arguments or we've done dynamic stack
2298      allocation, then restore the stack pointer from the frame
2299      pointer.  This is at most one insn and more efficient than using
2300      GCC's internal mechanism.  */
2301   if (frame_pointer_needed
2302       && (crtl->outgoing_args_size || cfun->calls_alloca))
2303     {
2304       insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2305                                        hard_frame_pointer_rtx,
2306                                        GEN_INT (- fp_offset)));
2307       RTX_FRAME_RELATED_P (insn) = 1;
2308       /* As SP is set to (FP - fp_offset), according to the rules in
2309          dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2310          from the value of SP from now on.  */
2311       cfa_reg = stack_pointer_rtx;
2312     }
2313
       /* Reload the callee-saved registers (second argument 1 selects
          restore).  */
2314   aarch64_save_or_restore_callee_save_registers
2315     (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2316
2317   /* Restore the frame pointer and lr if the frame pointer is needed.  */
2318   if (offset > 0)
2319     {
2320       if (frame_pointer_needed)
2321         {
2322           rtx mem_fp, mem_lr;
2323
2324           if (fp_offset)
2325             {
                   /* Reload FP/LR from SP + FP_OFFSET with a plain load
                      pair; SP itself is adjusted by the add further down.  */
2326               mem_fp = gen_frame_mem (DImode,
2327                                       plus_constant (Pmode,
2328                                                      stack_pointer_rtx,
2329                                                      fp_offset));
2330               mem_lr = gen_frame_mem (DImode,
2331                                       plus_constant (Pmode,
2332                                                      stack_pointer_rtx,
2333                                                      fp_offset
2334                                                      + UNITS_PER_WORD));
2335               insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2336                                                  mem_fp,
2337                                                  gen_rtx_REG (DImode,
2338                                                               LR_REGNUM),
2339                                                  mem_lr));
2340             }
2341           else
2342             {
                   /* FP_OFFSET is zero: reload FP/LR and release OFFSET
                      bytes with one write-back load pair, and record the
                      resulting CFA adjustment on it.  */
2343               insn = emit_insn (gen_loadwb_pairdi_di
2344                                 (stack_pointer_rtx,
2345                                  stack_pointer_rtx,
2346                                  hard_frame_pointer_rtx,
2347                                  gen_rtx_REG (DImode, LR_REGNUM),
2348                                  GEN_INT (offset),
2349                                  GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2350               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2351               add_reg_note (insn, REG_CFA_ADJUST_CFA,
2352                             (gen_rtx_SET (Pmode, stack_pointer_rtx,
2353                                           plus_constant (Pmode, cfa_reg,
2354                                                          offset))));
2355             }
2356
2357           /* The first part of a frame-related parallel insn
2358              is always assumed to be relevant to the frame
2359              calculations; subsequent parts, are only
2360              frame-related if explicitly marked.  */
2361           RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2362           RTX_FRAME_RELATED_P (insn) = 1;
2363           add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2364           add_reg_note (insn, REG_CFA_RESTORE,
2365                         gen_rtx_REG (DImode, LR_REGNUM));
2366
2367           if (fp_offset)
2368             {
2369               insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2370                                                GEN_INT (offset)));
2371               RTX_FRAME_RELATED_P (insn) = 1;
2372             }
2373         }
2374       else
2375         {
               /* No frame record to reload: just release OFFSET bytes.  */
2376           insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2377                                            GEN_INT (offset)));
2378           RTX_FRAME_RELATED_P (insn) = 1;
2379         }
2380     }
2381
2382   /* Stack adjustment for exception handler.  */
2383   if (crtl->calls_eh_return)
2384     {
2385       /* We need to unwind the stack by the offset computed by
2386          EH_RETURN_STACKADJ_RTX.  However, at this point the CFA is
2387          based on SP.  Ideally we would update the SP and define the
2388          CFA along the lines of:
2389
2390          SP = SP + EH_RETURN_STACKADJ_RTX
2391          (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2392
2393          However the dwarf emitter only understands a constant
2394          register offset.
2395
2396          The solution chosen here is to use the otherwise unused IP0
2397          as a temporary register to hold the current SP value.  The
2398          CFA is described using IP0 then SP is modified.  */
2399
2400       rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2401
2402       insn = emit_move_insn (ip0, stack_pointer_rtx);
2403       add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2404       RTX_FRAME_RELATED_P (insn) = 1;
2405
2406       emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2407
2408       /* Ensure the assignment to IP0 does not get optimized away.  */
2409       emit_use (ip0);
2410     }
2411
       /* Large-frame case only: release the part of the frame that was
          set aside in FRAME_SIZE above.  */
2412   if (frame_size > -1)
2413     {
2414       if (frame_size >= 0x1000000)
2415         {
2416           rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2417           emit_move_insn (op0, GEN_INT (frame_size));
2418           emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2419           aarch64_set_frame_expr (gen_rtx_SET
2420                                   (Pmode, stack_pointer_rtx,
2421                                    plus_constant (Pmode,
2422                                                   stack_pointer_rtx,
2423                                                   frame_size)));
2424         }
2425       else if (frame_size > 0)
2426         {
2427           if ((frame_size & 0xfff) != 0)
2428             {
2429               insn = emit_insn (gen_add2_insn
2430                                 (stack_pointer_rtx,
2431                                  GEN_INT ((frame_size
2432                                            & (HOST_WIDE_INT) 0xfff))));
2433               RTX_FRAME_RELATED_P (insn) = 1;
2434             }
2435           if ((frame_size & 0xfff) != frame_size)
2436             {
2437               insn = emit_insn (gen_add2_insn
2438                                 (stack_pointer_rtx,
2439                                  GEN_INT ((frame_size
2440                                            & ~ (HOST_WIDE_INT) 0xfff))));
2441               RTX_FRAME_RELATED_P (insn) = 1;
2442             }
2443         }
2444
           /* NOTE(review): this call is unconditional inside the
              FRAME_SIZE > -1 block.  aarch64_set_frame_expr attaches its
              note to whatever insn was emitted last, so when FRAME_SIZE is
              0 the note lands on an insn emitted earlier in this function.
              The pattern also uses OFFSET rather than FRAME_SIZE, and in
              the >= 0x1000000 case a second note is added to the same
              insn.  Confirm this is the intended unwind description.  */
2445         aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2446                                              plus_constant (Pmode,
2447                                                             stack_pointer_rtx,
2448                                                             offset)));
2449     }
2450
       /* Emit a use of LR ahead of the return (presumably so that its
          restore is not treated as dead -- confirm).  */
2451   emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2452   if (!for_sibcall)
2453     emit_jump_insn (ret_rtx);
2454 }
2455
2456 /* Return the place to copy the exception unwinding return address to.
2457    This will probably be a stack slot, but could (in theory be the
2458    return register).  */
2459 rtx
2460 aarch64_final_eh_return_addr (void)
2461 {
2462   HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2463   aarch64_layout_frame ();
2464   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2465   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2466                 + crtl->outgoing_args_size);
2467   offset = frame_size = AARCH64_ROUND_UP (frame_size,
2468                                           STACK_BOUNDARY / BITS_PER_UNIT);
2469   fp_offset = offset
2470     - original_frame_size
2471     - cfun->machine->frame.saved_regs_size;
2472
2473   if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2474     return gen_rtx_REG (DImode, LR_REGNUM);
2475
2476   /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
2477      result in a store to save LR introduced by builtin_eh_return () being
2478      incorrectly deleted because the alias is not detected.
2479      So in the calculation of the address to copy the exception unwinding
2480      return address to, we note 2 cases.
2481      If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2482      we return a SP-relative location since all the addresses are SP-relative
2483      in this case.  This prevents the store from being optimized away.
2484      If the fp_offset is not 0, then the addresses will be FP-relative and
2485      therefore we return a FP-relative location.  */
2486
2487   if (frame_pointer_needed)
2488     {
2489       if (fp_offset)
2490         return gen_frame_mem (DImode,
2491                               plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2492       else
2493         return gen_frame_mem (DImode,
2494                               plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2495     }
2496
2497   /* If FP is not needed, we calculate the location of LR, which would be
2498      at the top of the saved registers block.  */
2499
2500   return gen_frame_mem (DImode,
2501                         plus_constant (Pmode,
2502                                        stack_pointer_rtx,
2503                                        fp_offset
2504                                        + cfun->machine->frame.saved_regs_size
2505                                        - 2 * UNITS_PER_WORD));
2506 }
2507
2508 /* Possibly output code to build up a constant in a register.  For
2509    the benefit of the costs infrastructure, returns the number of
2510    instructions which would be emitted.  GENERATE inhibits or
2511    enables code generation.  */
2512
2513 static int
2514 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2515 {
2516   int insns = 0;
2517
2518   if (aarch64_bitmask_imm (val, DImode))
2519     {
2520       if (generate)
2521         emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2522       insns = 1;
2523     }
2524   else
2525     {
2526       int i;
2527       int ncount = 0;
2528       int zcount = 0;
2529       HOST_WIDE_INT valp = val >> 16;
2530       HOST_WIDE_INT valm;
2531       HOST_WIDE_INT tval;
2532
2533       for (i = 16; i < 64; i += 16)
2534         {
2535           valm = (valp & 0xffff);
2536
2537           if (valm != 0)
2538             ++ zcount;
2539
2540           if (valm != 0xffff)
2541             ++ ncount;
2542
2543           valp >>= 16;
2544         }
2545
2546       /* zcount contains the number of additional MOVK instructions
2547          required if the constant is built up with an initial MOVZ instruction,
2548          while ncount is the number of MOVK instructions required if starting
2549          with a MOVN instruction.  Choose the sequence that yields the fewest
2550          number of instructions, preferring MOVZ instructions when they are both
2551          the same.  */
2552       if (ncount < zcount)
2553         {
2554           if (generate)
2555             emit_move_insn (gen_rtx_REG (Pmode, regnum),
2556                             GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2557           tval = 0xffff;
2558           insns++;
2559         }
2560       else
2561         {
2562           if (generate)
2563             emit_move_insn (gen_rtx_REG (Pmode, regnum),
2564                             GEN_INT (val & 0xffff));
2565           tval = 0;
2566           insns++;
2567         }
2568
2569       val >>= 16;
2570
2571       for (i = 16; i < 64; i += 16)
2572         {
2573           if ((val & 0xffff) != tval)
2574             {
2575               if (generate)
2576                 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2577                                            GEN_INT (i),
2578                                            GEN_INT (val & 0xffff)));
2579               insns++;
2580             }
2581           val >>= 16;
2582         }
2583     }
2584   return insns;
2585 }
2586
2587 static void
2588 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2589 {
2590   HOST_WIDE_INT mdelta = delta;
2591   rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2592   rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2593
2594   if (mdelta < 0)
2595     mdelta = -mdelta;
2596
2597   if (mdelta >= 4096 * 4096)
2598     {
2599       (void) aarch64_build_constant (scratchreg, delta, true);
2600       emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2601     }
2602   else if (mdelta > 0)
2603     {
2604       if (mdelta >= 4096)
2605         {
2606           emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2607           rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2608           if (delta < 0)
2609             emit_insn (gen_rtx_SET (Pmode, this_rtx,
2610                                     gen_rtx_MINUS (Pmode, this_rtx, shift)));
2611           else
2612             emit_insn (gen_rtx_SET (Pmode, this_rtx,
2613                                     gen_rtx_PLUS (Pmode, this_rtx, shift)));
2614         }
2615       if (mdelta % 4096 != 0)
2616         {
2617           scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2618           emit_insn (gen_rtx_SET (Pmode, this_rtx,
2619                                   gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2620         }
2621     }
2622 }
2623
2624 /* Output code to add DELTA to the first argument, and then jump
2625    to FUNCTION.  Used for C++ multiple inheritance.  */
2626 static void
2627 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2628                          HOST_WIDE_INT delta,
2629                          HOST_WIDE_INT vcall_offset,
2630                          tree function)
2631 {
2632   /* The this pointer is always in x0.  Note that this differs from
2633      Arm where the this pointer maybe bumped to r1 if r0 is required
2634      to return a pointer to an aggregate.  On AArch64 a result value
2635      pointer will be in x8.  */
2636   int this_regno = R0_REGNUM;
2637   rtx this_rtx, temp0, temp1, addr, insn, funexp;
2638
2639   reload_completed = 1;
2640   emit_note (NOTE_INSN_PROLOGUE_END);
2641
2642   if (vcall_offset == 0)
2643     aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2644   else
2645     {
2646       gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2647
2648       this_rtx = gen_rtx_REG (Pmode, this_regno);
2649       temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2650       temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2651
2652       addr = this_rtx;
2653       if (delta != 0)
2654         {
2655           if (delta >= -256 && delta < 256)
2656             addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2657                                        plus_constant (Pmode, this_rtx, delta));
2658           else
2659             aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2660         }
2661
2662       if (Pmode == ptr_mode)
2663         aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2664       else
2665         aarch64_emit_move (temp0,
2666                            gen_rtx_ZERO_EXTEND (Pmode,
2667                                                 gen_rtx_MEM (ptr_mode, addr)));
2668
2669       if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2670           addr = plus_constant (Pmode, temp0, vcall_offset);
2671       else
2672         {
2673           (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2674           addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2675         }
2676
2677       if (Pmode == ptr_mode)
2678         aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2679       else
2680         aarch64_emit_move (temp1,
2681                            gen_rtx_SIGN_EXTEND (Pmode,
2682                                                 gen_rtx_MEM (ptr_mode, addr)));
2683
2684       emit_insn (gen_add2_insn (this_rtx, temp1));
2685     }
2686
2687   /* Generate a tail call to the target function.  */
2688   if (!TREE_USED (function))
2689     {
2690       assemble_external (function);
2691       TREE_USED (function) = 1;
2692     }
2693   funexp = XEXP (DECL_RTL (function), 0);
2694   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2695   insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2696   SIBLING_CALL_P (insn) = 1;
2697
2698   insn = get_insns ();
2699   shorten_branches (insn);
2700   final_start_function (insn, file, 1);
2701   final (insn, file, 1);
2702   final_end_function ();
2703
2704   /* Stop pretending to be a post-reload pass.  */
2705   reload_completed = 0;
2706 }
2707
2708 static int
2709 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2710 {
2711   if (GET_CODE (*x) == SYMBOL_REF)
2712     return SYMBOL_REF_TLS_MODEL (*x) != 0;
2713
2714   /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2715      TLS offsets, not real symbol references.  */
2716   if (GET_CODE (*x) == UNSPEC
2717       && XINT (*x, 1) == UNSPEC_TLS)
2718     return -1;
2719
2720   return 0;
2721 }
2722
2723 static bool
2724 aarch64_tls_referenced_p (rtx x)
2725 {
2726   if (!TARGET_HAVE_TLS)
2727     return false;
2728
2729   return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2730 }
2731
2732
2733 static int
2734 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2735 {
2736   const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2737   const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2738
2739   if (*imm1 < *imm2)
2740     return -1;
2741   if (*imm1 > *imm2)
2742     return +1;
2743   return 0;
2744 }
2745
2746
2747 static void
2748 aarch64_build_bitmask_table (void)
2749 {
2750   unsigned HOST_WIDE_INT mask, imm;
2751   unsigned int log_e, e, s, r;
2752   unsigned int nimms = 0;
2753
2754   for (log_e = 1; log_e <= 6; log_e++)
2755     {
2756       e = 1 << log_e;
2757       if (e == 64)
2758         mask = ~(HOST_WIDE_INT) 0;
2759       else
2760         mask = ((HOST_WIDE_INT) 1 << e) - 1;
2761       for (s = 1; s < e; s++)
2762         {
2763           for (r = 0; r < e; r++)
2764             {
2765               /* set s consecutive bits to 1 (s < 64) */
2766               imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2767               /* rotate right by r */
2768               if (r != 0)
2769                 imm = ((imm >> r) | (imm << (e - r))) & mask;
2770               /* replicate the constant depending on SIMD size */
2771               switch (log_e) {
2772               case 1: imm |= (imm <<  2);
2773               case 2: imm |= (imm <<  4);
2774               case 3: imm |= (imm <<  8);
2775               case 4: imm |= (imm << 16);
2776               case 5: imm |= (imm << 32);
2777               case 6:
2778                 break;
2779               default:
2780                 gcc_unreachable ();
2781               }
2782               gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2783               aarch64_bitmasks[nimms++] = imm;
2784             }
2785         }
2786     }
2787
2788   gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2789   qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2790          aarch64_bitmasks_cmp);
2791 }
2792
2793
2794 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2795    a left shift of 0 or 12 bits.  */
2796 bool
2797 aarch64_uimm12_shift (HOST_WIDE_INT val)
2798 {
2799   return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2800           || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2801           );
2802 }
2803
2804
2805 /* Return true if val is an immediate that can be loaded into a
2806    register by a MOVZ instruction.  */
2807 static bool
2808 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2809 {
2810   if (GET_MODE_SIZE (mode) > 4)
2811     {
2812       if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2813           || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2814         return 1;
2815     }
2816   else
2817     {
2818       /* Ignore sign extension.  */
2819       val &= (HOST_WIDE_INT) 0xffffffff;
2820     }
2821   return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2822           || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2823 }
2824
2825
2826 /* Return true if val is a valid bitmask immediate.  */
2827 bool
2828 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2829 {
2830   if (GET_MODE_SIZE (mode) < 8)
2831     {
2832       /* Replicate bit pattern.  */
2833       val &= (HOST_WIDE_INT) 0xffffffff;
2834       val |= val << 32;
2835     }
2836   return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2837                   sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2838 }
2839
2840
2841 /* Return true if val is an immediate that can be loaded into a
2842    register in a single instruction.  */
2843 bool
2844 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2845 {
2846   if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2847     return 1;
2848   return aarch64_bitmask_imm (val, mode);
2849 }
2850
2851 static bool
2852 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2853 {
2854   rtx base, offset;
2855
2856   if (GET_CODE (x) == HIGH)
2857     return true;
2858
2859   split_const (x, &base, &offset);
2860   if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2861     {
2862       if (aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR)
2863           != SYMBOL_FORCE_TO_MEM)
2864         return true;
2865       else
2866         /* Avoid generating a 64-bit relocation in ILP32; leave
2867            to aarch64_expand_mov_immediate to handle it properly.  */
2868         return mode != ptr_mode;
2869     }
2870
2871   return aarch64_tls_referenced_p (x);
2872 }
2873
2874 /* Return true if register REGNO is a valid index register.
2875    STRICT_P is true if REG_OK_STRICT is in effect.  */
2876
2877 bool
2878 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2879 {
2880   if (!HARD_REGISTER_NUM_P (regno))
2881     {
2882       if (!strict_p)
2883         return true;
2884
2885       if (!reg_renumber)
2886         return false;
2887
2888       regno = reg_renumber[regno];
2889     }
2890   return GP_REGNUM_P (regno);
2891 }
2892
2893 /* Return true if register REGNO is a valid base register for mode MODE.
2894    STRICT_P is true if REG_OK_STRICT is in effect.  */
2895
2896 bool
2897 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2898 {
2899   if (!HARD_REGISTER_NUM_P (regno))
2900     {
2901       if (!strict_p)
2902         return true;
2903
2904       if (!reg_renumber)
2905         return false;
2906
2907       regno = reg_renumber[regno];
2908     }
2909
2910   /* The fake registers will be eliminated to either the stack or
2911      hard frame pointer, both of which are usually valid base registers.
2912      Reload deals with the cases where the eliminated form isn't valid.  */
2913   return (GP_REGNUM_P (regno)
2914           || regno == SP_REGNUM
2915           || regno == FRAME_POINTER_REGNUM
2916           || regno == ARG_POINTER_REGNUM);
2917 }
2918
2919 /* Return true if X is a valid base register for mode MODE.
2920    STRICT_P is true if REG_OK_STRICT is in effect.  */
2921
2922 static bool
2923 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2924 {
2925   if (!strict_p && GET_CODE (x) == SUBREG)
2926     x = SUBREG_REG (x);
2927
2928   return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2929 }
2930
2931 /* Return true if address offset is a valid index.  If it is, fill in INFO
2932    appropriately.  STRICT_P is true if REG_OK_STRICT is in effect.  */
2933
2934 static bool
2935 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2936                         enum machine_mode mode, bool strict_p)
2937 {
2938   enum aarch64_address_type type;
2939   rtx index;
2940   int shift;
2941
2942   /* (reg:P) */
2943   if ((REG_P (x) || GET_CODE (x) == SUBREG)
2944       && GET_MODE (x) == Pmode)
2945     {
2946       type = ADDRESS_REG_REG;
2947       index = x;
2948       shift = 0;
2949     }
2950   /* (sign_extend:DI (reg:SI)) */
2951   else if ((GET_CODE (x) == SIGN_EXTEND
2952             || GET_CODE (x) == ZERO_EXTEND)
2953            && GET_MODE (x) == DImode
2954            && GET_MODE (XEXP (x, 0)) == SImode)
2955     {
2956       type = (GET_CODE (x) == SIGN_EXTEND)
2957         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2958       index = XEXP (x, 0);
2959       shift = 0;
2960     }
2961   /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2962   else if (GET_CODE (x) == MULT
2963            && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2964                || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2965            && GET_MODE (XEXP (x, 0)) == DImode
2966            && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2967            && CONST_INT_P (XEXP (x, 1)))
2968     {
2969       type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2970         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2971       index = XEXP (XEXP (x, 0), 0);
2972       shift = exact_log2 (INTVAL (XEXP (x, 1)));
2973     }
2974   /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2975   else if (GET_CODE (x) == ASHIFT
2976            && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2977                || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2978            && GET_MODE (XEXP (x, 0)) == DImode
2979            && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2980            && CONST_INT_P (XEXP (x, 1)))
2981     {
2982       type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2983         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2984       index = XEXP (XEXP (x, 0), 0);
2985       shift = INTVAL (XEXP (x, 1));
2986     }
2987   /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2988   else if ((GET_CODE (x) == SIGN_EXTRACT
2989             || GET_CODE (x) == ZERO_EXTRACT)
2990            && GET_MODE (x) == DImode
2991            && GET_CODE (XEXP (x, 0)) == MULT
2992            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2993            && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2994     {
2995       type = (GET_CODE (x) == SIGN_EXTRACT)
2996         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2997       index = XEXP (XEXP (x, 0), 0);
2998       shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2999       if (INTVAL (XEXP (x, 1)) != 32 + shift
3000           || INTVAL (XEXP (x, 2)) != 0)
3001         shift = -1;
3002     }
3003   /* (and:DI (mult:DI (reg:DI) (const_int scale))
3004      (const_int 0xffffffff<<shift)) */
3005   else if (GET_CODE (x) == AND
3006            && GET_MODE (x) == DImode
3007            && GET_CODE (XEXP (x, 0)) == MULT
3008            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3009            && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3010            && CONST_INT_P (XEXP (x, 1)))
3011     {
3012       type = ADDRESS_REG_UXTW;
3013       index = XEXP (XEXP (x, 0), 0);
3014       shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3015       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3016         shift = -1;
3017     }
3018   /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3019   else if ((GET_CODE (x) == SIGN_EXTRACT
3020             || GET_CODE (x) == ZERO_EXTRACT)
3021            && GET_MODE (x) == DImode
3022            && GET_CODE (XEXP (x, 0)) == ASHIFT
3023            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3024            && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3025     {
3026       type = (GET_CODE (x) == SIGN_EXTRACT)
3027         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3028       index = XEXP (XEXP (x, 0), 0);
3029       shift = INTVAL (XEXP (XEXP (x, 0), 1));
3030       if (INTVAL (XEXP (x, 1)) != 32 + shift
3031           || INTVAL (XEXP (x, 2)) != 0)
3032         shift = -1;
3033     }
3034   /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3035      (const_int 0xffffffff<<shift)) */
3036   else if (GET_CODE (x) == AND
3037            && GET_MODE (x) == DImode
3038            && GET_CODE (XEXP (x, 0)) == ASHIFT
3039            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3040            && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3041            && CONST_INT_P (XEXP (x, 1)))
3042     {
3043       type = ADDRESS_REG_UXTW;
3044       index = XEXP (XEXP (x, 0), 0);
3045       shift = INTVAL (XEXP (XEXP (x, 0), 1));
3046       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3047         shift = -1;
3048     }
3049   /* (mult:P (reg:P) (const_int scale)) */
3050   else if (GET_CODE (x) == MULT
3051            && GET_MODE (x) == Pmode
3052            && GET_MODE (XEXP (x, 0)) == Pmode
3053            && CONST_INT_P (XEXP (x, 1)))
3054     {
3055       type = ADDRESS_REG_REG;
3056       index = XEXP (x, 0);
3057       shift = exact_log2 (INTVAL (XEXP (x, 1)));
3058     }
3059   /* (ashift:P (reg:P) (const_int shift)) */
3060   else if (GET_CODE (x) == ASHIFT
3061            && GET_MODE (x) == Pmode
3062            && GET_MODE (XEXP (x, 0)) == Pmode
3063            && CONST_INT_P (XEXP (x, 1)))
3064     {
3065       type = ADDRESS_REG_REG;
3066       index = XEXP (x, 0);
3067       shift = INTVAL (XEXP (x, 1));
3068     }
3069   else
3070     return false;
3071
3072   if (GET_CODE (index) == SUBREG)
3073     index = SUBREG_REG (index);
3074
3075   if ((shift == 0 ||
3076        (shift > 0 && shift <= 3
3077         && (1 << shift) == GET_MODE_SIZE (mode)))
3078       && REG_P (index)
3079       && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3080     {
3081       info->type = type;
3082       info->offset = index;
3083       info->shift = shift;
3084       return true;
3085     }
3086
3087   return false;
3088 }
3089
3090 static inline bool
3091 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3092 {
3093   return (offset >= -64 * GET_MODE_SIZE (mode)
3094           && offset < 64 * GET_MODE_SIZE (mode)
3095           && offset % GET_MODE_SIZE (mode) == 0);
3096 }
3097
3098 static inline bool
3099 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3100                                HOST_WIDE_INT offset)
3101 {
3102   return offset >= -256 && offset < 256;
3103 }
3104
3105 static inline bool
3106 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3107 {
3108   return (offset >= 0
3109           && offset < 4096 * GET_MODE_SIZE (mode)
3110           && offset % GET_MODE_SIZE (mode) == 0);
3111 }
3112
3113 /* Return true if X is a valid address for machine mode MODE.  If it is,
3114    fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
3115    effect.  OUTER_CODE is PARALLEL for a load/store pair.  */
3116
3117 static bool
3118 aarch64_classify_address (struct aarch64_address_info *info,
3119                           rtx x, enum machine_mode mode,
3120                           RTX_CODE outer_code, bool strict_p)
3121 {
3122   enum rtx_code code = GET_CODE (x);
3123   rtx op0, op1;
3124   bool allow_reg_index_p =
3125     outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3126
3127   /* Don't support anything other than POST_INC or REG addressing for
3128      AdvSIMD.  */
3129   if (aarch64_vector_mode_p (mode)
3130       && (code != POST_INC && code != REG))
3131     return false;
3132
3133   switch (code)
3134     {
3135     case REG:
3136     case SUBREG:
3137       info->type = ADDRESS_REG_IMM;
3138       info->base = x;
3139       info->offset = const0_rtx;
3140       return aarch64_base_register_rtx_p (x, strict_p);
3141
3142     case PLUS:
3143       op0 = XEXP (x, 0);
3144       op1 = XEXP (x, 1);
3145       if (GET_MODE_SIZE (mode) != 0
3146           && CONST_INT_P (op1)
3147           && aarch64_base_register_rtx_p (op0, strict_p))
3148         {
3149           HOST_WIDE_INT offset = INTVAL (op1);
3150
3151           info->type = ADDRESS_REG_IMM;
3152           info->base = op0;
3153           info->offset = op1;
3154
3155           /* TImode and TFmode values are allowed in both pairs of X
3156              registers and individual Q registers.  The available
3157              address modes are:
3158              X,X: 7-bit signed scaled offset
3159              Q:   9-bit signed offset
3160              We conservatively require an offset representable in either mode.
3161            */
3162           if (mode == TImode || mode == TFmode)
3163             return (offset_7bit_signed_scaled_p (mode, offset)
3164                     && offset_9bit_signed_unscaled_p (mode, offset));
3165
3166           if (outer_code == PARALLEL)
3167             return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3168                     && offset_7bit_signed_scaled_p (mode, offset));
3169           else
3170             return (offset_9bit_signed_unscaled_p (mode, offset)
3171                     || offset_12bit_unsigned_scaled_p (mode, offset));
3172         }
3173
3174       if (allow_reg_index_p)
3175         {
3176           /* Look for base + (scaled/extended) index register.  */
3177           if (aarch64_base_register_rtx_p (op0, strict_p)
3178               && aarch64_classify_index (info, op1, mode, strict_p))
3179             {
3180               info->base = op0;
3181               return true;
3182             }
3183           if (aarch64_base_register_rtx_p (op1, strict_p)
3184               && aarch64_classify_index (info, op0, mode, strict_p))
3185             {
3186               info->base = op1;
3187               return true;
3188             }
3189         }
3190
3191       return false;
3192
3193     case POST_INC:
3194     case POST_DEC:
3195     case PRE_INC:
3196     case PRE_DEC:
3197       info->type = ADDRESS_REG_WB;
3198       info->base = XEXP (x, 0);
3199       info->offset = NULL_RTX;
3200       return aarch64_base_register_rtx_p (info->base, strict_p);
3201
3202     case POST_MODIFY:
3203     case PRE_MODIFY:
3204       info->type = ADDRESS_REG_WB;
3205       info->base = XEXP (x, 0);
3206       if (GET_CODE (XEXP (x, 1)) == PLUS
3207           && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3208           && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3209           && aarch64_base_register_rtx_p (info->base, strict_p))
3210         {
3211           HOST_WIDE_INT offset;
3212           info->offset = XEXP (XEXP (x, 1), 1);
3213           offset = INTVAL (info->offset);
3214
3215           /* TImode and TFmode values are allowed in both pairs of X
3216              registers and individual Q registers.  The available
3217              address modes are:
3218              X,X: 7-bit signed scaled offset
3219              Q:   9-bit signed offset
3220              We conservatively require an offset representable in either mode.
3221            */
3222           if (mode == TImode || mode == TFmode)
3223             return (offset_7bit_signed_scaled_p (mode, offset)
3224                     && offset_9bit_signed_unscaled_p (mode, offset));
3225
3226           if (outer_code == PARALLEL)
3227             return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3228                     && offset_7bit_signed_scaled_p (mode, offset));
3229           else
3230             return offset_9bit_signed_unscaled_p (mode, offset);
3231         }
3232       return false;
3233
3234     case CONST:
3235     case SYMBOL_REF:
3236     case LABEL_REF:
3237       /* load literal: pc-relative constant pool entry.  Only supported
3238          for SI mode or larger.  */
3239       info->type = ADDRESS_SYMBOLIC;
3240       if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3241         {
3242           rtx sym, addend;
3243
3244           split_const (x, &sym, &addend);
3245           return (GET_CODE (sym) == LABEL_REF
3246                   || (GET_CODE (sym) == SYMBOL_REF
3247                       && CONSTANT_POOL_ADDRESS_P (sym)));
3248         }
3249       return false;
3250
3251     case LO_SUM:
3252       info->type = ADDRESS_LO_SUM;
3253       info->base = XEXP (x, 0);
3254       info->offset = XEXP (x, 1);
3255       if (allow_reg_index_p
3256           && aarch64_base_register_rtx_p (info->base, strict_p))
3257         {
3258           rtx sym, offs;
3259           split_const (info->offset, &sym, &offs);
3260           if (GET_CODE (sym) == SYMBOL_REF
3261               && (aarch64_classify_symbol (sym, offs, SYMBOL_CONTEXT_MEM)
3262                   == SYMBOL_SMALL_ABSOLUTE))
3263             {
3264               /* The symbol and offset must be aligned to the access size.  */
3265               unsigned int align;
3266               unsigned int ref_size;
3267
3268               if (CONSTANT_POOL_ADDRESS_P (sym))
3269                 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3270               else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3271                 {
3272                   tree exp = SYMBOL_REF_DECL (sym);
3273                   align = TYPE_ALIGN (TREE_TYPE (exp));
3274                   align = CONSTANT_ALIGNMENT (exp, align);
3275                 }
3276               else if (SYMBOL_REF_DECL (sym))
3277                 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3278               else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3279                        && SYMBOL_REF_BLOCK (sym) != NULL)
3280                 align = SYMBOL_REF_BLOCK (sym)->alignment;
3281               else
3282                 align = BITS_PER_UNIT;
3283
3284               ref_size = GET_MODE_SIZE (mode);
3285               if (ref_size == 0)
3286                 ref_size = GET_MODE_SIZE (DImode);
3287
3288               return ((INTVAL (offs) & (ref_size - 1)) == 0
3289                       && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3290             }
3291         }
3292       return false;
3293
3294     default:
3295       return false;
3296     }
3297 }
3298
3299 bool
3300 aarch64_symbolic_address_p (rtx x)
3301 {
3302   rtx offset;
3303
3304   split_const (x, &x, &offset);
3305   return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3306 }
3307
3308 /* Classify the base of symbolic expression X, given that X appears in
3309    context CONTEXT.  */
3310
3311 enum aarch64_symbol_type
3312 aarch64_classify_symbolic_expression (rtx x,
3313                                       enum aarch64_symbol_context context)
3314 {
3315   rtx offset;
3316
3317   split_const (x, &x, &offset);
3318   return aarch64_classify_symbol (x, offset, context);
3319 }
3320
3321
3322 /* Return TRUE if X is a legitimate address for accessing memory in
3323    mode MODE.  */
3324 static bool
3325 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3326 {
3327   struct aarch64_address_info addr;
3328
3329   return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3330 }
3331
3332 /* Return TRUE if X is a legitimate address for accessing memory in
3333    mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
3334    pair operation.  */
3335 bool
3336 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3337                               RTX_CODE outer_code, bool strict_p)
3338 {
3339   struct aarch64_address_info addr;
3340
3341   return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3342 }
3343
3344 /* Return TRUE if rtx X is immediate constant 0.0 */
3345 bool
3346 aarch64_float_const_zero_rtx_p (rtx x)
3347 {
3348   REAL_VALUE_TYPE r;
3349
3350   if (GET_MODE (x) == VOIDmode)
3351     return false;
3352
3353   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3354   if (REAL_VALUE_MINUS_ZERO (r))
3355     return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3356   return REAL_VALUES_EQUAL (r, dconst0);
3357 }
3358
3359 /* Return the fixed registers used for condition codes.  */
3360
3361 static bool
3362 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3363 {
3364   *p1 = CC_REGNUM;
3365   *p2 = INVALID_REGNUM;
3366   return true;
3367 }
3368
3369 enum machine_mode
3370 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3371 {
3372   /* All floating point compares return CCFP if it is an equality
3373      comparison, and CCFPE otherwise.  */
3374   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3375     {
3376       switch (code)
3377         {
3378         case EQ:
3379         case NE:
3380         case UNORDERED:
3381         case ORDERED:
3382         case UNLT:
3383         case UNLE:
3384         case UNGT:
3385         case UNGE:
3386         case UNEQ:
3387         case LTGT:
3388           return CCFPmode;
3389
3390         case LT:
3391         case LE:
3392         case GT:
3393         case GE:
3394           return CCFPEmode;
3395
3396         default:
3397           gcc_unreachable ();
3398         }
3399     }
3400
3401   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3402       && y == const0_rtx
3403       && (code == EQ || code == NE || code == LT || code == GE)
3404       && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3405           || GET_CODE (x) == NEG))
3406     return CC_NZmode;
3407
3408   /* A compare with a shifted operand.  Because of canonicalization,
3409      the comparison will have to be swapped when we emit the assembly
3410      code.  */
3411   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3412       && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3413       && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3414           || GET_CODE (x) == LSHIFTRT
3415           || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3416     return CC_SWPmode;
3417
3418   /* Similarly for a negated operand, but we can only do this for
3419      equalities.  */
3420   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3421       && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3422       && (code == EQ || code == NE)
3423       && GET_CODE (x) == NEG)
3424     return CC_Zmode;
3425
3426   /* A compare of a mode narrower than SI mode against zero can be done
3427      by extending the value in the comparison.  */
3428   if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3429       && y == const0_rtx)
3430     /* Only use sign-extension if we really need it.  */
3431     return ((code == GT || code == GE || code == LE || code == LT)
3432             ? CC_SESWPmode : CC_ZESWPmode);
3433
3434   /* For everything else, return CCmode.  */
3435   return CCmode;
3436 }
3437
3438 static unsigned
3439 aarch64_get_condition_code (rtx x)
3440 {
3441   enum machine_mode mode = GET_MODE (XEXP (x, 0));
3442   enum rtx_code comp_code = GET_CODE (x);
3443
3444   if (GET_MODE_CLASS (mode) != MODE_CC)
3445     mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3446
3447   switch (mode)
3448     {
3449     case CCFPmode:
3450     case CCFPEmode:
3451       switch (comp_code)
3452         {
3453         case GE: return AARCH64_GE;
3454         case GT: return AARCH64_GT;
3455         case LE: return AARCH64_LS;
3456         case LT: return AARCH64_MI;
3457         case NE: return AARCH64_NE;
3458         case EQ: return AARCH64_EQ;
3459         case ORDERED: return AARCH64_VC;
3460         case UNORDERED: return AARCH64_VS;
3461         case UNLT: return AARCH64_LT;
3462         case UNLE: return AARCH64_LE;
3463         case UNGT: return AARCH64_HI;
3464         case UNGE: return AARCH64_PL;
3465         default: gcc_unreachable ();
3466         }
3467       break;
3468
3469     case CCmode:
3470       switch (comp_code)
3471         {
3472         case NE: return AARCH64_NE;
3473         case EQ: return AARCH64_EQ;
3474         case GE: return AARCH64_GE;
3475         case GT: return AARCH64_GT;
3476         case LE: return AARCH64_LE;
3477         case LT: return AARCH64_LT;
3478         case GEU: return AARCH64_CS;
3479         case GTU: return AARCH64_HI;
3480         case LEU: return AARCH64_LS;
3481         case LTU: return AARCH64_CC;
3482         default: gcc_unreachable ();
3483         }
3484       break;
3485
3486     case CC_SWPmode:
3487     case CC_ZESWPmode:
3488     case CC_SESWPmode:
3489       switch (comp_code)
3490         {
3491         case NE: return AARCH64_NE;
3492         case EQ: return AARCH64_EQ;
3493         case GE: return AARCH64_LE;
3494         case GT: return AARCH64_LT;
3495         case LE: return AARCH64_GE;
3496         case LT: return AARCH64_GT;
3497         case GEU: return AARCH64_LS;
3498         case GTU: return AARCH64_CC;
3499         case LEU: return AARCH64_CS;
3500         case LTU: return AARCH64_HI;
3501         default: gcc_unreachable ();
3502         }
3503       break;
3504
3505     case CC_NZmode:
3506       switch (comp_code)
3507         {
3508         case NE: return AARCH64_NE;
3509         case EQ: return AARCH64_EQ;
3510         case GE: return AARCH64_PL;
3511         case LT: return AARCH64_MI;
3512         default: gcc_unreachable ();
3513         }
3514       break;
3515
3516     case CC_Zmode:
3517       switch (comp_code)
3518         {
3519         case NE: return AARCH64_NE;
3520         case EQ: return AARCH64_EQ;
3521         default: gcc_unreachable ();
3522         }
3523       break;
3524
3525     default:
3526       gcc_unreachable ();
3527       break;
3528     }
3529 }
3530
/* Return the number of bits that are set in VALUE.  */

static unsigned
bit_count (unsigned HOST_WIDE_INT value)
{
  unsigned nbits;

  /* "value & (value - 1)" clears the lowest set bit, so the loop body
     runs exactly once per set bit.  */
  for (nbits = 0; value != 0; nbits++)
    value &= value - 1;

  return nbits;
}
3544
3545 void
3546 aarch64_print_operand (FILE *f, rtx x, char code)
3547 {
3548   switch (code)
3549     {
3550     /* An integer or symbol address without a preceding # sign.  */
3551     case 'c':
3552       switch (GET_CODE (x))
3553         {
3554         case CONST_INT:
3555           fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3556           break;
3557
3558         case SYMBOL_REF:
3559           output_addr_const (f, x);
3560           break;
3561
3562         case CONST:
3563           if (GET_CODE (XEXP (x, 0)) == PLUS
3564               && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3565             {
3566               output_addr_const (f, x);
3567               break;
3568             }
3569           /* Fall through.  */
3570
3571         default:
3572           output_operand_lossage ("Unsupported operand for code '%c'", code);
3573         }
3574       break;
3575
3576     case 'e':
3577       /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w.  */
3578       {
3579         int n;
3580
3581         if (GET_CODE (x) != CONST_INT
3582             || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3583           {
3584             output_operand_lossage ("invalid operand for '%%%c'", code);
3585             return;
3586           }
3587
3588         switch (n)
3589           {
3590           case 3:
3591             fputc ('b', f);
3592             break;
3593           case 4:
3594             fputc ('h', f);
3595             break;
3596           case 5:
3597             fputc ('w', f);
3598             break;
3599           default:
3600             output_operand_lossage ("invalid operand for '%%%c'", code);
3601             return;
3602           }
3603       }
3604       break;
3605
3606     case 'p':
3607       {
3608         int n;
3609
3610         /* Print N such that 2^N == X.  */
3611         if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3612           {
3613             output_operand_lossage ("invalid operand for '%%%c'", code);
3614             return;
3615           }
3616
3617         asm_fprintf (f, "%d", n);
3618       }
3619       break;
3620
3621     case 'P':
3622       /* Print the number of non-zero bits in X (a const_int).  */
3623       if (GET_CODE (x) != CONST_INT)
3624         {
3625           output_operand_lossage ("invalid operand for '%%%c'", code);
3626           return;
3627         }
3628
3629       asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3630       break;
3631
3632     case 'H':
3633       /* Print the higher numbered register of a pair (TImode) of regs.  */
3634       if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3635         {
3636           output_operand_lossage ("invalid operand for '%%%c'", code);
3637           return;
3638         }
3639
3640       asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3641       break;
3642
3643     case 'm':
3644       /* Print a condition (eq, ne, etc).  */
3645
3646       /* CONST_TRUE_RTX means always -- that's the default.  */
3647       if (x == const_true_rtx)
3648         return;
3649
3650       if (!COMPARISON_P (x))
3651         {
3652           output_operand_lossage ("invalid operand for '%%%c'", code);
3653           return;
3654         }
3655
3656       fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3657       break;
3658
3659     case 'M':
3660       /* Print the inverse of a condition (eq <-> ne, etc).  */
3661
3662       /* CONST_TRUE_RTX means never -- that's the default.  */
3663       if (x == const_true_rtx)
3664         {
3665           fputs ("nv", f);
3666           return;
3667         }
3668
3669       if (!COMPARISON_P (x))
3670         {
3671           output_operand_lossage ("invalid operand for '%%%c'", code);
3672           return;
3673         }
3674
3675       fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3676                                   (aarch64_get_condition_code (x))], f);
3677       break;
3678
3679     case 'b':
3680     case 'h':
3681     case 's':
3682     case 'd':
3683     case 'q':
3684       /* Print a scalar FP/SIMD register name.  */
3685       if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3686         {
3687           output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3688           return;
3689         }
3690       asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3691       break;
3692
3693     case 'S':
3694     case 'T':
3695     case 'U':
3696     case 'V':
3697       /* Print the first FP/SIMD register name in a list.  */
3698       if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3699         {
3700           output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3701           return;
3702         }
3703       asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3704       break;
3705
3706     case 'X':
3707       /* Print bottom 16 bits of integer constant in hex.  */
3708       if (GET_CODE (x) != CONST_INT)
3709         {
3710           output_operand_lossage ("invalid operand for '%%%c'", code);
3711           return;
3712         }
3713       asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3714       break;
3715
3716     case 'w':
3717     case 'x':
3718       /* Print a general register name or the zero register (32-bit or
3719          64-bit).  */
3720       if (x == const0_rtx
3721           || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3722         {
3723           asm_fprintf (f, "%czr", code);
3724           break;
3725         }
3726
3727       if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3728         {
3729           asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3730           break;
3731         }
3732
3733       if (REG_P (x) && REGNO (x) == SP_REGNUM)
3734         {
3735           asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3736           break;
3737         }
3738
3739       /* Fall through */
3740
3741     case 0:
3742       /* Print a normal operand, if it's a general register, then we
3743          assume DImode.  */
3744       if (x == NULL)
3745         {
3746           output_operand_lossage ("missing operand");
3747           return;
3748         }
3749
3750       switch (GET_CODE (x))
3751         {
3752         case REG:
3753           asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3754           break;
3755
3756         case MEM:
3757           aarch64_memory_reference_mode = GET_MODE (x);
3758           output_address (XEXP (x, 0));
3759           break;
3760
3761         case LABEL_REF:
3762         case SYMBOL_REF:
3763           output_addr_const (asm_out_file, x);
3764           break;
3765
3766         case CONST_INT:
3767           asm_fprintf (f, "%wd", INTVAL (x));
3768           break;
3769
3770         case CONST_VECTOR:
3771           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3772             {
3773               gcc_assert (aarch64_const_vec_all_same_int_p (x,
3774                                                             HOST_WIDE_INT_MIN,
3775                                                             HOST_WIDE_INT_MAX));
3776               asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3777             }
3778           else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3779             {
3780               fputc ('0', f);
3781             }
3782           else
3783             gcc_unreachable ();
3784           break;
3785
3786         case CONST_DOUBLE:
3787           /* CONST_DOUBLE can represent a double-width integer.
3788              In this case, the mode of x is VOIDmode.  */
3789           if (GET_MODE (x) == VOIDmode)
3790             ; /* Do Nothing.  */
3791           else if (aarch64_float_const_zero_rtx_p (x))
3792             {
3793               fputc ('0', f);
3794               break;
3795             }
3796           else if (aarch64_float_const_representable_p (x))
3797             {
3798 #define buf_size 20
3799               char float_buf[buf_size] = {'\0'};
3800               REAL_VALUE_TYPE r;
3801               REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3802               real_to_decimal_for_mode (float_buf, &r,
3803                                         buf_size, buf_size,
3804                                         1, GET_MODE (x));
3805               asm_fprintf (asm_out_file, "%s", float_buf);
3806               break;
3807 #undef buf_size
3808             }
3809           output_operand_lossage ("invalid constant");
3810           return;
3811         default:
3812           output_operand_lossage ("invalid operand");
3813           return;
3814         }
3815       break;
3816
3817     case 'A':
3818       if (GET_CODE (x) == HIGH)
3819         x = XEXP (x, 0);
3820
3821       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3822         {
3823         case SYMBOL_SMALL_GOT:
3824           asm_fprintf (asm_out_file, ":got:");
3825           break;
3826
3827         case SYMBOL_SMALL_TLSGD:
3828           asm_fprintf (asm_out_file, ":tlsgd:");
3829           break;
3830
3831         case SYMBOL_SMALL_TLSDESC:
3832           asm_fprintf (asm_out_file, ":tlsdesc:");
3833           break;
3834
3835         case SYMBOL_SMALL_GOTTPREL:
3836           asm_fprintf (asm_out_file, ":gottprel:");
3837           break;
3838
3839         case SYMBOL_SMALL_TPREL:
3840           asm_fprintf (asm_out_file, ":tprel:");
3841           break;
3842
3843         case SYMBOL_TINY_GOT:
3844           gcc_unreachable ();
3845           break;
3846
3847         default:
3848           break;
3849         }
3850       output_addr_const (asm_out_file, x);
3851       break;
3852
3853     case 'L':
3854       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3855         {
3856         case SYMBOL_SMALL_GOT:
3857           asm_fprintf (asm_out_file, ":lo12:");
3858           break;
3859
3860         case SYMBOL_SMALL_TLSGD:
3861           asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3862           break;
3863
3864         case SYMBOL_SMALL_TLSDESC:
3865           asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3866           break;
3867
3868         case SYMBOL_SMALL_GOTTPREL:
3869           asm_fprintf (asm_out_file, ":gottprel_lo12:");
3870           break;
3871
3872         case SYMBOL_SMALL_TPREL:
3873           asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3874           break;
3875
3876         case SYMBOL_TINY_GOT:
3877           asm_fprintf (asm_out_file, ":got:");
3878           break;
3879
3880         default:
3881           break;
3882         }
3883       output_addr_const (asm_out_file, x);
3884       break;
3885
3886     case 'G':
3887
3888       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3889         {
3890         case SYMBOL_SMALL_TPREL:
3891           asm_fprintf (asm_out_file, ":tprel_hi12:");
3892           break;
3893         default:
3894           break;
3895         }
3896       output_addr_const (asm_out_file, x);
3897       break;
3898
3899     default:
3900       output_operand_lossage ("invalid operand prefix '%%%c'", code);
3901       return;
3902     }
3903 }
3904
3905 void
3906 aarch64_print_operand_address (FILE *f, rtx x)
3907 {
3908   struct aarch64_address_info addr;
3909
3910   if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3911                              MEM, true))
3912     switch (addr.type)
3913       {
3914       case ADDRESS_REG_IMM:
3915         if (addr.offset == const0_rtx)
3916           asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3917         else
3918           asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3919                        INTVAL (addr.offset));
3920         return;
3921
3922       case ADDRESS_REG_REG:
3923         if (addr.shift == 0)
3924           asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3925                        reg_names [REGNO (addr.offset)]);
3926         else
3927           asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3928                        reg_names [REGNO (addr.offset)], addr.shift);
3929         return;
3930
3931       case ADDRESS_REG_UXTW:
3932         if (addr.shift == 0)
3933           asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3934                        REGNO (addr.offset) - R0_REGNUM);
3935         else
3936           asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3937                        REGNO (addr.offset) - R0_REGNUM, addr.shift);
3938         return;
3939
3940       case ADDRESS_REG_SXTW:
3941         if (addr.shift == 0)
3942           asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3943                        REGNO (addr.offset) - R0_REGNUM);
3944         else
3945           asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3946                        REGNO (addr.offset) - R0_REGNUM, addr.shift);
3947         return;
3948
3949       case ADDRESS_REG_WB:
3950         switch (GET_CODE (x))
3951           {
3952           case PRE_INC:
3953             asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3954                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3955             return;
3956           case POST_INC:
3957             asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3958                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3959             return;
3960           case PRE_DEC:
3961             asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3962                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3963             return;
3964           case POST_DEC:
3965             asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3966                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3967             return;
3968           case PRE_MODIFY:
3969             asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3970                          INTVAL (addr.offset));
3971             return;
3972           case POST_MODIFY:
3973             asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3974                          INTVAL (addr.offset));
3975             return;
3976           default:
3977             break;
3978           }
3979         break;
3980
3981       case ADDRESS_LO_SUM:
3982         asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3983         output_addr_const (f, addr.offset);
3984         asm_fprintf (f, "]");
3985         return;
3986
3987       case ADDRESS_SYMBOLIC:
3988         break;
3989       }
3990
3991   output_addr_const (f, x);
3992 }
3993
3994 bool
3995 aarch64_label_mentioned_p (rtx x)
3996 {
3997   const char *fmt;
3998   int i;
3999
4000   if (GET_CODE (x) == LABEL_REF)
4001     return true;
4002
4003   /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4004      referencing instruction, but they are constant offsets, not
4005      symbols.  */
4006   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4007     return false;
4008
4009   fmt = GET_RTX_FORMAT (GET_CODE (x));
4010   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4011     {
4012       if (fmt[i] == 'E')
4013         {
4014           int j;
4015
4016           for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4017             if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4018               return 1;
4019         }
4020       else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4021         return 1;
4022     }
4023
4024   return 0;
4025 }
4026
4027 /* Implement REGNO_REG_CLASS.  */
4028
4029 enum reg_class
4030 aarch64_regno_regclass (unsigned regno)
4031 {
4032   if (GP_REGNUM_P (regno))
4033     return CORE_REGS;
4034
4035   if (regno == SP_REGNUM)
4036     return STACK_REG;
4037
4038   if (regno == FRAME_POINTER_REGNUM
4039       || regno == ARG_POINTER_REGNUM)
4040     return POINTER_REGS;
4041
4042   if (FP_REGNUM_P (regno))
4043     return FP_LO_REGNUM_P (regno) ?  FP_LO_REGS : FP_REGS;
4044
4045   return NO_REGS;
4046 }
4047
4048 /* Try a machine-dependent way of reloading an illegitimate address
4049    operand.  If we find one, push the reload and return the new rtx.  */
4050
4051 rtx
4052 aarch64_legitimize_reload_address (rtx *x_p,
4053                                    enum machine_mode mode,
4054                                    int opnum, int type,
4055                                    int ind_levels ATTRIBUTE_UNUSED)
4056 {
4057   rtx x = *x_p;
4058
4059   /* Do not allow mem (plus (reg, const)) if vector mode.  */
4060   if (aarch64_vector_mode_p (mode)
4061       && GET_CODE (x) == PLUS
4062       && REG_P (XEXP (x, 0))
4063       && CONST_INT_P (XEXP (x, 1)))
4064     {
4065       rtx orig_rtx = x;
4066       x = copy_rtx (x);
4067       push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4068                    BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4069                    opnum, (enum reload_type) type);
4070       return x;
4071     }
4072
4073   /* We must recognize output that we have already generated ourselves.  */
4074   if (GET_CODE (x) == PLUS
4075       && GET_CODE (XEXP (x, 0)) == PLUS
4076       && REG_P (XEXP (XEXP (x, 0), 0))
4077       && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4078       && CONST_INT_P (XEXP (x, 1)))
4079     {
4080       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4081                    BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4082                    opnum, (enum reload_type) type);
4083       return x;
4084     }
4085
4086   /* We wish to handle large displacements off a base register by splitting
4087      the addend across an add and the mem insn.  This can cut the number of
4088      extra insns needed from 3 to 1.  It is only useful for load/store of a
4089      single register with 12 bit offset field.  */
4090   if (GET_CODE (x) == PLUS
4091       && REG_P (XEXP (x, 0))
4092       && CONST_INT_P (XEXP (x, 1))
4093       && HARD_REGISTER_P (XEXP (x, 0))
4094       && mode != TImode
4095       && mode != TFmode
4096       && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4097     {
4098       HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4099       HOST_WIDE_INT low = val & 0xfff;
4100       HOST_WIDE_INT high = val - low;
4101       HOST_WIDE_INT offs;
4102       rtx cst;
4103       enum machine_mode xmode = GET_MODE (x);
4104
4105       /* In ILP32, xmode can be either DImode or SImode.  */
4106       gcc_assert (xmode == DImode || xmode == SImode);
4107
4108       /* Reload non-zero BLKmode offsets.  This is because we cannot ascertain
4109          BLKmode alignment.  */
4110       if (GET_MODE_SIZE (mode) == 0)
4111         return NULL_RTX;
4112
4113       offs = low % GET_MODE_SIZE (mode);
4114
4115       /* Align misaligned offset by adjusting high part to compensate.  */
4116       if (offs != 0)
4117         {
4118           if (aarch64_uimm12_shift (high + offs))
4119             {
4120               /* Align down.  */
4121               low = low - offs;
4122               high = high + offs;
4123             }
4124           else
4125             {
4126               /* Align up.  */
4127               offs = GET_MODE_SIZE (mode) - offs;
4128               low = low + offs;
4129               high = high + (low & 0x1000) - offs;
4130               low &= 0xfff;
4131             }
4132         }
4133
4134       /* Check for overflow.  */
4135       if (high + low != val)
4136         return NULL_RTX;
4137
4138       cst = GEN_INT (high);
4139       if (!aarch64_uimm12_shift (high))
4140         cst = force_const_mem (xmode, cst);
4141
4142       /* Reload high part into base reg, leaving the low part
4143          in the mem instruction.
4144          Note that replacing this gen_rtx_PLUS with plus_constant is
4145          wrong in this case because we rely on the
4146          (plus (plus reg c1) c2) structure being preserved so that
4147          XEXP (*p, 0) in push_reload below uses the correct term.  */
4148       x = gen_rtx_PLUS (xmode,
4149                         gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4150                         GEN_INT (low));
4151
4152       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4153                    BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4154                    opnum, (enum reload_type) type);
4155       return x;
4156     }
4157
4158   return NULL_RTX;
4159 }
4160
4161
4162 static reg_class_t
4163 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4164                           reg_class_t rclass,
4165                           enum machine_mode mode,
4166                           secondary_reload_info *sri)
4167 {
4168   /* Without the TARGET_SIMD instructions we cannot move a Q register
4169      to a Q register directly.  We need a scratch.  */
4170   if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4171       && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4172       && reg_class_subset_p (rclass, FP_REGS))
4173     {
4174       if (mode == TFmode)
4175         sri->icode = CODE_FOR_aarch64_reload_movtf;
4176       else if (mode == TImode)
4177         sri->icode = CODE_FOR_aarch64_reload_movti;
4178       return NO_REGS;
4179     }
4180
4181   /* A TFmode or TImode memory access should be handled via an FP_REGS
4182      because AArch64 has richer addressing modes for LDR/STR instructions
4183      than LDP/STP instructions.  */
4184   if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4185       && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4186     return FP_REGS;
4187
4188   if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4189       return CORE_REGS;
4190
4191   return NO_REGS;
4192 }
4193
4194 static bool
4195 aarch64_can_eliminate (const int from, const int to)
4196 {
4197   /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4198      HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */
4199
4200   if (frame_pointer_needed)
4201     {
4202       if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4203         return true;
4204       if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4205         return false;
4206       if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4207           && !cfun->calls_alloca)
4208         return true;
4209       if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4210         return true;
4211
4212       return false;
4213     }
4214
4215   return true;
4216 }
4217
4218 HOST_WIDE_INT
4219 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4220 {
4221   HOST_WIDE_INT frame_size;
4222   HOST_WIDE_INT offset;
4223
4224   aarch64_layout_frame ();
4225   frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4226                 + crtl->outgoing_args_size
4227                 + cfun->machine->saved_varargs_size);
4228
4229    frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4230    offset = frame_size;
4231
4232    if (to == HARD_FRAME_POINTER_REGNUM)
4233      {
4234        if (from == ARG_POINTER_REGNUM)
4235          return offset - crtl->outgoing_args_size;
4236
4237        if (from == FRAME_POINTER_REGNUM)
4238          return cfun->machine->frame.saved_regs_size + get_frame_size ();
4239      }
4240
4241    if (to == STACK_POINTER_REGNUM)
4242      {
4243        if (from == FRAME_POINTER_REGNUM)
4244          {
4245            HOST_WIDE_INT elim = crtl->outgoing_args_size
4246                               + cfun->machine->frame.saved_regs_size
4247                               + get_frame_size ()
4248                               - cfun->machine->frame.fp_lr_offset;
4249            elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4250            return elim;
4251          }
4252      }
4253
4254    return offset;
4255 }
4256
4257
4258 /* Implement RETURN_ADDR_RTX.  We do not support moving back to a
4259    previous frame.  */
4260
4261 rtx
4262 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4263 {
4264   if (count != 0)
4265     return const0_rtx;
4266   return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4267 }
4268
4269
4270 static void
4271 aarch64_asm_trampoline_template (FILE *f)
4272 {
4273   if (TARGET_ILP32)
4274     {
4275       asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4276       asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4277     }
4278   else
4279     {
4280       asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4281       asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4282     }
4283   asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4284   assemble_aligned_integer (4, const0_rtx);
4285   assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4286   assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4287 }
4288
4289 static void
4290 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4291 {
4292   rtx fnaddr, mem, a_tramp;
4293   const int tramp_code_sz = 16;
4294
4295   /* Don't need to copy the trailing D-words, we fill those in below.  */
4296   emit_block_move (m_tramp, assemble_trampoline_template (),
4297                    GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4298   mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4299   fnaddr = XEXP (DECL_RTL (fndecl), 0);
4300   if (GET_MODE (fnaddr) != ptr_mode)
4301     fnaddr = convert_memory_address (ptr_mode, fnaddr);
4302   emit_move_insn (mem, fnaddr);
4303
4304   mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4305   emit_move_insn (mem, chain_value);
4306
4307   /* XXX We should really define a "clear_cache" pattern and use
4308      gen_clear_cache().  */
4309   a_tramp = XEXP (m_tramp, 0);
4310   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4311                      LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4312                      plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4313                      ptr_mode);
4314 }
4315
4316 static unsigned char
4317 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4318 {
4319   switch (regclass)
4320     {
4321     case CORE_REGS:
4322     case POINTER_REGS:
4323     case GENERAL_REGS:
4324     case ALL_REGS:
4325     case FP_REGS:
4326     case FP_LO_REGS:
4327       return
4328         aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4329                                        (GET_MODE_SIZE (mode) + 7) / 8;
4330     case STACK_REG:
4331       return 1;
4332
4333     case NO_REGS:
4334       return 0;
4335
4336     default:
4337       break;
4338     }
4339   gcc_unreachable ();
4340 }
4341
4342 static reg_class_t
4343 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4344 {
4345   if (regclass == POINTER_REGS)
4346     return GENERAL_REGS;
4347
4348   if (regclass == STACK_REG)
4349     {
4350       if (REG_P(x)
4351           && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4352           return regclass;
4353
4354       return NO_REGS;
4355     }
4356
4357   /* If it's an integer immediate that MOVI can't handle, then
4358      FP_REGS is not an option, so we return NO_REGS instead.  */
4359   if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4360       && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4361     return NO_REGS;
4362
4363   /* Register eliminiation can result in a request for
4364      SP+constant->FP_REGS.  We cannot support such operations which
4365      use SP as source and an FP_REG as destination, so reject out
4366      right now.  */
4367   if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4368     {
4369       rtx lhs = XEXP (x, 0);
4370
4371       /* Look through a possible SUBREG introduced by ILP32.  */
4372       if (GET_CODE (lhs) == SUBREG)
4373         lhs = SUBREG_REG (lhs);
4374
4375       gcc_assert (REG_P (lhs));
4376       gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4377                                       POINTER_REGS));
4378       return NO_REGS;
4379     }
4380
4381   return regclass;
4382 }
4383
/* Write a reference to the label NAME to file F, using asm_fprintf's
   "%U" directive in front of the name.  */

void
aarch64_asm_output_labelref (FILE *f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}
4389
4390 static void
4391 aarch64_elf_asm_constructor (rtx symbol, int priority)
4392 {
4393   if (priority == DEFAULT_INIT_PRIORITY)
4394     default_ctor_section_asm_out_constructor (symbol, priority);
4395   else
4396     {
4397       section *s;
4398       char buf[18];
4399       snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4400       s = get_section (buf, SECTION_WRITE, NULL);
4401       switch_to_section (s);
4402       assemble_align (POINTER_SIZE);
4403       assemble_aligned_integer (POINTER_BYTES, symbol);
4404     }
4405 }
4406
4407 static void
4408 aarch64_elf_asm_destructor (rtx symbol, int priority)
4409 {
4410   if (priority == DEFAULT_INIT_PRIORITY)
4411     default_dtor_section_asm_out_destructor (symbol, priority);
4412   else
4413     {
4414       section *s;
4415       char buf[18];
4416       snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4417       s = get_section (buf, SECTION_WRITE, NULL);
4418       switch_to_section (s);
4419       assemble_align (POINTER_SIZE);
4420       assemble_aligned_integer (POINTER_BYTES, symbol);
4421     }
4422 }
4423
4424 const char*
4425 aarch64_output_casesi (rtx *operands)
4426 {
4427   char buf[100];
4428   char label[100];
4429   rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4430   int index;
4431   static const char *const patterns[4][2] =
4432   {
4433     {
4434       "ldrb\t%w3, [%0,%w1,uxtw]",
4435       "add\t%3, %4, %w3, sxtb #2"
4436     },
4437     {
4438       "ldrh\t%w3, [%0,%w1,uxtw #1]",
4439       "add\t%3, %4, %w3, sxth #2"
4440     },
4441     {
4442       "ldr\t%w3, [%0,%w1,uxtw #2]",
4443       "add\t%3, %4, %w3, sxtw #2"
4444     },
4445     /* We assume that DImode is only generated when not optimizing and
4446        that we don't really need 64-bit address offsets.  That would
4447        imply an object file with 8GB of code in a single function!  */
4448     {
4449       "ldr\t%w3, [%0,%w1,uxtw #2]",
4450       "add\t%3, %4, %w3, sxtw #2"
4451     }
4452   };
4453
4454   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4455
4456   index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4457
4458   gcc_assert (index >= 0 && index <= 3);
4459
4460   /* Need to implement table size reduction, by chaning the code below.  */
4461   output_asm_insn (patterns[index][0], operands);
4462   ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4463   snprintf (buf, sizeof (buf),
4464             "adr\t%%4, %s", targetm.strip_name_encoding (label));
4465   output_asm_insn (buf, operands);
4466   output_asm_insn (patterns[index][1], operands);
4467   output_asm_insn ("br\t%3", operands);
4468   assemble_label (asm_out_file, label);
4469   return "";
4470 }
4471
4472
/* Return the width in bits (8, 16 or 32) of the extend described by an
   arithmetic operand that is scaled by 1 << SHIFT and then masked with
   MASK, i.e. one that suits a UXTB, UXTH or UXTW extend operator.
   MASK must be exactly a run of 8, 16 or 32 one bits shifted left by
   SHIFT.  Return 0 if SHIFT is outside 0..3 or MASK has no such
   form.  */

int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  static const int widths[] = { 8, 16, 32 };
  unsigned int i;

  if (shift < 0 || shift > 3)
    return 0;

  for (i = 0; i < sizeof (widths) / sizeof (widths[0]); i++)
    {
      /* WIDTHS[i] low-order one bits, moved up by the scale.  */
      HOST_WIDE_INT field = ((HOST_WIDE_INT) 1U << widths[i]) - 1;

      if ((field << shift) == mask)
        return widths[i];
    }

  return 0;
}
4492
4493 static bool
4494 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4495                                    const_rtx x ATTRIBUTE_UNUSED)
4496 {
4497   /* We can't use blocks for constants when we're using a per-function
4498      constant pool.  */
4499   return false;
4500 }
4501
4502 static section *
4503 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4504                             rtx x ATTRIBUTE_UNUSED,
4505                             unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4506 {
4507   /* Force all constant pool entries into the current function section.  */
4508   return function_section (current_function_decl);
4509 }
4510
4511
4512 /* Costs.  */
4513
4514 /* Helper function for rtx cost calculation.  Strip a shift expression
4515    from X.  Returns the inner operand if successful, or the original
4516    expression on failure.  */
4517 static rtx
4518 aarch64_strip_shift (rtx x)
4519 {
4520   rtx op = x;
4521
4522   if ((GET_CODE (op) == ASHIFT
4523        || GET_CODE (op) == ASHIFTRT
4524        || GET_CODE (op) == LSHIFTRT)
4525       && CONST_INT_P (XEXP (op, 1)))
4526     return XEXP (op, 0);
4527
4528   if (GET_CODE (op) == MULT
4529       && CONST_INT_P (XEXP (op, 1))
4530       && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4531     return XEXP (op, 0);
4532
4533   return x;
4534 }
4535
4536 /* Helper function for rtx cost calculation.  Strip an extend
4537    expression from X.  Returns the inner operand if successful, or the
4538    original expression on failure.  We deal with a number of possible
4539    canonicalization variations here.  */
4540 static rtx
4541 aarch64_strip_extend (rtx x)
4542 {
4543   rtx op = x;
4544
4545   /* Zero and sign extraction of a widened value.  */
4546   if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4547       && XEXP (op, 2) == const0_rtx
4548       && GET_CODE (XEXP (op, 0)) == MULT
4549       && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4550                                          XEXP (op, 1)))
4551     return XEXP (XEXP (op, 0), 0);
4552
4553   /* It can also be represented (for zero-extend) as an AND with an
4554      immediate.  */
4555   if (GET_CODE (op) == AND
4556       && GET_CODE (XEXP (op, 0)) == MULT
4557       && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4558       && CONST_INT_P (XEXP (op, 1))
4559       && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4560                            INTVAL (XEXP (op, 1))) != 0)
4561     return XEXP (XEXP (op, 0), 0);
4562
4563   /* Now handle extended register, as this may also have an optional
4564      left shift by 1..4.  */
4565   if (GET_CODE (op) == ASHIFT
4566       && CONST_INT_P (XEXP (op, 1))
4567       && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4568     op = XEXP (op, 0);
4569
4570   if (GET_CODE (op) == ZERO_EXTEND
4571       || GET_CODE (op) == SIGN_EXTEND)
4572     op = XEXP (op, 0);
4573
4574   if (op != x)
4575     return op;
4576
4577   return x;
4578 }
4579
4580 /* Helper function for rtx cost calculation.  Calculate the cost of
4581    a MULT, which may be part of a multiply-accumulate rtx.  Return
4582    the calculated cost of the expression, recursing manually in to
4583    operands where needed.  */
4584
4585 static int
4586 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4587 {
4588   rtx op0, op1;
4589   const struct cpu_cost_table *extra_cost
4590     = aarch64_tune_params->insn_extra_cost;
4591   int cost = 0;
4592   bool maybe_fma = (outer == PLUS || outer == MINUS);
4593   enum machine_mode mode = GET_MODE (x);
4594
4595   gcc_checking_assert (code == MULT);
4596
4597   op0 = XEXP (x, 0);
4598   op1 = XEXP (x, 1);
4599
4600   if (VECTOR_MODE_P (mode))
4601     mode = GET_MODE_INNER (mode);
4602
4603   /* Integer multiply/fma.  */
4604   if (GET_MODE_CLASS (mode) == MODE_INT)
4605     {
4606       /* The multiply will be canonicalized as a shift, cost it as such.  */
4607       if (CONST_INT_P (op1)
4608           && exact_log2 (INTVAL (op1)) > 0)
4609         {
4610           if (speed)
4611             {
4612               if (maybe_fma)
4613                 /* ADD (shifted register).  */
4614                 cost += extra_cost->alu.arith_shift;
4615               else
4616                 /* LSL (immediate).  */
4617                 cost += extra_cost->alu.shift;
4618             }
4619
4620           cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4621
4622           return cost;
4623         }
4624
4625       /* Integer multiplies or FMAs have zero/sign extending variants.  */
4626       if ((GET_CODE (op0) == ZERO_EXTEND
4627            && GET_CODE (op1) == ZERO_EXTEND)
4628           || (GET_CODE (op0) == SIGN_EXTEND
4629               && GET_CODE (op1) == SIGN_EXTEND))
4630         {
4631           cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4632                   + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4633
4634           if (speed)
4635             {
4636               if (maybe_fma)
4637                 /* MADD/SMADDL/UMADDL.  */
4638                 cost += extra_cost->mult[0].extend_add;
4639               else
4640                 /* MUL/SMULL/UMULL.  */
4641                 cost += extra_cost->mult[0].extend;
4642             }
4643
4644           return cost;
4645         }
4646
4647       /* This is either an integer multiply or an FMA.  In both cases
4648          we want to recurse and cost the operands.  */
4649       cost += rtx_cost (op0, MULT, 0, speed)
4650               + rtx_cost (op1, MULT, 1, speed);
4651
4652       if (speed)
4653         {
4654           if (maybe_fma)
4655             /* MADD.  */
4656             cost += extra_cost->mult[mode == DImode].add;
4657           else
4658             /* MUL.  */
4659             cost += extra_cost->mult[mode == DImode].simple;
4660         }
4661
4662       return cost;
4663     }
4664   else
4665     {
4666       if (speed)
4667         {
4668           /* Floating-point FMA can also support negations of the
4669              operands.  */
4670           if (GET_CODE (op0) == NEG)
4671             {
4672               maybe_fma = true;
4673               op0 = XEXP (op0, 0);
4674             }
4675           if (GET_CODE (op1) == NEG)
4676             {
4677               maybe_fma = true;
4678               op1 = XEXP (op1, 0);
4679             }
4680
4681           if (maybe_fma)
4682             /* FMADD/FNMADD/FNMSUB/FMSUB.  */
4683             cost += extra_cost->fp[mode == DFmode].fma;
4684           else
4685             /* FMUL.  */
4686             cost += extra_cost->fp[mode == DFmode].mult;
4687         }
4688
4689       cost += rtx_cost (op0, MULT, 0, speed)
4690               + rtx_cost (op1, MULT, 1, speed);
4691       return cost;
4692     }
4693 }
4694
4695 static int
4696 aarch64_address_cost (rtx x,
4697                       enum machine_mode mode,
4698                       addr_space_t as ATTRIBUTE_UNUSED,
4699                       bool speed)
4700 {
4701   enum rtx_code c = GET_CODE (x);
4702   const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4703   struct aarch64_address_info info;
4704   int cost = 0;
4705   info.shift = 0;
4706
4707   if (!aarch64_classify_address (&info, x, mode, c, false))
4708     {
4709       if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4710         {
4711           /* This is a CONST or SYMBOL ref which will be split
4712              in a different way depending on the code model in use.
4713              Cost it through the generic infrastructure.  */
4714           int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4715           /* Divide through by the cost of one instruction to
4716              bring it to the same units as the address costs.  */
4717           cost_symbol_ref /= COSTS_N_INSNS (1);
4718           /* The cost is then the cost of preparing the address,
4719              followed by an immediate (possibly 0) offset.  */
4720           return cost_symbol_ref + addr_cost->imm_offset;
4721         }
4722       else
4723         {
4724           /* This is most likely a jump table from a case
4725              statement.  */
4726           return addr_cost->register_offset;
4727         }
4728     }
4729
4730   switch (info.type)
4731     {
4732       case ADDRESS_LO_SUM:
4733       case ADDRESS_SYMBOLIC:
4734       case ADDRESS_REG_IMM:
4735         cost += addr_cost->imm_offset;
4736         break;
4737
4738       case ADDRESS_REG_WB:
4739         if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4740           cost += addr_cost->pre_modify;
4741         else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4742           cost += addr_cost->post_modify;
4743         else
4744           gcc_unreachable ();
4745
4746         break;
4747
4748       case ADDRESS_REG_REG:
4749         cost += addr_cost->register_offset;
4750         break;
4751
4752       case ADDRESS_REG_UXTW:
4753       case ADDRESS_REG_SXTW:
4754         cost += addr_cost->register_extend;
4755         break;
4756
4757       default:
4758         gcc_unreachable ();
4759     }
4760
4761
4762   if (info.shift > 0)
4763     {
4764       /* For the sake of calculating the cost of the shifted register
4765          component, we can treat same sized modes in the same way.  */
4766       switch (GET_MODE_BITSIZE (mode))
4767         {
4768           case 16:
4769             cost += addr_cost->addr_scale_costs.hi;
4770             break;
4771
4772           case 32:
4773             cost += addr_cost->addr_scale_costs.si;
4774             break;
4775
4776           case 64:
4777             cost += addr_cost->addr_scale_costs.di;
4778             break;
4779
4780           /* We can't tell, or this is a 128-bit vector.  */
4781           default:
4782             cost += addr_cost->addr_scale_costs.ti;
4783             break;
4784         }
4785     }
4786
4787   return cost;
4788 }
4789
4790 /* Calculate the cost of calculating X, storing it in *COST.  Result
4791    is true if the total cost of the operation has now been calculated.  */
4792 static bool
4793 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4794                    int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4795 {
4796   rtx op0, op1;
4797   const struct cpu_cost_table *extra_cost
4798     = aarch64_tune_params->insn_extra_cost;
4799   enum machine_mode mode = GET_MODE (x);
4800
4801   /* By default, assume that everything has equivalent cost to the
4802      cheapest instruction.  Any additional costs are applied as a delta
4803      above this default.  */
4804   *cost = COSTS_N_INSNS (1);
4805
4806   /* TODO: The cost infrastructure currently does not handle
4807      vector operations.  Assume that all vector operations
4808      are equally expensive.  */
4809   if (VECTOR_MODE_P (mode))
4810     {
4811       if (speed)
4812         *cost += extra_cost->vect.alu;
4813       return true;
4814     }
4815
4816   switch (code)
4817     {
4818     case SET:
4819       /* The cost depends entirely on the operands to SET.  */
4820       *cost = 0;
4821       op0 = SET_DEST (x);
4822       op1 = SET_SRC (x);
4823
4824       switch (GET_CODE (op0))
4825         {
4826         case MEM:
4827           if (speed)
4828             {
4829               rtx address = XEXP (op0, 0);
4830               if (GET_MODE_CLASS (mode) == MODE_INT)
4831                 *cost += extra_cost->ldst.store;
4832               else if (mode == SFmode)
4833                 *cost += extra_cost->ldst.storef;
4834               else if (mode == DFmode)
4835                 *cost += extra_cost->ldst.stored;
4836
4837               *cost +=
4838                 COSTS_N_INSNS (aarch64_address_cost (address, mode,
4839                                                      0, speed));
4840             }
4841
4842           *cost += rtx_cost (op1, SET, 1, speed);
4843           return true;
4844
4845         case SUBREG:
4846           if (! REG_P (SUBREG_REG (op0)))
4847             *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4848
4849           /* Fall through.  */
4850         case REG:
4851           /* const0_rtx is in general free, but we will use an
4852              instruction to set a register to 0.  */
4853           if (REG_P (op1) || op1 == const0_rtx)
4854             {
4855               /* The cost is 1 per register copied.  */
4856               int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
4857                               / UNITS_PER_WORD;
4858               *cost = COSTS_N_INSNS (n_minus_1 + 1);
4859             }
4860           else
4861             /* Cost is just the cost of the RHS of the set.  */
4862             *cost += rtx_cost (op1, SET, 1, speed);
4863           return true;
4864
4865         case ZERO_EXTRACT:
4866         case SIGN_EXTRACT:
4867           /* Bit-field insertion.  Strip any redundant widening of
4868              the RHS to meet the width of the target.  */
4869           if (GET_CODE (op1) == SUBREG)
4870             op1 = SUBREG_REG (op1);
4871           if ((GET_CODE (op1) == ZERO_EXTEND
4872                || GET_CODE (op1) == SIGN_EXTEND)
4873               && GET_CODE (XEXP (op0, 1)) == CONST_INT
4874               && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4875                   >= INTVAL (XEXP (op0, 1))))
4876             op1 = XEXP (op1, 0);
4877
4878           if (CONST_INT_P (op1))
4879             {
4880               /* MOV immediate is assumed to always be cheap.  */
4881               *cost = COSTS_N_INSNS (1);
4882             }
4883           else
4884             {
4885               /* BFM.  */
4886               if (speed)
4887                 *cost += extra_cost->alu.bfi;
4888               *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
4889             }
4890
4891           return true;
4892
4893         default:
4894           /* We can't make sense of this, assume default cost.  */
4895           *cost = COSTS_N_INSNS (1);
4896           break;
4897         }
4898       return false;
4899
4900     case CONST_INT:
4901       /* If an instruction can incorporate a constant within the
4902          instruction, the instruction's expression avoids calling
4903          rtx_cost() on the constant.  If rtx_cost() is called on a
4904          constant, then it is usually because the constant must be
4905          moved into a register by one or more instructions.
4906
4907          The exception is constant 0, which can be expressed
4908          as XZR/WZR and is therefore free.  The exception to this is
4909          if we have (set (reg) (const0_rtx)) in which case we must cost
4910          the move.  However, we can catch that when we cost the SET, so
4911          we don't need to consider that here.  */
4912       if (x == const0_rtx)
4913         *cost = 0;
4914       else
4915         {
4916           /* To an approximation, building any other constant is
4917              proportionally expensive to the number of instructions
4918              required to build that constant.  This is true whether we
4919              are compiling for SPEED or otherwise.  */
4920           *cost = COSTS_N_INSNS (aarch64_build_constant (0,
4921                                                          INTVAL (x),
4922                                                          false));
4923         }
4924       return true;
4925
4926     case CONST_DOUBLE:
4927       if (speed)
4928         {
4929           /* mov[df,sf]_aarch64.  */
4930           if (aarch64_float_const_representable_p (x))
4931             /* FMOV (scalar immediate).  */
4932             *cost += extra_cost->fp[mode == DFmode].fpconst;
4933           else if (!aarch64_float_const_zero_rtx_p (x))
4934             {
4935               /* This will be a load from memory.  */
4936               if (mode == DFmode)
4937                 *cost += extra_cost->ldst.loadd;
4938               else
4939                 *cost += extra_cost->ldst.loadf;
4940             }
4941           else
4942             /* Otherwise this is +0.0.  We get this using MOVI d0, #0
4943                or MOV v0.s[0], wzr - neither of which are modeled by the
4944                cost tables.  Just use the default cost.  */
4945             {
4946             }
4947         }
4948
4949       return true;
4950
4951     case MEM:
4952       if (speed)
4953         {
4954           /* For loads we want the base cost of a load, plus an
4955              approximation for the additional cost of the addressing
4956              mode.  */
4957           rtx address = XEXP (x, 0);
4958           if (GET_MODE_CLASS (mode) == MODE_INT)
4959             *cost += extra_cost->ldst.load;
4960           else if (mode == SFmode)
4961             *cost += extra_cost->ldst.loadf;
4962           else if (mode == DFmode)
4963             *cost += extra_cost->ldst.loadd;
4964
4965           *cost +=
4966                 COSTS_N_INSNS (aarch64_address_cost (address, mode,
4967                                                      0, speed));
4968         }
4969
4970       return true;
4971
4972     case NEG:
4973       op0 = XEXP (x, 0);
4974
4975       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4976        {
4977           if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
4978               || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
4979             {
4980               /* CSETM.  */
4981               *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
4982               return true;
4983             }
4984
4985           /* Cost this as SUB wzr, X.  */
4986           op0 = CONST0_RTX (GET_MODE (x));
4987           op1 = XEXP (x, 0);
4988           goto cost_minus;
4989         }
4990
4991       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
4992         {
4993           /* Support (neg(fma...)) as a single instruction only if
4994              sign of zeros is unimportant.  This matches the decision
4995              making in aarch64.md.  */
4996           if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
4997             {
4998               /* FNMADD.  */
4999               *cost = rtx_cost (op0, NEG, 0, speed);
5000               return true;
5001             }
5002           if (speed)
5003             /* FNEG.  */
5004             *cost += extra_cost->fp[mode == DFmode].neg;
5005           return false;
5006         }
5007
5008       return false;
5009
5010     case COMPARE:
5011       op0 = XEXP (x, 0);
5012       op1 = XEXP (x, 1);
5013
5014       if (op1 == const0_rtx
5015           && GET_CODE (op0) == AND)
5016         {
5017           x = op0;
5018           goto cost_logic;
5019         }
5020
5021       /* Comparisons can work if the order is swapped.
5022          Canonicalization puts the more complex operation first, but
5023          we want it in op1.  */
5024       if (! (REG_P (op0)
5025              || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5026         {
5027           op0 = XEXP (x, 1);
5028           op1 = XEXP (x, 0);
5029         }
5030       goto cost_minus;
5031
5032     case MINUS:
5033       {
5034         op0 = XEXP (x, 0);
5035         op1 = XEXP (x, 1);
5036
5037 cost_minus:
5038         /* Detect valid immediates.  */
5039         if ((GET_MODE_CLASS (mode) == MODE_INT
5040              || (GET_MODE_CLASS (mode) == MODE_CC
5041                  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5042             && CONST_INT_P (op1)
5043             && aarch64_uimm12_shift (INTVAL (op1)))
5044           {
5045             *cost += rtx_cost (op0, MINUS, 0, speed);
5046
5047             if (speed)
5048               /* SUB(S) (immediate).  */
5049               *cost += extra_cost->alu.arith;
5050             return true;
5051
5052           }
5053
5054         rtx new_op1 = aarch64_strip_extend (op1);
5055
5056         /* Cost this as an FMA-alike operation.  */
5057         if ((GET_CODE (new_op1) == MULT
5058              || GET_CODE (new_op1) == ASHIFT)
5059             && code != COMPARE)
5060           {
5061             *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5062                                             (enum rtx_code) code,
5063                                             speed);
5064             *cost += rtx_cost (op0, MINUS, 0, speed);
5065             return true;
5066           }
5067
5068         *cost += rtx_cost (new_op1, MINUS, 1, speed);
5069
5070         if (speed)
5071           {
5072             if (GET_MODE_CLASS (mode) == MODE_INT)
5073               /* SUB(S).  */
5074               *cost += extra_cost->alu.arith;
5075             else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5076               /* FSUB.  */
5077               *cost += extra_cost->fp[mode == DFmode].addsub;
5078           }
5079         return true;
5080       }
5081
5082     case PLUS:
5083       {
5084         rtx new_op0;
5085
5086         op0 = XEXP (x, 0);
5087         op1 = XEXP (x, 1);
5088
5089         if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5090             || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5091           {
5092             /* CSINC.  */
5093             *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5094             *cost += rtx_cost (op1, PLUS, 1, speed);
5095             return true;
5096           }
5097
5098         if (GET_MODE_CLASS (mode) == MODE_INT
5099             && CONST_INT_P (op1)
5100             && aarch64_uimm12_shift (INTVAL (op1)))
5101           {
5102             *cost += rtx_cost (op0, PLUS, 0, speed);
5103
5104             if (speed)
5105               /* ADD (immediate).  */
5106               *cost += extra_cost->alu.arith;
5107             return true;
5108           }
5109
5110         /* Strip any extend, leave shifts behind as we will
5111            cost them through mult_cost.  */
5112         new_op0 = aarch64_strip_extend (op0);
5113
5114         if (GET_CODE (new_op0) == MULT
5115             || GET_CODE (new_op0) == ASHIFT)
5116           {
5117             *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5118                                             speed);
5119             *cost += rtx_cost (op1, PLUS, 1, speed);
5120             return true;
5121           }
5122
5123         *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5124                   + rtx_cost (op1, PLUS, 1, speed));
5125
5126         if (speed)
5127           {
5128             if (GET_MODE_CLASS (mode) == MODE_INT)
5129               /* ADD.  */
5130               *cost += extra_cost->alu.arith;
5131             else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5132               /* FADD.  */
5133               *cost += extra_cost->fp[mode == DFmode].addsub;
5134           }
5135         return true;
5136       }
5137
5138     case IOR:
5139     case XOR:
5140     case AND:
5141     cost_logic:
5142       op0 = XEXP (x, 0);
5143       op1 = XEXP (x, 1);
5144
5145       if (code == AND
5146           && GET_CODE (op0) == MULT
5147           && CONST_INT_P (XEXP (op0, 1))
5148           && CONST_INT_P (op1)
5149           && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5150                                INTVAL (op1)) != 0)
5151         {
5152           /* This is a UBFM/SBFM.  */
5153           *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5154           if (speed)
5155             *cost += extra_cost->alu.bfx;
5156           return true;
5157         }
5158
5159       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5160         {
5161           /* We possibly get the immediate for free, this is not
5162              modelled.  */
5163           if (CONST_INT_P (op1)
5164               && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5165             {
5166               *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5167
5168               if (speed)
5169                 *cost += extra_cost->alu.logical;
5170
5171               return true;
5172             }
5173           else
5174             {
5175               rtx new_op0 = op0;
5176
5177               /* Handle ORN, EON, or BIC.  */
5178               if (GET_CODE (op0) == NOT)
5179                 op0 = XEXP (op0, 0);
5180
5181               new_op0 = aarch64_strip_shift (op0);
5182
5183               /* If we had a shift on op0 then this is a logical-shift-
5184                  by-register/immediate operation.  Otherwise, this is just
5185                  a logical operation.  */
5186               if (speed)
5187                 {
5188                   if (new_op0 != op0)
5189                     {
5190                       /* Shift by immediate.  */
5191                       if (CONST_INT_P (XEXP (op0, 1)))
5192                         *cost += extra_cost->alu.log_shift;
5193                       else
5194                         *cost += extra_cost->alu.log_shift_reg;
5195                     }
5196                   else
5197                     *cost += extra_cost->alu.logical;
5198                 }
5199
5200               /* In both cases we want to cost both operands.  */
5201               *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5202                        + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5203
5204               return true;
5205             }
5206         }
5207       return false;
5208
5209     case NOT:
5210       /* MVN.  */
5211       if (speed)
5212         *cost += extra_cost->alu.logical;
5213
5214       /* The logical instruction could have the shifted register form,
5215          but the cost is the same if the shift is processed as a separate
5216          instruction, so we don't bother with it here.  */
5217       return false;
5218
5219     case ZERO_EXTEND:
5220
5221       op0 = XEXP (x, 0);
5222       /* If a value is written in SI mode, then zero extended to DI
5223          mode, the operation will in general be free as a write to
5224          a 'w' register implicitly zeroes the upper bits of an 'x'
5225          register.  However, if this is
5226
5227            (set (reg) (zero_extend (reg)))
5228
5229          we must cost the explicit register move.  */
5230       if (mode == DImode
5231           && GET_MODE (op0) == SImode
5232           && outer == SET)
5233         {
5234           int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5235
5236           if (!op_cost && speed)
5237             /* MOV.  */
5238             *cost += extra_cost->alu.extend;
5239           else
5240             /* Free, the cost is that of the SI mode operation.  */
5241             *cost = op_cost;
5242
5243           return true;
5244         }
5245       else if (MEM_P (XEXP (x, 0)))
5246         {
5247           /* All loads can zero extend to any size for free.  */
5248           *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
5249           return true;
5250         }
5251
5252       /* UXTB/UXTH.  */
5253       if (speed)
5254         *cost += extra_cost->alu.extend;
5255
5256       return false;
5257
5258     case SIGN_EXTEND:
5259       if (MEM_P (XEXP (x, 0)))
5260         {
5261           /* LDRSH.  */
5262           if (speed)
5263             {
5264               rtx address = XEXP (XEXP (x, 0), 0);
5265               *cost += extra_cost->ldst.load_sign_extend;
5266
5267               *cost +=
5268                 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5269                                                      0, speed));
5270             }
5271           return true;
5272         }
5273
5274       if (speed)
5275         *cost += extra_cost->alu.extend;
5276       return false;
5277
5278     case ROTATE:
5279       if (!CONST_INT_P (XEXP (x, 1)))
5280         *cost += COSTS_N_INSNS (2);
5281       /* Fall through.  */
5282     case ROTATERT:
5283     case LSHIFTRT:
5284     case ASHIFT:
5285     case ASHIFTRT:
5286
5287       /* Shifting by a register often takes an extra cycle.  */
5288       if (speed && !CONST_INT_P (XEXP (x, 1)))
5289         *cost += extra_cost->alu.arith_shift_reg;
5290
5291       *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
5292       return true;
5293
5294     case HIGH:
5295       if (!CONSTANT_P (XEXP (x, 0)))
5296         *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
5297       return true;
5298
5299     case LO_SUM:
5300       if (!CONSTANT_P (XEXP (x, 1)))
5301         *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
5302       *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
5303       return true;
5304
5305     case ZERO_EXTRACT:
5306     case SIGN_EXTRACT:
5307       *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
5308       return true;
5309
5310     case MULT:
5311       *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5312       /* aarch64_rtx_mult_cost always handles recursion to its
5313          operands.  */
5314       return true;
5315
5316     case MOD:
5317     case UMOD:
5318       *cost = COSTS_N_INSNS (2);
5319       if (speed)
5320         {
5321           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5322             *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5323                       + extra_cost->mult[GET_MODE (x) == DImode].idiv);
5324           else if (GET_MODE (x) == DFmode)
5325             *cost += (extra_cost->fp[1].mult
5326                       + extra_cost->fp[1].div);
5327           else if (GET_MODE (x) == SFmode)
5328             *cost += (extra_cost->fp[0].mult
5329                       + extra_cost->fp[0].div);
5330         }
5331       return false;  /* All arguments need to be in registers.  */
5332
5333     case DIV:
5334     case UDIV:
5335       *cost = COSTS_N_INSNS (1);
5336       if (speed)
5337         {
5338           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5339             *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
5340           else if (GET_MODE (x) == DFmode)
5341             *cost += extra_cost->fp[1].div;
5342           else if (GET_MODE (x) == SFmode)
5343             *cost += extra_cost->fp[0].div;
5344         }
5345       return false;  /* All arguments need to be in registers.  */
5346
5347     default:
5348       break;
5349     }
5350   return false;
5351 }
5352
5353 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
5354    calculated for X.  This cost is stored in *COST.  Returns true
5355    if the total cost of X was calculated.  */
5356 static bool
5357 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
5358                    int param, int *cost, bool speed)
5359 {
5360   bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
5361
5362   if (dump_file && (dump_flags & TDF_DETAILS))
5363     {
5364       print_rtl_single (dump_file, x);
5365       fprintf (dump_file, "\n%s cost: %d (%s)\n",
5366                speed ? "Hot" : "Cold",
5367                *cost, result ? "final" : "partial");
5368     }
5369
5370   return result;
5371 }
5372
5373 static int
5374 aarch64_register_move_cost (enum machine_mode mode,
5375                             reg_class_t from_i, reg_class_t to_i)
5376 {
5377   enum reg_class from = (enum reg_class) from_i;
5378   enum reg_class to = (enum reg_class) to_i;
5379   const struct cpu_regmove_cost *regmove_cost
5380     = aarch64_tune_params->regmove_cost;
5381
5382   /* Moving between GPR and stack cost is the same as GP2GP.  */
5383   if ((from == GENERAL_REGS && to == STACK_REG)
5384       || (to == GENERAL_REGS && from == STACK_REG))
5385     return regmove_cost->GP2GP;
5386
5387   /* To/From the stack register, we move via the gprs.  */
5388   if (to == STACK_REG || from == STACK_REG)
5389     return aarch64_register_move_cost (mode, from, GENERAL_REGS)
5390             + aarch64_register_move_cost (mode, GENERAL_REGS, to);
5391
5392   if (from == GENERAL_REGS && to == GENERAL_REGS)
5393     return regmove_cost->GP2GP;
5394   else if (from == GENERAL_REGS)
5395     return regmove_cost->GP2FP;
5396   else if (to == GENERAL_REGS)
5397     return regmove_cost->FP2GP;
5398
5399   /* When AdvSIMD instructions are disabled it is not possible to move
5400      a 128-bit value directly between Q registers.  This is handled in
5401      secondary reload.  A general register is used as a scratch to move
5402      the upper DI value and the lower DI value is moved directly,
5403      hence the cost is the sum of three moves. */
5404   if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 128)
5405     return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
5406
5407   return regmove_cost->FP2FP;
5408 }
5409
5410 static int
5411 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5412                           reg_class_t rclass ATTRIBUTE_UNUSED,
5413                           bool in ATTRIBUTE_UNUSED)
5414 {
5415   return aarch64_tune_params->memmov_cost;
5416 }
5417
5418 /* Return the number of instructions that can be issued per cycle.  */
5419 static int
5420 aarch64_sched_issue_rate (void)
5421 {
5422   return aarch64_tune_params->issue_rate;
5423 }
5424
5425 /* Vectorizer cost model target hooks.  */
5426
5427 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
5428 static int
5429 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5430                                     tree vectype,
5431                                     int misalign ATTRIBUTE_UNUSED)
5432 {
5433   unsigned elements;
5434
5435   switch (type_of_cost)
5436     {
5437       case scalar_stmt:
5438         return aarch64_tune_params->vec_costs->scalar_stmt_cost;
5439
5440       case scalar_load:
5441         return aarch64_tune_params->vec_costs->scalar_load_cost;
5442
5443       case scalar_store:
5444         return aarch64_tune_params->vec_costs->scalar_store_cost;
5445
5446       case vector_stmt:
5447         return aarch64_tune_params->vec_costs->vec_stmt_cost;
5448
5449       case vector_load:
5450         return aarch64_tune_params->vec_costs->vec_align_load_cost;
5451
5452       case vector_store:
5453         return aarch64_tune_params->vec_costs->vec_store_cost;
5454
5455       case vec_to_scalar:
5456         return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
5457
5458       case scalar_to_vec:
5459         return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
5460
5461       case unaligned_load:
5462         return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
5463
5464       case unaligned_store:
5465         return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
5466
5467       case cond_branch_taken:
5468         return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
5469
5470       case cond_branch_not_taken:
5471         return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
5472
5473       case vec_perm:
5474       case vec_promote_demote:
5475         return aarch64_tune_params->vec_costs->vec_stmt_cost;
5476
5477       case vec_construct:
5478         elements = TYPE_VECTOR_SUBPARTS (vectype);
5479         return elements / 2 + 1;
5480
5481       default:
5482         gcc_unreachable ();
5483     }
5484 }
5485
5486 /* Implement targetm.vectorize.add_stmt_cost.  */
5487 static unsigned
5488 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5489                        struct _stmt_vec_info *stmt_info, int misalign,
5490                        enum vect_cost_model_location where)
5491 {
5492   unsigned *cost = (unsigned *) data;
5493   unsigned retval = 0;
5494
5495   if (flag_vect_cost_model)
5496     {
5497       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5498       int stmt_cost =
5499             aarch64_builtin_vectorization_cost (kind, vectype, misalign);
5500
5501       /* Statements in an inner loop relative to the loop being
5502          vectorized are weighted more heavily.  The value here is
5503          a function (linear for now) of the loop nest level.  */
5504       if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5505         {
5506           loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5507           struct loop *loop =  LOOP_VINFO_LOOP (loop_info);
5508           unsigned nest_level = loop_depth (loop);
5509
5510           count *= nest_level;
5511         }
5512
5513       retval = (unsigned) (count * stmt_cost);
5514       cost[where] += retval;
5515     }
5516
5517   return retval;
5518 }
5519
5520 static void initialize_aarch64_code_model (void);
5521
5522 /* Parse the architecture extension string.  */
5523
5524 static void
5525 aarch64_parse_extension (char *str)
5526 {
5527   /* The extension string is parsed left to right.  */
5528   const struct aarch64_option_extension *opt = NULL;
5529
5530   /* Flag to say whether we are adding or removing an extension.  */
5531   int adding_ext = -1;
5532
5533   while (str != NULL && *str != 0)
5534     {
5535       char *ext;
5536       size_t len;
5537
5538       str++;
5539       ext = strchr (str, '+');
5540
5541       if (ext != NULL)
5542         len = ext - str;
5543       else
5544         len = strlen (str);
5545
5546       if (len >= 2 && strncmp (str, "no", 2) == 0)
5547         {
5548           adding_ext = 0;
5549           len -= 2;
5550           str += 2;
5551         }
5552       else if (len > 0)
5553         adding_ext = 1;
5554
5555       if (len == 0)
5556         {
5557           error ("missing feature modifier after %qs", "+no");
5558           return;
5559         }
5560
5561       /* Scan over the extensions table trying to find an exact match.  */
5562       for (opt = all_extensions; opt->name != NULL; opt++)
5563         {
5564           if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5565             {
5566               /* Add or remove the extension.  */
5567               if (adding_ext)
5568                 aarch64_isa_flags |= opt->flags_on;
5569               else
5570                 aarch64_isa_flags &= ~(opt->flags_off);
5571               break;
5572             }
5573         }
5574
5575       if (opt->name == NULL)
5576         {
5577           /* Extension not found in list.  */
5578           error ("unknown feature modifier %qs", str);
5579           return;
5580         }
5581
5582       str = ext;
5583     };
5584
5585   return;
5586 }
5587
5588 /* Parse the ARCH string.  */
5589
5590 static void
5591 aarch64_parse_arch (void)
5592 {
5593   char *ext;
5594   const struct processor *arch;
5595   char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5596   size_t len;
5597
5598   strcpy (str, aarch64_arch_string);
5599
5600   ext = strchr (str, '+');
5601
5602   if (ext != NULL)
5603     len = ext - str;
5604   else
5605     len = strlen (str);
5606
5607   if (len == 0)
5608     {
5609       error ("missing arch name in -march=%qs", str);
5610       return;
5611     }
5612
5613   /* Loop through the list of supported ARCHs to find a match.  */
5614   for (arch = all_architectures; arch->name != NULL; arch++)
5615     {
5616       if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5617         {
5618           selected_arch = arch;
5619           aarch64_isa_flags = selected_arch->flags;
5620
5621           if (!selected_cpu)
5622             selected_cpu = &all_cores[selected_arch->core];
5623
5624           if (ext != NULL)
5625             {
5626               /* ARCH string contains at least one extension.  */
5627               aarch64_parse_extension (ext);
5628             }
5629
5630           if (strcmp (selected_arch->arch, selected_cpu->arch))
5631             {
5632               warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
5633                        selected_cpu->name, selected_arch->name);
5634             }
5635
5636           return;
5637         }
5638     }
5639
5640   /* ARCH name not found in list.  */
5641   error ("unknown value %qs for -march", str);
5642   return;
5643 }
5644
5645 /* Parse the CPU string.  */
5646
5647 static void
5648 aarch64_parse_cpu (void)
5649 {
5650   char *ext;
5651   const struct processor *cpu;
5652   char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5653   size_t len;
5654
5655   strcpy (str, aarch64_cpu_string);
5656
5657   ext = strchr (str, '+');
5658
5659   if (ext != NULL)
5660     len = ext - str;
5661   else
5662     len = strlen (str);
5663
5664   if (len == 0)
5665     {
5666       error ("missing cpu name in -mcpu=%qs", str);
5667       return;
5668     }
5669
5670   /* Loop through the list of supported CPUs to find a match.  */
5671   for (cpu = all_cores; cpu->name != NULL; cpu++)
5672     {
5673       if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5674         {
5675           selected_cpu = cpu;
5676           aarch64_isa_flags = selected_cpu->flags;
5677
5678           if (ext != NULL)
5679             {
5680               /* CPU string contains at least one extension.  */
5681               aarch64_parse_extension (ext);
5682             }
5683
5684           return;
5685         }
5686     }
5687
5688   /* CPU name not found in list.  */
5689   error ("unknown value %qs for -mcpu", str);
5690   return;
5691 }
5692
5693 /* Parse the TUNE string.  */
5694
5695 static void
5696 aarch64_parse_tune (void)
5697 {
5698   const struct processor *cpu;
5699   char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5700   strcpy (str, aarch64_tune_string);
5701
5702   /* Loop through the list of supported CPUs to find a match.  */
5703   for (cpu = all_cores; cpu->name != NULL; cpu++)
5704     {
5705       if (strcmp (cpu->name, str) == 0)
5706         {
5707           selected_tune = cpu;
5708           return;
5709         }
5710     }
5711
5712   /* CPU name not found in list.  */
5713   error ("unknown value %qs for -mtune", str);
5714   return;
5715 }
5716
5717
5718 /* Implement TARGET_OPTION_OVERRIDE.  */
5719
5720 static void
5721 aarch64_override_options (void)
5722 {
5723   /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
5724      If either of -march or -mtune is given, they override their
5725      respective component of -mcpu.
5726
5727      So, first parse AARCH64_CPU_STRING, then the others, be careful
5728      with -march as, if -mcpu is not present on the command line, march
5729      must set a sensible default CPU.  */
5730   if (aarch64_cpu_string)
5731     {
5732       aarch64_parse_cpu ();
5733     }
5734
5735   if (aarch64_arch_string)
5736     {
5737       aarch64_parse_arch ();
5738     }
5739
5740   if (aarch64_tune_string)
5741     {
5742       aarch64_parse_tune ();
5743     }
5744
5745 #ifndef HAVE_AS_MABI_OPTION
5746   /* The compiler may have been configured with 2.23.* binutils, which does
5747      not have support for ILP32.  */
5748   if (TARGET_ILP32)
5749     error ("Assembler does not support -mabi=ilp32");
5750 #endif
5751
5752   initialize_aarch64_code_model ();
5753
5754   aarch64_build_bitmask_table ();
5755
5756   /* This target defaults to strict volatile bitfields.  */
5757   if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5758     flag_strict_volatile_bitfields = 1;
5759
5760   /* If the user did not specify a processor, choose the default
5761      one for them.  This will be the CPU set during configuration using
5762      --with-cpu, otherwise it is "generic".  */
5763   if (!selected_cpu)
5764     {
5765       selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5766       aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5767     }
5768
5769   gcc_assert (selected_cpu);
5770
5771   if (!selected_tune)
5772     selected_tune = selected_cpu;
5773
5774   aarch64_tune_flags = selected_tune->flags;
5775   aarch64_tune = selected_tune->core;
5776   aarch64_tune_params = selected_tune->tune;
5777
5778   if (aarch64_fix_a53_err835769 == 2)
5779     {
5780 #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
5781       aarch64_fix_a53_err835769 = 1;
5782 #else
5783       aarch64_fix_a53_err835769 = 0;
5784 #endif
5785     }
5786
5787   aarch64_override_options_after_change ();
5788 }
5789
5790 /* Implement targetm.override_options_after_change.  */
5791
5792 static void
5793 aarch64_override_options_after_change (void)
5794 {
5795   if (flag_omit_frame_pointer)
5796     flag_omit_leaf_frame_pointer = false;
5797   else if (flag_omit_leaf_frame_pointer)
5798     flag_omit_frame_pointer = true;
5799 }
5800
/* Allocate and return a zero-initialized, garbage-collected
   machine_function structure.  */
static struct machine_function *
aarch64_init_machine_status (void)
{
  return ggc_alloc_cleared_machine_function ();
}
5808
5809 void
5810 aarch64_init_expanders (void)
5811 {
5812   init_machine_status = aarch64_init_machine_status;
5813 }
5814
5815 /* A checking mechanism for the implementation of the various code models.  */
5816 static void
5817 initialize_aarch64_code_model (void)
5818 {
5819    if (flag_pic)
5820      {
5821        switch (aarch64_cmodel_var)
5822          {
5823          case AARCH64_CMODEL_TINY:
5824            aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5825            break;
5826          case AARCH64_CMODEL_SMALL:
5827            aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5828            break;
5829          case AARCH64_CMODEL_LARGE:
5830            sorry ("code model %qs with -f%s", "large",
5831                   flag_pic > 1 ? "PIC" : "pic");
5832          default:
5833            gcc_unreachable ();
5834          }
5835      }
5836    else
5837      aarch64_cmodel = aarch64_cmodel_var;
5838 }
5839
5840 /* Return true if SYMBOL_REF X binds locally.  */
5841
5842 static bool
5843 aarch64_symbol_binds_local_p (const_rtx x)
5844 {
5845   return (SYMBOL_REF_DECL (x)
5846           ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5847           : SYMBOL_REF_LOCAL_P (x));
5848 }
5849
5850 /* Return true if SYMBOL_REF X is thread local */
5851 static bool
5852 aarch64_tls_symbol_p (rtx x)
5853 {
5854   if (! TARGET_HAVE_TLS)
5855     return false;
5856
5857   if (GET_CODE (x) != SYMBOL_REF)
5858     return false;
5859
5860   return SYMBOL_REF_TLS_MODEL (x) != 0;
5861 }
5862
5863 /* Classify a TLS symbol into one of the TLS kinds.  */
5864 enum aarch64_symbol_type
5865 aarch64_classify_tls_symbol (rtx x)
5866 {
5867   enum tls_model tls_kind = tls_symbolic_operand_type (x);
5868
5869   switch (tls_kind)
5870     {
5871     case TLS_MODEL_GLOBAL_DYNAMIC:
5872     case TLS_MODEL_LOCAL_DYNAMIC:
5873       return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5874
5875     case TLS_MODEL_INITIAL_EXEC:
5876       return SYMBOL_SMALL_GOTTPREL;
5877
5878     case TLS_MODEL_LOCAL_EXEC:
5879       return SYMBOL_SMALL_TPREL;
5880
5881     case TLS_MODEL_EMULATED:
5882     case TLS_MODEL_NONE:
5883       return SYMBOL_FORCE_TO_MEM;
5884
5885     default:
5886       gcc_unreachable ();
5887     }
5888 }
5889
5890 /* Return the method that should be used to access SYMBOL_REF or
5891    LABEL_REF X in context CONTEXT.  */
5892
5893 enum aarch64_symbol_type
5894 aarch64_classify_symbol (rtx x, rtx offset,
5895                          enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5896 {
5897   if (GET_CODE (x) == LABEL_REF)
5898     {
5899       switch (aarch64_cmodel)
5900         {
5901         case AARCH64_CMODEL_LARGE:
5902           return SYMBOL_FORCE_TO_MEM;
5903
5904         case AARCH64_CMODEL_TINY_PIC:
5905         case AARCH64_CMODEL_TINY:
5906           return SYMBOL_TINY_ABSOLUTE;
5907
5908         case AARCH64_CMODEL_SMALL_PIC:
5909         case AARCH64_CMODEL_SMALL:
5910           return SYMBOL_SMALL_ABSOLUTE;
5911
5912         default:
5913           gcc_unreachable ();
5914         }
5915     }
5916
5917   if (GET_CODE (x) == SYMBOL_REF)
5918     {
5919       if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5920           return SYMBOL_FORCE_TO_MEM;
5921
5922       if (aarch64_tls_symbol_p (x))
5923         return aarch64_classify_tls_symbol (x);
5924
5925       switch (aarch64_cmodel)
5926         {
5927         case AARCH64_CMODEL_TINY:
5928           /* When we retreive symbol + offset address, we have to make sure
5929              the offset does not cause overflow of the final address.  But
5930              we have no way of knowing the address of symbol at compile time
5931              so we can't accurately say if the distance between the PC and
5932              symbol + offset is outside the addressible range of +/-1M in the
5933              TINY code model.  So we rely on images not being greater than
5934              1M and cap the offset at 1M and anything beyond 1M will have to
5935              be loaded using an alternative mechanism.  */
5936           if (SYMBOL_REF_WEAK (x)
5937               || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
5938             return SYMBOL_FORCE_TO_MEM;
5939           return SYMBOL_TINY_ABSOLUTE;
5940
5941         case AARCH64_CMODEL_SMALL:
5942           /* Same reasoning as the tiny code model, but the offset cap here is
5943              4G.  */
5944           if (SYMBOL_REF_WEAK (x)
5945               || INTVAL (offset) < (HOST_WIDE_INT) -4294967263
5946               || INTVAL (offset) > (HOST_WIDE_INT) 4294967264)
5947             return SYMBOL_FORCE_TO_MEM;
5948           return SYMBOL_SMALL_ABSOLUTE;
5949
5950         case AARCH64_CMODEL_TINY_PIC:
5951           if (!aarch64_symbol_binds_local_p (x))
5952             return SYMBOL_TINY_GOT;
5953           return SYMBOL_TINY_ABSOLUTE;
5954
5955         case AARCH64_CMODEL_SMALL_PIC:
5956           if (!aarch64_symbol_binds_local_p (x))
5957             return SYMBOL_SMALL_GOT;
5958           return SYMBOL_SMALL_ABSOLUTE;
5959
5960         default:
5961           gcc_unreachable ();
5962         }
5963     }
5964
5965   /* By default push everything into the constant pool.  */
5966   return SYMBOL_FORCE_TO_MEM;
5967 }
5968
5969 bool
5970 aarch64_constant_address_p (rtx x)
5971 {
5972   return (CONSTANT_P (x) && memory_address_p (DImode, x));
5973 }
5974
5975 bool
5976 aarch64_legitimate_pic_operand_p (rtx x)
5977 {
5978   if (GET_CODE (x) == SYMBOL_REF
5979       || (GET_CODE (x) == CONST
5980           && GET_CODE (XEXP (x, 0)) == PLUS
5981           && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5982      return false;
5983
5984   return true;
5985 }
5986
5987 /* Return true if X holds either a quarter-precision or
5988      floating-point +0.0 constant.  */
5989 static bool
5990 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5991 {
5992   if (!CONST_DOUBLE_P (x))
5993     return false;
5994
5995   /* TODO: We could handle moving 0.0 to a TFmode register,
5996      but first we would like to refactor the movtf_aarch64
5997      to be more amicable to split moves properly and
5998      correctly gate on TARGET_SIMD.  For now - reject all
5999      constants which are not to SFmode or DFmode registers.  */
6000   if (!(mode == SFmode || mode == DFmode))
6001     return false;
6002
6003   if (aarch64_float_const_zero_rtx_p (x))
6004     return true;
6005   return aarch64_float_const_representable_p (x);
6006 }
6007
6008 static bool
6009 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
6010 {
6011   /* Do not allow vector struct mode constants.  We could support
6012      0 and -1 easily, but they need support in aarch64-simd.md.  */
6013   if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
6014     return false;
6015
6016   /* This could probably go away because
6017      we now decompose CONST_INTs according to expand_mov_immediate.  */
6018   if ((GET_CODE (x) == CONST_VECTOR
6019        && aarch64_simd_valid_immediate (x, mode, false, NULL))
6020       || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
6021         return !targetm.cannot_force_const_mem (mode, x);
6022
6023   if (GET_CODE (x) == HIGH
6024       && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6025     return true;
6026
6027   return aarch64_constant_address_p (x);
6028 }
6029
6030 rtx
6031 aarch64_load_tp (rtx target)
6032 {
6033   if (!target
6034       || GET_MODE (target) != Pmode
6035       || !register_operand (target, Pmode))
6036     target = gen_reg_rtx (Pmode);
6037
6038   /* Can return in any reg.  */
6039   emit_insn (gen_aarch64_load_tp_hard (target));
6040   return target;
6041 }
6042
6043 /* On AAPCS systems, this is the "struct __va_list".  */
6044 static GTY(()) tree va_list_type;
6045
6046 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6047    Return the type to use as __builtin_va_list.
6048
6049    AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6050
6051    struct __va_list
6052    {
6053      void *__stack;
6054      void *__gr_top;
6055      void *__vr_top;
6056      int   __gr_offs;
6057      int   __vr_offs;
6058    };  */
6059
6060 static tree
6061 aarch64_build_builtin_va_list (void)
6062 {
6063   tree va_list_name;
6064   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6065
6066   /* Create the type.  */
6067   va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6068   /* Give it the required name.  */
6069   va_list_name = build_decl (BUILTINS_LOCATION,
6070                              TYPE_DECL,
6071                              get_identifier ("__va_list"),
6072                              va_list_type);
6073   DECL_ARTIFICIAL (va_list_name) = 1;
6074   TYPE_NAME (va_list_type) = va_list_name;
6075   TYPE_STUB_DECL (va_list_type) = va_list_name;
6076
6077   /* Create the fields.  */
6078   f_stack = build_decl (BUILTINS_LOCATION,
6079                         FIELD_DECL, get_identifier ("__stack"),
6080                         ptr_type_node);
6081   f_grtop = build_decl (BUILTINS_LOCATION,
6082                         FIELD_DECL, get_identifier ("__gr_top"),
6083                         ptr_type_node);
6084   f_vrtop = build_decl (BUILTINS_LOCATION,
6085                         FIELD_DECL, get_identifier ("__vr_top"),
6086                         ptr_type_node);
6087   f_groff = build_decl (BUILTINS_LOCATION,
6088                         FIELD_DECL, get_identifier ("__gr_offs"),
6089                         integer_type_node);
6090   f_vroff = build_decl (BUILTINS_LOCATION,
6091                         FIELD_DECL, get_identifier ("__vr_offs"),
6092                         integer_type_node);
6093
6094   DECL_ARTIFICIAL (f_stack) = 1;
6095   DECL_ARTIFICIAL (f_grtop) = 1;
6096   DECL_ARTIFICIAL (f_vrtop) = 1;
6097   DECL_ARTIFICIAL (f_groff) = 1;
6098   DECL_ARTIFICIAL (f_vroff) = 1;
6099
6100   DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6101   DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6102   DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6103   DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6104   DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6105
6106   TYPE_FIELDS (va_list_type) = f_stack;
6107   DECL_CHAIN (f_stack) = f_grtop;
6108   DECL_CHAIN (f_grtop) = f_vrtop;
6109   DECL_CHAIN (f_vrtop) = f_groff;
6110   DECL_CHAIN (f_groff) = f_vroff;
6111
6112   /* Compute its layout.  */
6113   layout_type (va_list_type);
6114
6115   return va_list_type;
6116 }
6117
6118 /* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
6119 static void
6120 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6121 {
6122   const CUMULATIVE_ARGS *cum;
6123   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6124   tree stack, grtop, vrtop, groff, vroff;
6125   tree t;
6126   int gr_save_area_size;
6127   int vr_save_area_size;
6128   int vr_offset;
6129
6130   cum = &crtl->args.info;
6131   gr_save_area_size
6132     = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6133   vr_save_area_size
6134     = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6135
6136   if (TARGET_GENERAL_REGS_ONLY)
6137     {
6138       if (cum->aapcs_nvrn > 0)
6139         sorry ("%qs and floating point or vector arguments",
6140                "-mgeneral-regs-only");
6141       vr_save_area_size = 0;
6142     }
6143
6144   f_stack = TYPE_FIELDS (va_list_type_node);
6145   f_grtop = DECL_CHAIN (f_stack);
6146   f_vrtop = DECL_CHAIN (f_grtop);
6147   f_groff = DECL_CHAIN (f_vrtop);
6148   f_vroff = DECL_CHAIN (f_groff);
6149
6150   stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6151                   NULL_TREE);
6152   grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6153                   NULL_TREE);
6154   vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6155                   NULL_TREE);
6156   groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6157                   NULL_TREE);
6158   vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6159                   NULL_TREE);
6160
6161   /* Emit code to initialize STACK, which points to the next varargs stack
6162      argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
6163      by named arguments.  STACK is 8-byte aligned.  */
6164   t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6165   if (cum->aapcs_stack_size > 0)
6166     t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6167   t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6168   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6169
6170   /* Emit code to initialize GRTOP, the top of the GR save area.
6171      virtual_incoming_args_rtx should have been 16 byte aligned.  */
6172   t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6173   t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6174   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6175
6176   /* Emit code to initialize VRTOP, the top of the VR save area.
6177      This address is gr_save_area_bytes below GRTOP, rounded
6178      down to the next 16-byte boundary.  */
6179   t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6180   vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6181                              STACK_BOUNDARY / BITS_PER_UNIT);
6182
6183   if (vr_offset)
6184     t = fold_build_pointer_plus_hwi (t, -vr_offset);
6185   t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6186   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6187
6188   /* Emit code to initialize GROFF, the offset from GRTOP of the
6189      next GPR argument.  */
6190   t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6191               build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6192   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6193
6194   /* Likewise emit code to initialize VROFF, the offset from FTOP
6195      of the next VR argument.  */
6196   t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6197               build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6198   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6199 }
6200
6201 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
6202
6203 static tree
6204 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6205                               gimple_seq *post_p ATTRIBUTE_UNUSED)
6206 {
6207   tree addr;
6208   bool indirect_p;
6209   bool is_ha;           /* is HFA or HVA.  */
6210   bool dw_align;        /* double-word align.  */
6211   enum machine_mode ag_mode = VOIDmode;
6212   int nregs;
6213   enum machine_mode mode;
6214
6215   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6216   tree stack, f_top, f_off, off, arg, roundup, on_stack;
6217   HOST_WIDE_INT size, rsize, adjust, align;
6218   tree t, u, cond1, cond2;
6219
6220   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6221   if (indirect_p)
6222     type = build_pointer_type (type);
6223
6224   mode = TYPE_MODE (type);
6225
6226   f_stack = TYPE_FIELDS (va_list_type_node);
6227   f_grtop = DECL_CHAIN (f_stack);
6228   f_vrtop = DECL_CHAIN (f_grtop);
6229   f_groff = DECL_CHAIN (f_vrtop);
6230   f_vroff = DECL_CHAIN (f_groff);
6231
6232   stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6233                   f_stack, NULL_TREE);
6234   size = int_size_in_bytes (type);
6235   align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6236
6237   dw_align = false;
6238   adjust = 0;
6239   if (aarch64_vfp_is_call_or_return_candidate (mode,
6240                                                type,
6241                                                &ag_mode,
6242                                                &nregs,
6243                                                &is_ha))
6244     {
6245       /* TYPE passed in fp/simd registers.  */
6246       if (TARGET_GENERAL_REGS_ONLY)
6247         sorry ("%qs and floating point or vector arguments",
6248                "-mgeneral-regs-only");
6249
6250       f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6251                       unshare_expr (valist), f_vrtop, NULL_TREE);
6252       f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6253                       unshare_expr (valist), f_vroff, NULL_TREE);
6254
6255       rsize = nregs * UNITS_PER_VREG;
6256
6257       if (is_ha)
6258         {
6259           if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6260             adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6261         }
6262       else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6263                && size < UNITS_PER_VREG)
6264         {
6265           adjust = UNITS_PER_VREG - size;
6266         }
6267     }
6268   else
6269     {
6270       /* TYPE passed in general registers.  */
6271       f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6272                       unshare_expr (valist), f_grtop, NULL_TREE);
6273       f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6274                       unshare_expr (valist), f_groff, NULL_TREE);
6275       rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6276       nregs = rsize / UNITS_PER_WORD;
6277
6278       if (align > 8)
6279         dw_align = true;
6280
6281       if (BLOCK_REG_PADDING (mode, type, 1) == downward
6282           && size < UNITS_PER_WORD)
6283         {
6284           adjust = UNITS_PER_WORD  - size;
6285         }
6286     }
6287
6288   /* Get a local temporary for the field value.  */
6289   off = get_initialized_tmp_var (f_off, pre_p, NULL);
6290
6291   /* Emit code to branch if off >= 0.  */
6292   t = build2 (GE_EXPR, boolean_type_node, off,
6293               build_int_cst (TREE_TYPE (off), 0));
6294   cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6295
6296   if (dw_align)
6297     {
6298       /* Emit: offs = (offs + 15) & -16.  */
6299       t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6300                   build_int_cst (TREE_TYPE (off), 15));
6301       t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6302                   build_int_cst (TREE_TYPE (off), -16));
6303       roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6304     }
6305   else
6306     roundup = NULL;
6307
6308   /* Update ap.__[g|v]r_offs  */
6309   t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6310               build_int_cst (TREE_TYPE (off), rsize));
6311   t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6312
6313   /* String up.  */
6314   if (roundup)
6315     t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6316
6317   /* [cond2] if (ap.__[g|v]r_offs > 0)  */
6318   u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6319               build_int_cst (TREE_TYPE (f_off), 0));
6320   cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6321
6322   /* String up: make sure the assignment happens before the use.  */
6323   t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6324   COND_EXPR_ELSE (cond1) = t;
6325
6326   /* Prepare the trees handling the argument that is passed on the stack;
6327      the top level node will store in ON_STACK.  */
6328   arg = get_initialized_tmp_var (stack, pre_p, NULL);
6329   if (align > 8)
6330     {
6331       /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
6332       t = fold_convert (intDI_type_node, arg);
6333       t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6334                   build_int_cst (TREE_TYPE (t), 15));
6335       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6336                   build_int_cst (TREE_TYPE (t), -16));
6337       t = fold_convert (TREE_TYPE (arg), t);
6338       roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
6339     }
6340   else
6341     roundup = NULL;
6342   /* Advance ap.__stack  */
6343   t = fold_convert (intDI_type_node, arg);
6344   t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6345               build_int_cst (TREE_TYPE (t), size + 7));
6346   t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6347               build_int_cst (TREE_TYPE (t), -8));
6348   t = fold_convert (TREE_TYPE (arg), t);
6349   t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
6350   /* String up roundup and advance.  */
6351   if (roundup)
6352     t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6353   /* String up with arg */
6354   on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
6355   /* Big-endianness related address adjustment.  */
6356   if (BLOCK_REG_PADDING (mode, type, 1) == downward
6357       && size < UNITS_PER_WORD)
6358   {
6359     t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
6360                 size_int (UNITS_PER_WORD - size));
6361     on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
6362   }
6363
6364   COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
6365   COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
6366
6367   /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
6368   t = off;
6369   if (adjust)
6370     t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
6371                 build_int_cst (TREE_TYPE (off), adjust));
6372
6373   t = fold_convert (sizetype, t);
6374   t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
6375
6376   if (is_ha)
6377     {
6378       /* type ha; // treat as "struct {ftype field[n];}"
6379          ... [computing offs]
6380          for (i = 0; i <nregs; ++i, offs += 16)
6381            ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6382          return ha;  */
6383       int i;
6384       tree tmp_ha, field_t, field_ptr_t;
6385
6386       /* Declare a local variable.  */
6387       tmp_ha = create_tmp_var_raw (type, "ha");
6388       gimple_add_tmp_var (tmp_ha);
6389
6390       /* Establish the base type.  */
6391       switch (ag_mode)
6392         {
6393         case SFmode:
6394           field_t = float_type_node;
6395           field_ptr_t = float_ptr_type_node;
6396           break;
6397         case DFmode:
6398           field_t = double_type_node;
6399           field_ptr_t = double_ptr_type_node;
6400           break;
6401         case TFmode:
6402           field_t = long_double_type_node;
6403           field_ptr_t = long_double_ptr_type_node;
6404           break;
6405 /* The half precision and quad precision are not fully supported yet.  Enable
6406    the following code after the support is complete.  Need to find the correct
6407    type node for __fp16 *.  */
6408 #if 0
6409         case HFmode:
6410           field_t = float_type_node;
6411           field_ptr_t = float_ptr_type_node;
6412           break;
6413 #endif
6414         case V2SImode:
6415         case V4SImode:
6416             {
6417               tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
6418               field_t = build_vector_type_for_mode (innertype, ag_mode);
6419               field_ptr_t = build_pointer_type (field_t);
6420             }
6421           break;
6422         default:
6423           gcc_assert (0);
6424         }
6425
6426       /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area  */
6427       tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
6428       addr = t;
6429       t = fold_convert (field_ptr_t, addr);
6430       t = build2 (MODIFY_EXPR, field_t,
6431                   build1 (INDIRECT_REF, field_t, tmp_ha),
6432                   build1 (INDIRECT_REF, field_t, t));
6433
6434       /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
6435       for (i = 1; i < nregs; ++i)
6436         {
6437           addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
6438           u = fold_convert (field_ptr_t, addr);
6439           u = build2 (MODIFY_EXPR, field_t,
6440                       build2 (MEM_REF, field_t, tmp_ha,
6441                               build_int_cst (field_ptr_t,
6442                                              (i *
6443                                               int_size_in_bytes (field_t)))),
6444                       build1 (INDIRECT_REF, field_t, u));
6445           t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
6446         }
6447
6448       u = fold_convert (TREE_TYPE (f_top), tmp_ha);
6449       t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
6450     }
6451
6452   COND_EXPR_ELSE (cond2) = t;
6453   addr = fold_convert (build_pointer_type (type), cond1);
6454   addr = build_va_arg_indirect_ref (addr);
6455
6456   if (indirect_p)
6457     addr = build_va_arg_indirect_ref (addr);
6458
6459   return addr;
6460 }
6461
6462 /* Implement TARGET_SETUP_INCOMING_VARARGS.  */
6463
6464 static void
6465 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
6466                                 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6467                                 int no_rtl)
6468 {
6469   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6470   CUMULATIVE_ARGS local_cum;
6471   int gr_saved, vr_saved;
6472
6473   /* The caller has advanced CUM up to, but not beyond, the last named
6474      argument.  Advance a local copy of CUM past the last "real" named
6475      argument, to find out how many registers are left over.  */
6476   local_cum = *cum;
6477   aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
6478
6479   /* Found out how many registers we need to save.  */
6480   gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
6481   vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
6482
6483   if (TARGET_GENERAL_REGS_ONLY)
6484     {
6485       if (local_cum.aapcs_nvrn > 0)
6486         sorry ("%qs and floating point or vector arguments",
6487                "-mgeneral-regs-only");
6488       vr_saved = 0;
6489     }
6490
6491   if (!no_rtl)
6492     {
6493       if (gr_saved > 0)
6494         {
6495           rtx ptr, mem;
6496
6497           /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
6498           ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
6499                                - gr_saved * UNITS_PER_WORD);
6500           mem = gen_frame_mem (BLKmode, ptr);
6501           set_mem_alias_set (mem, get_varargs_alias_set ());
6502
6503           move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
6504                                mem, gr_saved);
6505         }
6506       if (vr_saved > 0)
6507         {
6508           /* We can't use move_block_from_reg, because it will use
6509              the wrong mode, storing D regs only.  */
6510           enum machine_mode mode = TImode;
6511           int off, i;
6512
6513           /* Set OFF to the offset from virtual_incoming_args_rtx of
6514              the first vector register.  The VR save area lies below
6515              the GR one, and is aligned to 16 bytes.  */
6516           off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6517                                    STACK_BOUNDARY / BITS_PER_UNIT);
6518           off -= vr_saved * UNITS_PER_VREG;
6519
6520           for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
6521             {
6522               rtx ptr, mem;
6523
6524               ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
6525               mem = gen_frame_mem (mode, ptr);
6526               set_mem_alias_set (mem, get_varargs_alias_set ());
6527               aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
6528               off += UNITS_PER_VREG;
6529             }
6530         }
6531     }
6532
6533   /* We don't save the size into *PRETEND_SIZE because we want to avoid
6534      any complication of having crtl->args.pretend_args_size changed.  */
6535   cfun->machine->saved_varargs_size
6536     = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6537                       STACK_BOUNDARY / BITS_PER_UNIT)
6538        + vr_saved * UNITS_PER_VREG);
6539 }
6540
6541 static void
6542 aarch64_conditional_register_usage (void)
6543 {
6544   int i;
6545   if (!TARGET_FLOAT)
6546     {
6547       for (i = V0_REGNUM; i <= V31_REGNUM; i++)
6548         {
6549           fixed_regs[i] = 1;
6550           call_used_regs[i] = 1;
6551         }
6552     }
6553 }
6554
6555 /* Walk down the type tree of TYPE counting consecutive base elements.
6556    If *MODEP is VOIDmode, then set it to the first valid floating point
6557    type.  If a non-floating point type is found, or if a floating point
6558    type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6559    otherwise return the count in the sub-tree.  */
6560 static int
6561 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6562 {
6563   enum machine_mode mode;
6564   HOST_WIDE_INT size;
6565
6566   switch (TREE_CODE (type))
6567     {
6568     case REAL_TYPE:
6569       mode = TYPE_MODE (type);
6570       if (mode != DFmode && mode != SFmode && mode != TFmode)
6571         return -1;
6572
6573       if (*modep == VOIDmode)
6574         *modep = mode;
6575
6576       if (*modep == mode)
6577         return 1;
6578
6579       break;
6580
6581     case COMPLEX_TYPE:
6582       mode = TYPE_MODE (TREE_TYPE (type));
6583       if (mode != DFmode && mode != SFmode && mode != TFmode)
6584         return -1;
6585
6586       if (*modep == VOIDmode)
6587         *modep = mode;
6588
6589       if (*modep == mode)
6590         return 2;
6591
6592       break;
6593
6594     case VECTOR_TYPE:
6595       /* Use V2SImode and V4SImode as representatives of all 64-bit
6596          and 128-bit vector types.  */
6597       size = int_size_in_bytes (type);
6598       switch (size)
6599         {
6600         case 8:
6601           mode = V2SImode;
6602           break;
6603         case 16:
6604           mode = V4SImode;
6605           break;
6606         default:
6607           return -1;
6608         }
6609
6610       if (*modep == VOIDmode)
6611         *modep = mode;
6612
6613       /* Vector modes are considered to be opaque: two vectors are
6614          equivalent for the purposes of being homogeneous aggregates
6615          if they are the same size.  */
6616       if (*modep == mode)
6617         return 1;
6618
6619       break;
6620
6621     case ARRAY_TYPE:
6622       {
6623         int count;
6624         tree index = TYPE_DOMAIN (type);
6625
6626         /* Can't handle incomplete types.  */
6627         if (!COMPLETE_TYPE_P (type))
6628           return -1;
6629
6630         count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6631         if (count == -1
6632             || !index
6633             || !TYPE_MAX_VALUE (index)
6634             || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6635             || !TYPE_MIN_VALUE (index)
6636             || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6637             || count < 0)
6638           return -1;
6639
6640         count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6641                       - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6642
6643         /* There must be no padding.  */
6644         if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6645             || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6646                 != count * GET_MODE_BITSIZE (*modep)))
6647           return -1;
6648
6649         return count;
6650       }
6651
6652     case RECORD_TYPE:
6653       {
6654         int count = 0;
6655         int sub_count;
6656         tree field;
6657
6658         /* Can't handle incomplete types.  */
6659         if (!COMPLETE_TYPE_P (type))
6660           return -1;
6661
6662         for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6663           {
6664             if (TREE_CODE (field) != FIELD_DECL)
6665               continue;
6666
6667             sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6668             if (sub_count < 0)
6669               return -1;
6670             count += sub_count;
6671           }
6672
6673         /* There must be no padding.  */
6674         if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6675             || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6676                 != count * GET_MODE_BITSIZE (*modep)))
6677           return -1;
6678
6679         return count;
6680       }
6681
6682     case UNION_TYPE:
6683     case QUAL_UNION_TYPE:
6684       {
6685         /* These aren't very interesting except in a degenerate case.  */
6686         int count = 0;
6687         int sub_count;
6688         tree field;
6689
6690         /* Can't handle incomplete types.  */
6691         if (!COMPLETE_TYPE_P (type))
6692           return -1;
6693
6694         for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6695           {
6696             if (TREE_CODE (field) != FIELD_DECL)
6697               continue;
6698
6699             sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6700             if (sub_count < 0)
6701               return -1;
6702             count = count > sub_count ? count : sub_count;
6703           }
6704
6705         /* There must be no padding.  */
6706         if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6707             || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6708                 != count * GET_MODE_BITSIZE (*modep)))
6709           return -1;
6710
6711         return count;
6712       }
6713
6714     default:
6715       break;
6716     }
6717
6718   return -1;
6719 }
6720
6721 /* Return true if we use LRA instead of reload pass.  */
6722 static bool
6723 aarch64_lra_p (void)
6724 {
6725   return aarch64_lra_flag;
6726 }
6727
6728 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6729    type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
6730    array types.  The C99 floating-point complex types are also considered
6731    as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
6732    types, which are GCC extensions and out of the scope of AAPCS64, are
6733    treated as composite types here as well.
6734
6735    Note that MODE itself is not sufficient in determining whether a type
6736    is such a composite type or not.  This is because
6737    stor-layout.c:compute_record_mode may have already changed the MODE
6738    (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
6739    structure with only one field may have its MODE set to the mode of the
6740    field.  Also an integer mode whose size matches the size of the
6741    RECORD_TYPE type may be used to substitute the original mode
6742    (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
6743    solely relied on.  */
6744
6745 static bool
6746 aarch64_composite_type_p (const_tree type,
6747                           enum machine_mode mode)
6748 {
6749   if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6750     return true;
6751
6752   if (mode == BLKmode
6753       || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6754       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6755     return true;
6756
6757   return false;
6758 }
6759
6760 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6761    type as described in AAPCS64 \S 4.1.2.
6762
6763    See the comment above aarch64_composite_type_p for the notes on MODE.  */
6764
6765 static bool
6766 aarch64_short_vector_p (const_tree type,
6767                         enum machine_mode mode)
6768 {
6769   HOST_WIDE_INT size = -1;
6770
6771   if (type && TREE_CODE (type) == VECTOR_TYPE)
6772     size = int_size_in_bytes (type);
6773   else if (!aarch64_composite_type_p (type, mode)
6774            && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6775                || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6776     size = GET_MODE_SIZE (mode);
6777
6778   return (size == 8 || size == 16) ? true : false;
6779 }
6780
6781 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6782    shall be passed or returned in simd/fp register(s) (providing these
6783    parameter passing registers are available).
6784
6785    Upon successful return, *COUNT returns the number of needed registers,
6786    *BASE_MODE returns the mode of the individual register and when IS_HAF
6787    is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6788    floating-point aggregate or a homogeneous short-vector aggregate.  */
6789
6790 static bool
6791 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6792                                          const_tree type,
6793                                          enum machine_mode *base_mode,
6794                                          int *count,
6795                                          bool *is_ha)
6796 {
6797   enum machine_mode new_mode = VOIDmode;
6798   bool composite_p = aarch64_composite_type_p (type, mode);
6799
6800   if (is_ha != NULL) *is_ha = false;
6801
6802   if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6803       || aarch64_short_vector_p (type, mode))
6804     {
6805       *count = 1;
6806       new_mode = mode;
6807     }
6808   else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6809     {
6810       if (is_ha != NULL) *is_ha = true;
6811       *count = 2;
6812       new_mode = GET_MODE_INNER (mode);
6813     }
6814   else if (type && composite_p)
6815     {
6816       int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6817
6818       if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6819         {
6820           if (is_ha != NULL) *is_ha = true;
6821           *count = ag_count;
6822         }
6823       else
6824         return false;
6825     }
6826   else
6827     return false;
6828
6829   *base_mode = new_mode;
6830   return true;
6831 }
6832
6833 /* Implement TARGET_STRUCT_VALUE_RTX.  */
6834
6835 static rtx
6836 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6837                           int incoming ATTRIBUTE_UNUSED)
6838 {
6839   return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6840 }
6841
6842 /* Implements target hook vector_mode_supported_p.  */
6843 static bool
6844 aarch64_vector_mode_supported_p (enum machine_mode mode)
6845 {
6846   if (TARGET_SIMD
6847       && (mode == V4SImode  || mode == V8HImode
6848           || mode == V16QImode || mode == V2DImode
6849           || mode == V2SImode  || mode == V4HImode
6850           || mode == V8QImode || mode == V2SFmode
6851           || mode == V4SFmode || mode == V2DFmode
6852           || mode == V1DFmode))
6853     return true;
6854
6855   return false;
6856 }
6857
6858 /* Return appropriate SIMD container
6859    for MODE within a vector of WIDTH bits.  */
6860 static enum machine_mode
6861 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6862 {
6863   gcc_assert (width == 64 || width == 128);
6864   if (TARGET_SIMD)
6865     {
6866       if (width == 128)
6867         switch (mode)
6868           {
6869           case DFmode:
6870             return V2DFmode;
6871           case SFmode:
6872             return V4SFmode;
6873           case SImode:
6874             return V4SImode;
6875           case HImode:
6876             return V8HImode;
6877           case QImode:
6878             return V16QImode;
6879           case DImode:
6880             return V2DImode;
6881           default:
6882             break;
6883           }
6884       else
6885         switch (mode)
6886           {
6887           case SFmode:
6888             return V2SFmode;
6889           case SImode:
6890             return V2SImode;
6891           case HImode:
6892             return V4HImode;
6893           case QImode:
6894             return V8QImode;
6895           default:
6896             break;
6897           }
6898     }
6899   return word_mode;
6900 }
6901
6902 /* Return 128-bit container as the preferred SIMD mode for MODE.  */
6903 static enum machine_mode
6904 aarch64_preferred_simd_mode (enum machine_mode mode)
6905 {
6906   return aarch64_simd_container_mode (mode, 128);
6907 }
6908
/* Return the bitmask of possible vector sizes for the vectorizer to
   iterate over: the values 16 and 8 OR-ed together.  */
static unsigned int
aarch64_autovectorize_vector_sizes (void)
{
  unsigned int sizes = 0;

  sizes |= 16;
  sizes |= 8;

  return sizes;
}
6916
/* A table to help perform AArch64-specific name mangling for AdvSIMD
   vector types in order to conform to the AAPCS64 (see "Procedure
   Call Standard for the ARM 64-bit Architecture", Appendix A).  To
   qualify for emission with the mangled names defined in that document,
   a vector type must not only be of the correct mode but also be
   composed of AdvSIMD vector element types (e.g.
   __builtin_aarch64_simd_qi); these types are registered by
   aarch64_init_simd_builtins ().  In other words, vector types defined
   in other ways e.g. via vector_size attribute will get default
   mangled names.

   Several entries share a mode and are told apart by the element type
   name only.  Each mangled name starts with the decimal length of the
   identifier that follows it.  The table is terminated by an entry
   whose mode is VOIDmode.  */
typedef struct
{
  enum machine_mode mode;          /* Mode of the vector type.  */
  const char *element_type_name;   /* Name of the vector element type.  */
  const char *mangled_name;        /* Mangled name to emit on a match.  */
} aarch64_simd_mangle_map_entry;

static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_aarch64_simd_qi",     "10__Int8x8_t" },
  { V8QImode,  "__builtin_aarch64_simd_uqi",    "11__Uint8x8_t" },
  { V4HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x4_t" },
  { V4HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x4_t" },
  { V2SImode,  "__builtin_aarch64_simd_si",     "11__Int32x2_t" },
  { V2SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x2_t" },
  { V2SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x2_t" },
  { V8QImode,  "__builtin_aarch64_simd_poly8",  "11__Poly8x8_t" },
  { V4HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_aarch64_simd_qi",     "11__Int8x16_t" },
  { V16QImode, "__builtin_aarch64_simd_uqi",    "12__Uint8x16_t" },
  { V8HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x8_t" },
  { V8HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x8_t" },
  { V4SImode,  "__builtin_aarch64_simd_si",     "11__Int32x4_t" },
  { V4SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x4_t" },
  { V2DImode,  "__builtin_aarch64_simd_di",     "11__Int64x2_t" },
  { V2DImode,  "__builtin_aarch64_simd_udi",    "12__Uint64x2_t" },
  { V4SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x4_t" },
  { V2DFmode,  "__builtin_aarch64_simd_df",     "13__Float64x2_t" },
  { V16QImode, "__builtin_aarch64_simd_poly8",  "12__Poly8x16_t" },
  { V8HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
  { V2DImode,  "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
  /* End-of-table sentinel.  */
  { VOIDmode, NULL, NULL }
};
6961
6962 /* Implement TARGET_MANGLE_TYPE.  */
6963
6964 static const char *
6965 aarch64_mangle_type (const_tree type)
6966 {
6967   /* The AArch64 ABI documents say that "__va_list" has to be
6968      managled as if it is in the "std" namespace.  */
6969   if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6970     return "St9__va_list";
6971
6972   /* Check the mode of the vector type, and the name of the vector
6973      element type, against the table.  */
6974   if (TREE_CODE (type) == VECTOR_TYPE)
6975     {
6976       aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6977
6978       while (pos->mode != VOIDmode)
6979         {
6980           tree elt_type = TREE_TYPE (type);
6981
6982           if (pos->mode == TYPE_MODE (type)
6983               && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6984               && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6985                           pos->element_type_name))
6986             return pos->mangled_name;
6987
6988           pos++;
6989         }
6990     }
6991
6992   /* Use the default mangling.  */
6993   return NULL;
6994 }
6995
6996 static int
6997 is_mem_p (rtx *x, void *data ATTRIBUTE_UNUSED)
6998 {
6999   return MEM_P (*x);
7000 }
7001
7002 static bool
7003 is_memory_op (rtx mem_insn)
7004 {
7005    rtx pattern = PATTERN (mem_insn);
7006    return for_each_rtx (&pattern, is_mem_p, NULL);
7007 }
7008
7009 /* Find the first rtx before insn that will generate an assembly
7010    instruction.  */
7011
7012 static rtx
7013 aarch64_prev_real_insn (rtx insn)
7014 {
7015   if (!insn)
7016     return NULL;
7017
7018   do
7019     {
7020       insn = prev_real_insn (insn);
7021     }
7022   while (insn && recog_memoized (insn) < 0);
7023
7024   return insn;
7025 }
7026
7027 static bool
7028 is_madd_op (enum attr_type t1)
7029 {
7030   unsigned int i;
7031   /* A number of these may be AArch32 only.  */
7032   enum attr_type mlatypes[] = {
7033     TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
7034     TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
7035     TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS,TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
7036   };
7037
7038   for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
7039     {
7040       if (t1 == mlatypes[i])
7041         return true;
7042     }
7043
7044   return false;
7045 }
7046
7047 /* Check if there is a register dependency between a load and the insn
7048    for which we hold recog_data.  */
7049
7050 static bool
7051 dep_between_memop_and_curr (rtx memop)
7052 {
7053   rtx load_reg;
7054   int opno;
7055
7056   if (!memop)
7057     return false;
7058
7059   if (!REG_P (SET_DEST (memop)))
7060     return false;
7061
7062   load_reg = SET_DEST (memop);
7063   for (opno = 0; opno < recog_data.n_operands; opno++)
7064     {
7065       rtx operand = recog_data.operand[opno];
7066       if (REG_P (operand)
7067           && reg_overlap_mentioned_p (load_reg, operand))
7068         return true;
7069
7070     }
7071   return false;
7072 }
7073
7074 bool
7075 aarch64_madd_needs_nop (rtx insn)
7076 {
7077   enum attr_type attr_type;
7078   rtx prev;
7079   rtx body;
7080
7081   if (!aarch64_fix_a53_err835769)
7082     return false;
7083
7084   if (recog_memoized (insn) < 0)
7085     return false;
7086
7087   attr_type = get_attr_type (insn);
7088   if (!is_madd_op (attr_type))
7089     return false;
7090
7091   prev = aarch64_prev_real_insn (insn);
7092   /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
7093      Restore recog state to INSN to avoid state corruption.  */
7094   extract_constrain_insn_cached (insn);
7095
7096   if (!prev)
7097     return false;
7098
7099   body = single_set (prev);
7100
7101   /* If the previous insn is a memory op and there is no dependency between
7102      it and the madd, emit a nop between them.  If we know the previous insn is
7103      a memory op but body is NULL, emit the nop to be safe, it's probably a
7104      load/store pair insn.  */
7105   if (is_memory_op (prev)
7106       && GET_MODE (recog_data.operand[0]) == DImode
7107       && (!dep_between_memop_and_curr (body)))
7108     return true;
7109
7110   return false;
7111
7112 }
7113
7114 void
7115 aarch64_final_prescan_insn (rtx insn)
7116 {
7117   if (aarch64_madd_needs_nop (insn))
7118     fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
7119 }
7120
7121
/* Return the equivalent letter for SIZE: 'd' for 64, 's' for 32, 'h'
   for 16 and 'b' for 8.  Any other value is a caller error.  */
static char
sizetochar (int size)
{
  if (size == 64)
    return 'd';
  if (size == 32)
    return 's';
  if (size == 16)
    return 'h';
  if (size == 8)
    return 'b';

  gcc_unreachable ();
}
7135
7136 /* Return true iff x is a uniform vector of floating-point
7137    constants, and the constant can be represented in
7138    quarter-precision form.  Note, as aarch64_float_const_representable
7139    rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
7140 static bool
7141 aarch64_vect_float_const_representable_p (rtx x)
7142 {
7143   int i = 0;
7144   REAL_VALUE_TYPE r0, ri;
7145   rtx x0, xi;
7146
7147   if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7148     return false;
7149
7150   x0 = CONST_VECTOR_ELT (x, 0);
7151   if (!CONST_DOUBLE_P (x0))
7152     return false;
7153
7154   REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7155
7156   for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7157     {
7158       xi = CONST_VECTOR_ELT (x, i);
7159       if (!CONST_DOUBLE_P (xi))
7160         return false;
7161
7162       REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7163       if (!REAL_VALUES_EQUAL (r0, ri))
7164         return false;
7165     }
7166
7167   return aarch64_float_const_representable_p (x0);
7168 }
7169
7170 /* Return true for valid and false for invalid.  */
7171 bool
7172 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
7173                               struct simd_immediate_info *info)
7174 {
7175 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)  \
7176   matches = 1;                                          \
7177   for (i = 0; i < idx; i += (STRIDE))                   \
7178     if (!(TEST))                                        \
7179       matches = 0;                                      \
7180   if (matches)                                          \
7181     {                                                   \
7182       immtype = (CLASS);                                \
7183       elsize = (ELSIZE);                                \
7184       eshift = (SHIFT);                                 \
7185       emvn = (NEG);                                     \
7186       break;                                            \
7187     }
7188
7189   unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7190   unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7191   unsigned char bytes[16];
7192   int immtype = -1, matches;
7193   unsigned int invmask = inverse ? 0xff : 0;
7194   int eshift, emvn;
7195
7196   if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7197     {
7198       if (! (aarch64_simd_imm_zero_p (op, mode)
7199              || aarch64_vect_float_const_representable_p (op)))
7200         return false;
7201
7202       if (info)
7203         {
7204           info->value = CONST_VECTOR_ELT (op, 0);
7205           info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
7206           info->mvn = false;
7207           info->shift = 0;
7208         }
7209
7210       return true;
7211     }
7212
7213   /* Splat vector constant out into a byte vector.  */
7214   for (i = 0; i < n_elts; i++)
7215     {
7216       /* The vector is provided in gcc endian-neutral fashion.  For aarch64_be,
7217          it must be laid out in the vector register in reverse order.  */
7218       rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
7219       unsigned HOST_WIDE_INT elpart;
7220       unsigned int part, parts;
7221
7222       if (GET_CODE (el) == CONST_INT)
7223         {
7224           elpart = INTVAL (el);
7225           parts = 1;
7226         }
7227       else if (GET_CODE (el) == CONST_DOUBLE)
7228         {
7229           elpart = CONST_DOUBLE_LOW (el);
7230           parts = 2;
7231         }
7232       else
7233         gcc_unreachable ();
7234
7235       for (part = 0; part < parts; part++)
7236         {
7237           unsigned int byte;
7238           for (byte = 0; byte < innersize; byte++)
7239             {
7240               bytes[idx++] = (elpart & 0xff) ^ invmask;
7241               elpart >>= BITS_PER_UNIT;
7242             }
7243           if (GET_CODE (el) == CONST_DOUBLE)
7244             elpart = CONST_DOUBLE_HIGH (el);
7245         }
7246     }
7247
7248   /* Sanity check.  */
7249   gcc_assert (idx == GET_MODE_SIZE (mode));
7250
7251   do
7252     {
7253       CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7254              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7255
7256       CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7257              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7258
7259       CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7260              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7261
7262       CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7263              && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7264
7265       CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7266
7267       CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7268
7269       CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7270              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7271
7272       CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7273              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7274
7275       CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7276              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7277
7278       CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7279              && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7280
7281       CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7282
7283       CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7284
7285       CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7286              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7287
7288       CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7289              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7290
7291       CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7292              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7293
7294       CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7295              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7296
7297       CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7298
7299       CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7300              && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7301     }
7302   while (0);
7303
7304   if (immtype == -1)
7305     return false;
7306
7307   if (info)
7308     {
7309       info->element_width = elsize;
7310       info->mvn = emvn != 0;
7311       info->shift = eshift;
7312
7313       unsigned HOST_WIDE_INT imm = 0;
7314
7315       if (immtype >= 12 && immtype <= 15)
7316         info->msl = true;
7317
7318       /* Un-invert bytes of recognized vector, if necessary.  */
7319       if (invmask != 0)
7320         for (i = 0; i < idx; i++)
7321           bytes[i] ^= invmask;
7322
7323       if (immtype == 17)
7324         {
7325           /* FIXME: Broken on 32-bit H_W_I hosts.  */
7326           gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7327
7328           for (i = 0; i < 8; i++)
7329             imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7330               << (i * BITS_PER_UNIT);
7331
7332
7333           info->value = GEN_INT (imm);
7334         }
7335       else
7336         {
7337           for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7338             imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7339
7340           /* Construct 'abcdefgh' because the assembler cannot handle
7341              generic constants.  */
7342           if (info->mvn)
7343             imm = ~imm;
7344           imm = (imm >> info->shift) & 0xff;
7345           info->value = GEN_INT (imm);
7346         }
7347     }
7348
7349   return true;
7350 #undef CHECK
7351 }
7352
7353 static bool
7354 aarch64_const_vec_all_same_int_p (rtx x,
7355                                   HOST_WIDE_INT minval,
7356                                   HOST_WIDE_INT maxval)
7357 {
7358   HOST_WIDE_INT firstval;
7359   int count, i;
7360
7361   if (GET_CODE (x) != CONST_VECTOR
7362       || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
7363     return false;
7364
7365   firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
7366   if (firstval < minval || firstval > maxval)
7367     return false;
7368
7369   count = CONST_VECTOR_NUNITS (x);
7370   for (i = 1; i < count; i++)
7371     if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
7372       return false;
7373
7374   return true;
7375 }
7376
7377 /* Check of immediate shift constants are within range.  */
7378 bool
7379 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
7380 {
7381   int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
7382   if (left)
7383     return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
7384   else
7385     return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
7386 }
7387
7388 /* Return true if X is a uniform vector where all elements
7389    are either the floating-point constant 0.0 or the
7390    integer constant 0.  */
7391 bool
7392 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
7393 {
7394   return x == CONST0_RTX (mode);
7395 }
7396
7397 bool
7398 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
7399 {
7400   HOST_WIDE_INT imm = INTVAL (x);
7401   int i;
7402
7403   for (i = 0; i < 8; i++)
7404     {
7405       unsigned int byte = imm & 0xff;
7406       if (byte != 0xff && byte != 0)
7407        return false;
7408       imm >>= 8;
7409     }
7410
7411   return true;
7412 }
7413
7414 bool
7415 aarch64_mov_operand_p (rtx x,
7416                        enum aarch64_symbol_context context,
7417                        enum machine_mode mode)
7418 {
7419   if (GET_CODE (x) == HIGH
7420       && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7421     return true;
7422
7423   if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
7424     return true;
7425
7426   if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
7427     return true;
7428
7429   return aarch64_classify_symbolic_expression (x, context)
7430     == SYMBOL_TINY_ABSOLUTE;
7431 }
7432
7433 /* Return a const_int vector of VAL.  */
7434 rtx
7435 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
7436 {
7437   int nunits = GET_MODE_NUNITS (mode);
7438   rtvec v = rtvec_alloc (nunits);
7439   int i;
7440
7441   for (i=0; i < nunits; i++)
7442     RTVEC_ELT (v, i) = GEN_INT (val);
7443
7444   return gen_rtx_CONST_VECTOR (mode, v);
7445 }
7446
7447 /* Check OP is a legal scalar immediate for the MOVI instruction.  */
7448
7449 bool
7450 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
7451 {
7452   enum machine_mode vmode;
7453
7454   gcc_assert (!VECTOR_MODE_P (mode));
7455   vmode = aarch64_preferred_simd_mode (mode);
7456   rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
7457   return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
7458 }
7459
7460 /* Construct and return a PARALLEL RTX vector.  */
7461 rtx
7462 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
7463 {
7464   int nunits = GET_MODE_NUNITS (mode);
7465   rtvec v = rtvec_alloc (nunits / 2);
7466   int base = high ? nunits / 2 : 0;
7467   rtx t1;
7468   int i;
7469
7470   for (i=0; i < nunits / 2; i++)
7471     RTVEC_ELT (v, i) = GEN_INT (base + i);
7472
7473   t1 = gen_rtx_PARALLEL (mode, v);
7474   return t1;
7475 }
7476
7477 /* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
7478    HIGH (exclusive).  */
7479 void
7480 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7481 {
7482   HOST_WIDE_INT lane;
7483   gcc_assert (GET_CODE (operand) == CONST_INT);
7484   lane = INTVAL (operand);
7485
7486   if (lane < low || lane >= high)
7487     error ("lane out of range");
7488 }
7489
7490 void
7491 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7492 {
7493   gcc_assert (GET_CODE (operand) == CONST_INT);
7494   HOST_WIDE_INT lane = INTVAL (operand);
7495
7496   if (lane < low || lane >= high)
7497     error ("constant out of range");
7498 }
7499
7500 /* Emit code to reinterpret one AdvSIMD type as another,
7501    without altering bits.  */
7502 void
7503 aarch64_simd_reinterpret (rtx dest, rtx src)
7504 {
7505   emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
7506 }
7507
7508 /* Emit code to place a AdvSIMD pair result in memory locations (with equal
7509    registers).  */
7510 void
7511 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
7512                             rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
7513                             rtx op1)
7514 {
7515   rtx mem = gen_rtx_MEM (mode, destaddr);
7516   rtx tmp1 = gen_reg_rtx (mode);
7517   rtx tmp2 = gen_reg_rtx (mode);
7518
7519   emit_insn (intfn (tmp1, op1, tmp2));
7520
7521   emit_move_insn (mem, tmp1);
7522   mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
7523   emit_move_insn (mem, tmp2);
7524 }
7525
7526 /* Return TRUE if OP is a valid vector addressing mode.  */
7527 bool
7528 aarch64_simd_mem_operand_p (rtx op)
7529 {
7530   return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
7531                         || GET_CODE (XEXP (op, 0)) == REG);
7532 }
7533
7534 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
7535    not to early-clobber SRC registers in the process.
7536
7537    We assume that the operands described by SRC and DEST represent a
7538    decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
7539    number of components into which the copy has been decomposed.  */
7540 void
7541 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
7542                                 rtx *src, unsigned int count)
7543 {
7544   unsigned int i;
7545
7546   if (!reg_overlap_mentioned_p (operands[0], operands[1])
7547       || REGNO (operands[0]) < REGNO (operands[1]))
7548     {
7549       for (i = 0; i < count; i++)
7550         {
7551           operands[2 * i] = dest[i];
7552           operands[2 * i + 1] = src[i];
7553         }
7554     }
7555   else
7556     {
7557       for (i = 0; i < count; i++)
7558         {
7559           operands[2 * i] = dest[count - i - 1];
7560           operands[2 * i + 1] = src[count - i - 1];
7561         }
7562     }
7563 }
7564
7565 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
7566    one of VSTRUCT modes: OI, CI or XI.  */
7567 int
7568 aarch64_simd_attr_length_move (rtx insn)
7569 {
7570   enum machine_mode mode;
7571
7572   extract_insn_cached (insn);
7573
7574   if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
7575     {
7576       mode = GET_MODE (recog_data.operand[0]);
7577       switch (mode)
7578         {
7579         case OImode:
7580           return 8;
7581         case CImode:
7582           return 12;
7583         case XImode:
7584           return 16;
7585         default:
7586           gcc_unreachable ();
7587         }
7588     }
7589   return 4;
7590 }
7591
7592 /* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
7593    alignment of a vector to 128 bits.  */
7594 static HOST_WIDE_INT
7595 aarch64_simd_vector_alignment (const_tree type)
7596 {
7597   HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
7598   return MIN (align, 128);
7599 }
7600
7601 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
7602 static bool
7603 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
7604 {
7605   if (is_packed)
7606     return false;
7607
7608   /* We guarantee alignment for vectors up to 128-bits.  */
7609   if (tree_int_cst_compare (TYPE_SIZE (type),
7610                             bitsize_int (BIGGEST_ALIGNMENT)) > 0)
7611     return false;
7612
7613   /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
7614   return true;
7615 }
7616
7617 /* If VALS is a vector constant that can be loaded into a register
7618    using DUP, generate instructions to do so and return an RTX to
7619    assign to the register.  Otherwise return NULL_RTX.  */
7620 static rtx
7621 aarch64_simd_dup_constant (rtx vals)
7622 {
7623   enum machine_mode mode = GET_MODE (vals);
7624   enum machine_mode inner_mode = GET_MODE_INNER (mode);
7625   int n_elts = GET_MODE_NUNITS (mode);
7626   bool all_same = true;
7627   rtx x;
7628   int i;
7629
7630   if (GET_CODE (vals) != CONST_VECTOR)
7631     return NULL_RTX;
7632
7633   for (i = 1; i < n_elts; ++i)
7634     {
7635       x = CONST_VECTOR_ELT (vals, i);
7636       if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
7637         all_same = false;
7638     }
7639
7640   if (!all_same)
7641     return NULL_RTX;
7642
7643   /* We can load this constant by using DUP and a constant in a
7644      single ARM register.  This will be cheaper than a vector
7645      load.  */
7646   x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
7647   return gen_rtx_VEC_DUPLICATE (mode, x);
7648 }
7649
7650
7651 /* Generate code to load VALS, which is a PARALLEL containing only
7652    constants (for vec_init) or CONST_VECTOR, efficiently into a
7653    register.  Returns an RTX to copy into the register, or NULL_RTX
7654    for a PARALLEL that can not be converted into a CONST_VECTOR.  */
7655 static rtx
7656 aarch64_simd_make_constant (rtx vals)
7657 {
7658   enum machine_mode mode = GET_MODE (vals);
7659   rtx const_dup;
7660   rtx const_vec = NULL_RTX;
7661   int n_elts = GET_MODE_NUNITS (mode);
7662   int n_const = 0;
7663   int i;
7664
7665   if (GET_CODE (vals) == CONST_VECTOR)
7666     const_vec = vals;
7667   else if (GET_CODE (vals) == PARALLEL)
7668     {
7669       /* A CONST_VECTOR must contain only CONST_INTs and
7670          CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
7671          Only store valid constants in a CONST_VECTOR.  */
7672       for (i = 0; i < n_elts; ++i)
7673         {
7674           rtx x = XVECEXP (vals, 0, i);
7675           if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
7676             n_const++;
7677         }
7678       if (n_const == n_elts)
7679         const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7680     }
7681   else
7682     gcc_unreachable ();
7683
7684   if (const_vec != NULL_RTX
7685       && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
7686     /* Load using MOVI/MVNI.  */
7687     return const_vec;
7688   else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
7689     /* Loaded using DUP.  */
7690     return const_dup;
7691   else if (const_vec != NULL_RTX)
7692     /* Load from constant pool. We can not take advantage of single-cycle
7693        LD1 because we need a PC-relative addressing mode.  */
7694     return const_vec;
7695   else
7696     /* A PARALLEL containing something not valid inside CONST_VECTOR.
7697        We can not construct an initializer.  */
7698     return NULL_RTX;
7699 }
7700
7701 void
7702 aarch64_expand_vector_init (rtx target, rtx vals)
7703 {
7704   enum machine_mode mode = GET_MODE (target);
7705   enum machine_mode inner_mode = GET_MODE_INNER (mode);
7706   int n_elts = GET_MODE_NUNITS (mode);
7707   int n_var = 0, one_var = -1;
7708   bool all_same = true;
7709   rtx x, mem;
7710   int i;
7711
7712   x = XVECEXP (vals, 0, 0);
7713   if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7714     n_var = 1, one_var = 0;
7715
7716   for (i = 1; i < n_elts; ++i)
7717     {
7718       x = XVECEXP (vals, 0, i);
7719       if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7720         ++n_var, one_var = i;
7721
7722       if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7723         all_same = false;
7724     }
7725
7726   if (n_var == 0)
7727     {
7728       rtx constant = aarch64_simd_make_constant (vals);
7729       if (constant != NULL_RTX)
7730         {
7731           emit_move_insn (target, constant);
7732           return;
7733         }
7734     }
7735
7736   /* Splat a single non-constant element if we can.  */
7737   if (all_same)
7738     {
7739       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7740       aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7741       return;
7742     }
7743
7744   /* One field is non-constant.  Load constant then overwrite varying
7745      field.  This is more efficient than using the stack.  */
7746   if (n_var == 1)
7747     {
7748       rtx copy = copy_rtx (vals);
7749       rtx index = GEN_INT (one_var);
7750       enum insn_code icode;
7751
7752       /* Load constant part of vector, substitute neighboring value for
7753          varying element.  */
7754       XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7755       aarch64_expand_vector_init (target, copy);
7756
7757       /* Insert variable.  */
7758       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7759       icode = optab_handler (vec_set_optab, mode);
7760       gcc_assert (icode != CODE_FOR_nothing);
7761       emit_insn (GEN_FCN (icode) (target, x, index));
7762       return;
7763     }
7764
7765   /* Construct the vector in memory one field at a time
7766      and load the whole vector.  */
7767   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7768   for (i = 0; i < n_elts; i++)
7769     emit_move_insn (adjust_address_nv (mem, inner_mode,
7770                                     i * GET_MODE_SIZE (inner_mode)),
7771                     XVECEXP (vals, 0, i));
7772   emit_move_insn (target, mem);
7773
7774 }
7775
7776 static unsigned HOST_WIDE_INT
7777 aarch64_shift_truncation_mask (enum machine_mode mode)
7778 {
7779   return
7780     (aarch64_vector_mode_supported_p (mode)
7781      || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7782 }
7783
7784 #ifndef TLS_SECTION_ASM_FLAG
7785 #define TLS_SECTION_ASM_FLAG 'T'
7786 #endif
7787
7788 void
7789 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7790                                tree decl ATTRIBUTE_UNUSED)
7791 {
7792   char flagchars[10], *f = flagchars;
7793
7794   /* If we have already declared this section, we can use an
7795      abbreviated form to switch back to it -- unless this section is
7796      part of a COMDAT groups, in which case GAS requires the full
7797      declaration every time.  */
7798   if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7799       && (flags & SECTION_DECLARED))
7800     {
7801       fprintf (asm_out_file, "\t.section\t%s\n", name);
7802       return;
7803     }
7804
7805   if (!(flags & SECTION_DEBUG))
7806     *f++ = 'a';
7807   if (flags & SECTION_WRITE)
7808     *f++ = 'w';
7809   if (flags & SECTION_CODE)
7810     *f++ = 'x';
7811   if (flags & SECTION_SMALL)
7812     *f++ = 's';
7813   if (flags & SECTION_MERGE)
7814     *f++ = 'M';
7815   if (flags & SECTION_STRINGS)
7816     *f++ = 'S';
7817   if (flags & SECTION_TLS)
7818     *f++ = TLS_SECTION_ASM_FLAG;
7819   if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7820     *f++ = 'G';
7821   *f = '\0';
7822
7823   fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7824
7825   if (!(flags & SECTION_NOTYPE))
7826     {
7827       const char *type;
7828       const char *format;
7829
7830       if (flags & SECTION_BSS)
7831         type = "nobits";
7832       else
7833         type = "progbits";
7834
7835 #ifdef TYPE_OPERAND_FMT
7836       format = "," TYPE_OPERAND_FMT;
7837 #else
7838       format = ",@%s";
7839 #endif
7840
7841       fprintf (asm_out_file, format, type);
7842
7843       if (flags & SECTION_ENTSIZE)
7844         fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7845       if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7846         {
7847           if (TREE_CODE (decl) == IDENTIFIER_NODE)
7848             fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7849           else
7850             fprintf (asm_out_file, ",%s,comdat",
7851                      IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7852         }
7853     }
7854
7855   putc ('\n', asm_out_file);
7856 }
7857
7858 /* Select a format to encode pointers in exception handling data.  */
7859 int
7860 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7861 {
7862    int type;
7863    switch (aarch64_cmodel)
7864      {
7865      case AARCH64_CMODEL_TINY:
7866      case AARCH64_CMODEL_TINY_PIC:
7867      case AARCH64_CMODEL_SMALL:
7868      case AARCH64_CMODEL_SMALL_PIC:
7869        /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
7870           for everything.  */
7871        type = DW_EH_PE_sdata4;
7872        break;
7873      default:
7874        /* No assumptions here.  8-byte relocs required.  */
7875        type = DW_EH_PE_sdata8;
7876        break;
7877      }
7878    return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7879 }
7880
7881 /* Emit load exclusive.  */
7882
7883 static void
7884 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7885                              rtx mem, rtx model_rtx)
7886 {
7887   rtx (*gen) (rtx, rtx, rtx);
7888
7889   switch (mode)
7890     {
7891     case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7892     case HImode: gen = gen_aarch64_load_exclusivehi; break;
7893     case SImode: gen = gen_aarch64_load_exclusivesi; break;
7894     case DImode: gen = gen_aarch64_load_exclusivedi; break;
7895     default:
7896       gcc_unreachable ();
7897     }
7898
7899   emit_insn (gen (rval, mem, model_rtx));
7900 }
7901
7902 /* Emit store exclusive.  */
7903
7904 static void
7905 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7906                               rtx rval, rtx mem, rtx model_rtx)
7907 {
7908   rtx (*gen) (rtx, rtx, rtx, rtx);
7909
7910   switch (mode)
7911     {
7912     case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7913     case HImode: gen = gen_aarch64_store_exclusivehi; break;
7914     case SImode: gen = gen_aarch64_store_exclusivesi; break;
7915     case DImode: gen = gen_aarch64_store_exclusivedi; break;
7916     default:
7917       gcc_unreachable ();
7918     }
7919
7920   emit_insn (gen (bval, rval, mem, model_rtx));
7921 }
7922
7923 /* Mark the previous jump instruction as unlikely.  */
7924
7925 static void
7926 aarch64_emit_unlikely_jump (rtx insn)
7927 {
7928   int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
7929
7930   insn = emit_jump_insn (insn);
7931   add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
7932 }
7933
7934 /* Expand a compare and swap pattern.  */
7935
7936 void
7937 aarch64_expand_compare_and_swap (rtx operands[])
7938 {
7939   rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7940   enum machine_mode mode, cmp_mode;
7941   rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7942
7943   bval = operands[0];
7944   rval = operands[1];
7945   mem = operands[2];
7946   oldval = operands[3];
7947   newval = operands[4];
7948   is_weak = operands[5];
7949   mod_s = operands[6];
7950   mod_f = operands[7];
7951   mode = GET_MODE (mem);
7952   cmp_mode = mode;
7953
7954   /* Normally the succ memory model must be stronger than fail, but in the
7955      unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7956      promote succ to ACQ_REL so that we don't lose the acquire semantics.  */
7957
7958   if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7959       && INTVAL (mod_s) == MEMMODEL_RELEASE)
7960     mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7961
7962   switch (mode)
7963     {
7964     case QImode:
7965     case HImode:
7966       /* For short modes, we're going to perform the comparison in SImode,
7967          so do the zero-extension now.  */
7968       cmp_mode = SImode;
7969       rval = gen_reg_rtx (SImode);
7970       oldval = convert_modes (SImode, mode, oldval, true);
7971       /* Fall through.  */
7972
7973     case SImode:
7974     case DImode:
7975       /* Force the value into a register if needed.  */
7976       if (!aarch64_plus_operand (oldval, mode))
7977         oldval = force_reg (cmp_mode, oldval);
7978       break;
7979
7980     default:
7981       gcc_unreachable ();
7982     }
7983
7984   switch (mode)
7985     {
7986     case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7987     case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7988     case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7989     case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7990     default:
7991       gcc_unreachable ();
7992     }
7993
7994   emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7995
7996   if (mode == QImode || mode == HImode)
7997     emit_move_insn (operands[1], gen_lowpart (mode, rval));
7998
7999   x = gen_rtx_REG (CCmode, CC_REGNUM);
8000   x = gen_rtx_EQ (SImode, x, const0_rtx);
8001   emit_insn (gen_rtx_SET (VOIDmode, bval, x));
8002 }
8003
8004 /* Split a compare and swap pattern.  */
8005
8006 void
8007 aarch64_split_compare_and_swap (rtx operands[])
8008 {
8009   rtx rval, mem, oldval, newval, scratch;
8010   enum machine_mode mode;
8011   bool is_weak;
8012   rtx label1, label2, x, cond;
8013
8014   rval = operands[0];
8015   mem = operands[1];
8016   oldval = operands[2];
8017   newval = operands[3];
8018   is_weak = (operands[4] != const0_rtx);
8019   scratch = operands[7];
8020   mode = GET_MODE (mem);
8021
8022   label1 = NULL_RTX;
8023   if (!is_weak)
8024     {
8025       label1 = gen_label_rtx ();
8026       emit_label (label1);
8027     }
8028   label2 = gen_label_rtx ();
8029
8030   aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
8031
8032   cond = aarch64_gen_compare_reg (NE, rval, oldval);
8033   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8034   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8035                             gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
8036   aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8037
8038   aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
8039
8040   if (!is_weak)
8041     {
8042       x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
8043       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8044                                 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
8045       aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8046     }
8047   else
8048     {
8049       cond = gen_rtx_REG (CCmode, CC_REGNUM);
8050       x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
8051       emit_insn (gen_rtx_SET (VOIDmode, cond, x));
8052     }
8053
8054   emit_label (label2);
8055 }
8056
8057 /* Split an atomic operation.  */
8058
8059 void
8060 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
8061                      rtx value, rtx model_rtx, rtx cond)
8062 {
8063   enum machine_mode mode = GET_MODE (mem);
8064   enum machine_mode wmode = (mode == DImode ? DImode : SImode);
8065   rtx label, x;
8066
8067   label = gen_label_rtx ();
8068   emit_label (label);
8069
8070   if (new_out)
8071     new_out = gen_lowpart (wmode, new_out);
8072   if (old_out)
8073     old_out = gen_lowpart (wmode, old_out);
8074   else
8075     old_out = new_out;
8076   value = simplify_gen_subreg (wmode, value, mode, 0);
8077
8078   aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
8079
8080   switch (code)
8081     {
8082     case SET:
8083       new_out = value;
8084       break;
8085
8086     case NOT:
8087       x = gen_rtx_AND (wmode, old_out, value);
8088       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8089       x = gen_rtx_NOT (wmode, new_out);
8090       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8091       break;
8092
8093     case MINUS:
8094       if (CONST_INT_P (value))
8095         {
8096           value = GEN_INT (-INTVAL (value));
8097           code = PLUS;
8098         }
8099       /* Fall through.  */
8100
8101     default:
8102       x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8103       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8104       break;
8105     }
8106
8107   aarch64_emit_store_exclusive (mode, cond, mem,
8108                                 gen_lowpart (mode, new_out), model_rtx);
8109
8110   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8111   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8112                             gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8113   aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8114 }
8115
8116 static void
8117 aarch64_print_extension (void)
8118 {
8119   const struct aarch64_option_extension *opt = NULL;
8120
8121   for (opt = all_extensions; opt->name != NULL; opt++)
8122     if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8123       asm_fprintf (asm_out_file, "+%s", opt->name);
8124
8125   asm_fprintf (asm_out_file, "\n");
8126 }
8127
8128 static void
8129 aarch64_start_file (void)
8130 {
8131   if (selected_arch)
8132     {
8133       asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8134       aarch64_print_extension ();
8135     }
8136   else if (selected_cpu)
8137     {
8138       const char *truncated_name
8139             = aarch64_rewrite_selected_cpu (selected_cpu->name);
8140       asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
8141       aarch64_print_extension ();
8142     }
8143   default_file_start();
8144 }
8145
8146 /* Target hook for c_mode_for_suffix.  */
8147 static enum machine_mode
8148 aarch64_c_mode_for_suffix (char suffix)
8149 {
8150   if (suffix == 'q')
8151     return TFmode;
8152
8153   return VOIDmode;
8154 }
8155
8156 /* We can only represent floating point constants which will fit in
8157    "quarter-precision" values.  These values are characterised by
8158    a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
8159    by:
8160
8161    (-1)^s * (n/16) * 2^r
8162
8163    Where:
8164      's' is the sign bit.
8165      'n' is an integer in the range 16 <= n <= 31.
8166      'r' is an integer in the range -3 <= r <= 4.  */
8167
8168 /* Return true iff X can be represented by a quarter-precision
8169    floating point immediate operand X.  Note, we cannot represent 0.0.  */
8170 bool
8171 aarch64_float_const_representable_p (rtx x)
8172 {
8173   /* This represents our current view of how many bits
8174      make up the mantissa.  */
8175   int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8176   int exponent;
8177   unsigned HOST_WIDE_INT mantissa, mask;
8178   HOST_WIDE_INT m1, m2;
8179   REAL_VALUE_TYPE r, m;
8180
8181   if (!CONST_DOUBLE_P (x))
8182     return false;
8183
8184   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8185
8186   /* We cannot represent infinities, NaNs or +/-zero.  We won't
8187      know if we have +zero until we analyse the mantissa, but we
8188      can reject the other invalid values.  */
8189   if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8190       || REAL_VALUE_MINUS_ZERO (r))
8191     return false;
8192
8193   /* Extract exponent.  */
8194   r = real_value_abs (&r);
8195   exponent = REAL_EXP (&r);
8196
8197   /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8198      highest (sign) bit, with a fixed binary point at bit point_pos.
8199      m1 holds the low part of the mantissa, m2 the high part.
8200      WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8201      bits for the mantissa, this can fail (low bits will be lost).  */
8202   real_ldexp (&m, &r, point_pos - exponent);
8203   REAL_VALUE_TO_INT (&m1, &m2, m);
8204
8205   /* If the low part of the mantissa has bits set we cannot represent
8206      the value.  */
8207   if (m1 != 0)
8208     return false;
8209   /* We have rejected the lower HOST_WIDE_INT, so update our
8210      understanding of how many bits lie in the mantissa and
8211      look only at the high HOST_WIDE_INT.  */
8212   mantissa = m2;
8213   point_pos -= HOST_BITS_PER_WIDE_INT;
8214
8215   /* We can only represent values with a mantissa of the form 1.xxxx.  */
8216   mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8217   if ((mantissa & mask) != 0)
8218     return false;
8219
8220   /* Having filtered unrepresentable values, we may now remove all
8221      but the highest 5 bits.  */
8222   mantissa >>= point_pos - 5;
8223
8224   /* We cannot represent the value 0.0, so reject it.  This is handled
8225      elsewhere.  */
8226   if (mantissa == 0)
8227     return false;
8228
8229   /* Then, as bit 4 is always set, we can mask it off, leaving
8230      the mantissa in the range [0, 15].  */
8231   mantissa &= ~(1 << 4);
8232   gcc_assert (mantissa <= 15);
8233
8234   /* GCC internally does not use IEEE754-like encoding (where normalized
8235      significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
8236      Our mantissa values are shifted 4 places to the left relative to
8237      normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8238      by 5 places to correct for GCC's representation.  */
8239   exponent = 5 - exponent;
8240
8241   return (exponent >= 0 && exponent <= 7);
8242 }
8243
8244 char*
8245 aarch64_output_simd_mov_immediate (rtx const_vector,
8246                                    enum machine_mode mode,
8247                                    unsigned width)
8248 {
8249   bool is_valid;
8250   static char templ[40];
8251   const char *mnemonic;
8252   const char *shift_op;
8253   unsigned int lane_count = 0;
8254   char element_char;
8255
8256   struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
8257
8258   /* This will return true to show const_vector is legal for use as either
8259      a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
8260      also update INFO to show how the immediate should be generated.  */
8261   is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
8262   gcc_assert (is_valid);
8263
8264   element_char = sizetochar (info.element_width);
8265   lane_count = width / info.element_width;
8266
8267   mode = GET_MODE_INNER (mode);
8268   if (mode == SFmode || mode == DFmode)
8269     {
8270       gcc_assert (info.shift == 0 && ! info.mvn);
8271       if (aarch64_float_const_zero_rtx_p (info.value))
8272         info.value = GEN_INT (0);
8273       else
8274         {
8275 #define buf_size 20
8276           REAL_VALUE_TYPE r;
8277           REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8278           char float_buf[buf_size] = {'\0'};
8279           real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8280 #undef buf_size
8281
8282           if (lane_count == 1)
8283             snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8284           else
8285             snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
8286                       lane_count, element_char, float_buf);
8287           return templ;
8288         }
8289     }
8290
8291   mnemonic = info.mvn ? "mvni" : "movi";
8292   shift_op = info.msl ? "msl" : "lsl";
8293
8294   if (lane_count == 1)
8295     snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8296               mnemonic, UINTVAL (info.value));
8297   else if (info.shift)
8298     snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8299               ", %s %d", mnemonic, lane_count, element_char,
8300               UINTVAL (info.value), shift_op, info.shift);
8301   else
8302     snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
8303               mnemonic, lane_count, element_char, UINTVAL (info.value));
8304   return templ;
8305 }
8306
8307 char*
8308 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
8309                                           enum machine_mode mode)
8310 {
8311   enum machine_mode vmode;
8312
8313   gcc_assert (!VECTOR_MODE_P (mode));
8314   vmode = aarch64_simd_container_mode (mode, 64);
8315   rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
8316   return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
8317 }
8318
8319 /* Split operands into moves from op[1] + op[2] into op[0].  */
8320
8321 void
8322 aarch64_split_combinev16qi (rtx operands[3])
8323 {
8324   unsigned int dest = REGNO (operands[0]);
8325   unsigned int src1 = REGNO (operands[1]);
8326   unsigned int src2 = REGNO (operands[2]);
8327   enum machine_mode halfmode = GET_MODE (operands[1]);
8328   unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
8329   rtx destlo, desthi;
8330
8331   gcc_assert (halfmode == V16QImode);
8332
8333   if (src1 == dest && src2 == dest + halfregs)
8334     {
8335       /* No-op move.  Can't split to nothing; emit something.  */
8336       emit_note (NOTE_INSN_DELETED);
8337       return;
8338     }
8339
8340   /* Preserve register attributes for variable tracking.  */
8341   destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
8342   desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
8343                                GET_MODE_SIZE (halfmode));
8344
8345   /* Special case of reversed high/low parts.  */
8346   if (reg_overlap_mentioned_p (operands[2], destlo)
8347       && reg_overlap_mentioned_p (operands[1], desthi))
8348     {
8349       emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8350       emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
8351       emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8352     }
8353   else if (!reg_overlap_mentioned_p (operands[2], destlo))
8354     {
8355       /* Try to avoid unnecessary moves if part of the result
8356          is in the right place already.  */
8357       if (src1 != dest)
8358         emit_move_insn (destlo, operands[1]);
8359       if (src2 != dest + halfregs)
8360         emit_move_insn (desthi, operands[2]);
8361     }
8362   else
8363     {
8364       if (src2 != dest + halfregs)
8365         emit_move_insn (desthi, operands[2]);
8366       if (src1 != dest)
8367         emit_move_insn (destlo, operands[1]);
8368     }
8369 }
8370
8371 /* vec_perm support.  */
8372
8373 #define MAX_VECT_LEN 16
8374
8375 struct expand_vec_perm_d
8376 {
8377   rtx target, op0, op1;
8378   unsigned char perm[MAX_VECT_LEN];
8379   enum machine_mode vmode;
8380   unsigned char nelt;
8381   bool one_vector_p;
8382   bool testing_p;
8383 };
8384
8385 /* Generate a variable permutation.  */
8386
8387 static void
8388 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
8389 {
8390   enum machine_mode vmode = GET_MODE (target);
8391   bool one_vector_p = rtx_equal_p (op0, op1);
8392
8393   gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
8394   gcc_checking_assert (GET_MODE (op0) == vmode);
8395   gcc_checking_assert (GET_MODE (op1) == vmode);
8396   gcc_checking_assert (GET_MODE (sel) == vmode);
8397   gcc_checking_assert (TARGET_SIMD);
8398
8399   if (one_vector_p)
8400     {
8401       if (vmode == V8QImode)
8402         {
8403           /* Expand the argument to a V16QI mode by duplicating it.  */
8404           rtx pair = gen_reg_rtx (V16QImode);
8405           emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
8406           emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8407         }
8408       else
8409         {
8410           emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
8411         }
8412     }
8413   else
8414     {
8415       rtx pair;
8416
8417       if (vmode == V8QImode)
8418         {
8419           pair = gen_reg_rtx (V16QImode);
8420           emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
8421           emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8422         }
8423       else
8424         {
8425           pair = gen_reg_rtx (OImode);
8426           emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
8427           emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
8428         }
8429     }
8430 }
8431
8432 void
8433 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
8434 {
8435   enum machine_mode vmode = GET_MODE (target);
8436   unsigned int i, nelt = GET_MODE_NUNITS (vmode);
8437   bool one_vector_p = rtx_equal_p (op0, op1);
8438   rtx rmask[MAX_VECT_LEN], mask;
8439
8440   gcc_checking_assert (!BYTES_BIG_ENDIAN);
8441
8442   /* The TBL instruction does not use a modulo index, so we must take care
8443      of that ourselves.  */
8444   mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
8445   for (i = 0; i < nelt; ++i)
8446     rmask[i] = mask;
8447   mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
8448   sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
8449
8450   aarch64_expand_vec_perm_1 (target, op0, op1, sel);
8451 }
8452
8453 /* Recognize patterns suitable for the TRN instructions.  */
8454 static bool
8455 aarch64_evpc_trn (struct expand_vec_perm_d *d)
8456 {
8457   unsigned int i, odd, mask, nelt = d->nelt;
8458   rtx out, in0, in1, x;
8459   rtx (*gen) (rtx, rtx, rtx);
8460   enum machine_mode vmode = d->vmode;
8461
8462   if (GET_MODE_UNIT_SIZE (vmode) > 8)
8463     return false;
8464
8465   /* Note that these are little-endian tests.
8466      We correct for big-endian later.  */
8467   if (d->perm[0] == 0)
8468     odd = 0;
8469   else if (d->perm[0] == 1)
8470     odd = 1;
8471   else
8472     return false;
8473   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8474
8475   for (i = 0; i < nelt; i += 2)
8476     {
8477       if (d->perm[i] != i + odd)
8478         return false;
8479       if (d->perm[i + 1] != ((i + nelt + odd) & mask))
8480         return false;
8481     }
8482
8483   /* Success!  */
8484   if (d->testing_p)
8485     return true;
8486
8487   in0 = d->op0;
8488   in1 = d->op1;
8489   if (BYTES_BIG_ENDIAN)
8490     {
8491       x = in0, in0 = in1, in1 = x;
8492       odd = !odd;
8493     }
8494   out = d->target;
8495
8496   if (odd)
8497     {
8498       switch (vmode)
8499         {
8500         case V16QImode: gen = gen_aarch64_trn2v16qi; break;
8501         case V8QImode: gen = gen_aarch64_trn2v8qi; break;
8502         case V8HImode: gen = gen_aarch64_trn2v8hi; break;
8503         case V4HImode: gen = gen_aarch64_trn2v4hi; break;
8504         case V4SImode: gen = gen_aarch64_trn2v4si; break;
8505         case V2SImode: gen = gen_aarch64_trn2v2si; break;
8506         case V2DImode: gen = gen_aarch64_trn2v2di; break;
8507         case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
8508         case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
8509         case V2DFmode: gen = gen_aarch64_trn2v2df; break;
8510         default:
8511           return false;
8512         }
8513     }
8514   else
8515     {
8516       switch (vmode)
8517         {
8518         case V16QImode: gen = gen_aarch64_trn1v16qi; break;
8519         case V8QImode: gen = gen_aarch64_trn1v8qi; break;
8520         case V8HImode: gen = gen_aarch64_trn1v8hi; break;
8521         case V4HImode: gen = gen_aarch64_trn1v4hi; break;
8522         case V4SImode: gen = gen_aarch64_trn1v4si; break;
8523         case V2SImode: gen = gen_aarch64_trn1v2si; break;
8524         case V2DImode: gen = gen_aarch64_trn1v2di; break;
8525         case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
8526         case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
8527         case V2DFmode: gen = gen_aarch64_trn1v2df; break;
8528         default:
8529           return false;
8530         }
8531     }
8532
8533   emit_insn (gen (out, in0, in1));
8534   return true;
8535 }
8536
8537 /* Recognize patterns suitable for the UZP instructions.  */
8538 static bool
8539 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
8540 {
8541   unsigned int i, odd, mask, nelt = d->nelt;
8542   rtx out, in0, in1, x;
8543   rtx (*gen) (rtx, rtx, rtx);
8544   enum machine_mode vmode = d->vmode;
8545
8546   if (GET_MODE_UNIT_SIZE (vmode) > 8)
8547     return false;
8548
8549   /* Note that these are little-endian tests.
8550      We correct for big-endian later.  */
8551   if (d->perm[0] == 0)
8552     odd = 0;
8553   else if (d->perm[0] == 1)
8554     odd = 1;
8555   else
8556     return false;
8557   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8558
8559   for (i = 0; i < nelt; i++)
8560     {
8561       unsigned elt = (i * 2 + odd) & mask;
8562       if (d->perm[i] != elt)
8563         return false;
8564     }
8565
8566   /* Success!  */
8567   if (d->testing_p)
8568     return true;
8569
8570   in0 = d->op0;
8571   in1 = d->op1;
8572   if (BYTES_BIG_ENDIAN)
8573     {
8574       x = in0, in0 = in1, in1 = x;
8575       odd = !odd;
8576     }
8577   out = d->target;
8578
8579   if (odd)
8580     {
8581       switch (vmode)
8582         {
8583         case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
8584         case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
8585         case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
8586         case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
8587         case V4SImode: gen = gen_aarch64_uzp2v4si; break;
8588         case V2SImode: gen = gen_aarch64_uzp2v2si; break;
8589         case V2DImode: gen = gen_aarch64_uzp2v2di; break;
8590         case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
8591         case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
8592         case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
8593         default:
8594           return false;
8595         }
8596     }
8597   else
8598     {
8599       switch (vmode)
8600         {
8601         case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
8602         case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
8603         case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
8604         case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
8605         case V4SImode: gen = gen_aarch64_uzp1v4si; break;
8606         case V2SImode: gen = gen_aarch64_uzp1v2si; break;
8607         case V2DImode: gen = gen_aarch64_uzp1v2di; break;
8608         case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
8609         case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
8610         case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
8611         default:
8612           return false;
8613         }
8614     }
8615
8616   emit_insn (gen (out, in0, in1));
8617   return true;
8618 }
8619
8620 /* Recognize patterns suitable for the ZIP instructions.  */
8621 static bool
8622 aarch64_evpc_zip (struct expand_vec_perm_d *d)
8623 {
8624   unsigned int i, high, mask, nelt = d->nelt;
8625   rtx out, in0, in1, x;
8626   rtx (*gen) (rtx, rtx, rtx);
8627   enum machine_mode vmode = d->vmode;
8628
8629   if (GET_MODE_UNIT_SIZE (vmode) > 8)
8630     return false;
8631
8632   /* Note that these are little-endian tests.
8633      We correct for big-endian later.  */
8634   high = nelt / 2;
8635   if (d->perm[0] == high)
8636     /* Do Nothing.  */
8637     ;
8638   else if (d->perm[0] == 0)
8639     high = 0;
8640   else
8641     return false;
8642   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8643
8644   for (i = 0; i < nelt / 2; i++)
8645     {
8646       unsigned elt = (i + high) & mask;
8647       if (d->perm[i * 2] != elt)
8648         return false;
8649       elt = (elt + nelt) & mask;
8650       if (d->perm[i * 2 + 1] != elt)
8651         return false;
8652     }
8653
8654   /* Success!  */
8655   if (d->testing_p)
8656     return true;
8657
8658   in0 = d->op0;
8659   in1 = d->op1;
8660   if (BYTES_BIG_ENDIAN)
8661     {
8662       x = in0, in0 = in1, in1 = x;
8663       high = !high;
8664     }
8665   out = d->target;
8666
8667   if (high)
8668     {
8669       switch (vmode)
8670         {
8671         case V16QImode: gen = gen_aarch64_zip2v16qi; break;
8672         case V8QImode: gen = gen_aarch64_zip2v8qi; break;
8673         case V8HImode: gen = gen_aarch64_zip2v8hi; break;
8674         case V4HImode: gen = gen_aarch64_zip2v4hi; break;
8675         case V4SImode: gen = gen_aarch64_zip2v4si; break;
8676         case V2SImode: gen = gen_aarch64_zip2v2si; break;
8677         case V2DImode: gen = gen_aarch64_zip2v2di; break;
8678         case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
8679         case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
8680         case V2DFmode: gen = gen_aarch64_zip2v2df; break;
8681         default:
8682           return false;
8683         }
8684     }
8685   else
8686     {
8687       switch (vmode)
8688         {
8689         case V16QImode: gen = gen_aarch64_zip1v16qi; break;
8690         case V8QImode: gen = gen_aarch64_zip1v8qi; break;
8691         case V8HImode: gen = gen_aarch64_zip1v8hi; break;
8692         case V4HImode: gen = gen_aarch64_zip1v4hi; break;
8693         case V4SImode: gen = gen_aarch64_zip1v4si; break;
8694         case V2SImode: gen = gen_aarch64_zip1v2si; break;
8695         case V2DImode: gen = gen_aarch64_zip1v2di; break;
8696         case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
8697         case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
8698         case V2DFmode: gen = gen_aarch64_zip1v2df; break;
8699         default:
8700           return false;
8701         }
8702     }
8703
8704   emit_insn (gen (out, in0, in1));
8705   return true;
8706 }
8707
8708 static bool
8709 aarch64_evpc_dup (struct expand_vec_perm_d *d)
8710 {
8711   rtx (*gen) (rtx, rtx, rtx);
8712   rtx out = d->target;
8713   rtx in0;
8714   enum machine_mode vmode = d->vmode;
8715   unsigned int i, elt, nelt = d->nelt;
8716   rtx lane;
8717
8718   /* TODO: This may not be big-endian safe.  */
8719   if (BYTES_BIG_ENDIAN)
8720     return false;
8721
8722   elt = d->perm[0];
8723   for (i = 1; i < nelt; i++)
8724     {
8725       if (elt != d->perm[i])
8726         return false;
8727     }
8728
8729   /* The generic preparation in aarch64_expand_vec_perm_const_1
8730      swaps the operand order and the permute indices if it finds
8731      d->perm[0] to be in the second operand.  Thus, we can always
8732      use d->op0 and need not do any extra arithmetic to get the
8733      correct lane number.  */
8734   in0 = d->op0;
8735   lane = GEN_INT (elt);
8736
8737   switch (vmode)
8738     {
8739     case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8740     case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8741     case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8742     case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8743     case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8744     case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8745     case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8746     case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8747     case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8748     case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8749     default:
8750       return false;
8751     }
8752
8753   emit_insn (gen (out, in0, lane));
8754   return true;
8755 }
8756
8757 static bool
8758 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8759 {
8760   rtx rperm[MAX_VECT_LEN], sel;
8761   enum machine_mode vmode = d->vmode;
8762   unsigned int i, nelt = d->nelt;
8763
8764   /* TODO: ARM's TBL indexing is little-endian.  In order to handle GCC's
8765      numbering of elements for big-endian, we must reverse the order.  */
8766   if (BYTES_BIG_ENDIAN)
8767     return false;
8768
8769   if (d->testing_p)
8770     return true;
8771
8772   /* Generic code will try constant permutation twice.  Once with the
8773      original mode and again with the elements lowered to QImode.
8774      So wait and don't do the selector expansion ourselves.  */
8775   if (vmode != V8QImode && vmode != V16QImode)
8776     return false;
8777
8778   for (i = 0; i < nelt; ++i)
8779     rperm[i] = GEN_INT (d->perm[i]);
8780   sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8781   sel = force_reg (vmode, sel);
8782
8783   aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8784   return true;
8785 }
8786
8787 static bool
8788 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8789 {
8790   /* The pattern matching functions above are written to look for a small
8791      number to begin the sequence (0, 1, N/2).  If we begin with an index
8792      from the second operand, we can swap the operands.  */
8793   if (d->perm[0] >= d->nelt)
8794     {
8795       unsigned i, nelt = d->nelt;
8796       rtx x;
8797
8798       for (i = 0; i < nelt; ++i)
8799         d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
8800
8801       x = d->op0;
8802       d->op0 = d->op1;
8803       d->op1 = x;
8804     }
8805
8806   if (TARGET_SIMD)
8807     {
8808       if (aarch64_evpc_zip (d))
8809         return true;
8810       else if (aarch64_evpc_uzp (d))
8811         return true;
8812       else if (aarch64_evpc_trn (d))
8813         return true;
8814       else if (aarch64_evpc_dup (d))
8815         return true;
8816       return aarch64_evpc_tbl (d);
8817     }
8818   return false;
8819 }
8820
8821 /* Expand a vec_perm_const pattern.  */
8822
8823 bool
8824 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8825 {
8826   struct expand_vec_perm_d d;
8827   int i, nelt, which;
8828
8829   d.target = target;
8830   d.op0 = op0;
8831   d.op1 = op1;
8832
8833   d.vmode = GET_MODE (target);
8834   gcc_assert (VECTOR_MODE_P (d.vmode));
8835   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8836   d.testing_p = false;
8837
8838   for (i = which = 0; i < nelt; ++i)
8839     {
8840       rtx e = XVECEXP (sel, 0, i);
8841       int ei = INTVAL (e) & (2 * nelt - 1);
8842       which |= (ei < nelt ? 1 : 2);
8843       d.perm[i] = ei;
8844     }
8845
8846   switch (which)
8847     {
8848     default:
8849       gcc_unreachable ();
8850
8851     case 3:
8852       d.one_vector_p = false;
8853       if (!rtx_equal_p (op0, op1))
8854         break;
8855
8856       /* The elements of PERM do not suggest that only the first operand
8857          is used, but both operands are identical.  Allow easier matching
8858          of the permutation by folding the permutation into the single
8859          input vector.  */
8860       /* Fall Through.  */
8861     case 2:
8862       for (i = 0; i < nelt; ++i)
8863         d.perm[i] &= nelt - 1;
8864       d.op0 = op1;
8865       d.one_vector_p = true;
8866       break;
8867
8868     case 1:
8869       d.op1 = op0;
8870       d.one_vector_p = true;
8871       break;
8872     }
8873
8874   return aarch64_expand_vec_perm_const_1 (&d);
8875 }
8876
8877 static bool
8878 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8879                                      const unsigned char *sel)
8880 {
8881   struct expand_vec_perm_d d;
8882   unsigned int i, nelt, which;
8883   bool ret;
8884
8885   d.vmode = vmode;
8886   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8887   d.testing_p = true;
8888   memcpy (d.perm, sel, nelt);
8889
8890   /* Calculate whether all elements are in one vector.  */
8891   for (i = which = 0; i < nelt; ++i)
8892     {
8893       unsigned char e = d.perm[i];
8894       gcc_assert (e < 2 * nelt);
8895       which |= (e < nelt ? 1 : 2);
8896     }
8897
8898   /* If all elements are from the second vector, reindex as if from the
8899      first vector.  */
8900   if (which == 2)
8901     for (i = 0; i < nelt; ++i)
8902       d.perm[i] -= nelt;
8903
8904   /* Check whether the mask can be applied to a single vector.  */
8905   d.one_vector_p = (which != 3);
8906
8907   d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8908   d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8909   if (!d.one_vector_p)
8910     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8911
8912   start_sequence ();
8913   ret = aarch64_expand_vec_perm_const_1 (&d);
8914   end_sequence ();
8915
8916   return ret;
8917 }
8918
8919 /* Implement target hook CANNOT_CHANGE_MODE_CLASS.  */
8920 bool
8921 aarch64_cannot_change_mode_class (enum machine_mode from,
8922                                   enum machine_mode to,
8923                                   enum reg_class rclass)
8924 {
8925   /* Full-reg subregs are allowed on general regs or any class if they are
8926      the same size.  */
8927   if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
8928       || !reg_classes_intersect_p (FP_REGS, rclass))
8929     return false;
8930
8931   /* Limited combinations of subregs are safe on FPREGs.  Particularly,
8932      1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
8933      2. Scalar to Scalar for integer modes or same size float modes.
8934      3. Vector to Vector modes.  */
8935   if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
8936     {
8937       if (aarch64_vector_mode_supported_p (from)
8938           && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
8939         return false;
8940
8941       if (GET_MODE_NUNITS (from) == 1
8942           && GET_MODE_NUNITS (to) == 1
8943           && (GET_MODE_CLASS (from) == MODE_INT
8944               || from == to))
8945         return false;
8946
8947       if (aarch64_vector_mode_supported_p (from)
8948           && aarch64_vector_mode_supported_p (to))
8949         return false;
8950     }
8951
8952   return true;
8953 }
8954
8955 #undef TARGET_ADDRESS_COST
8956 #define TARGET_ADDRESS_COST aarch64_address_cost
8957
/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
8962 #undef TARGET_ALIGN_ANON_BITFIELD
8963 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8964
8965 #undef TARGET_ASM_ALIGNED_DI_OP
8966 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8967
8968 #undef TARGET_ASM_ALIGNED_HI_OP
8969 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8970
8971 #undef TARGET_ASM_ALIGNED_SI_OP
8972 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8973
8974 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8975 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8976   hook_bool_const_tree_hwi_hwi_const_tree_true
8977
8978 #undef TARGET_ASM_FILE_START
8979 #define TARGET_ASM_FILE_START aarch64_start_file
8980
8981 #undef TARGET_ASM_OUTPUT_MI_THUNK
8982 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8983
8984 #undef TARGET_ASM_SELECT_RTX_SECTION
8985 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8986
8987 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8988 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8989
8990 #undef TARGET_BUILD_BUILTIN_VA_LIST
8991 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8992
8993 #undef TARGET_CALLEE_COPIES
8994 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8995
8996 #undef TARGET_CAN_ELIMINATE
8997 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8998
8999 #undef TARGET_CANNOT_FORCE_CONST_MEM
9000 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
9001
9002 #undef TARGET_CONDITIONAL_REGISTER_USAGE
9003 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
9004
9005 /* Only the least significant bit is used for initialization guard
9006    variables.  */
9007 #undef TARGET_CXX_GUARD_MASK_BIT
9008 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
9009
9010 #undef TARGET_C_MODE_FOR_SUFFIX
9011 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
9012
9013 #ifdef TARGET_BIG_ENDIAN_DEFAULT
9014 #undef  TARGET_DEFAULT_TARGET_FLAGS
9015 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
9016 #endif
9017
9018 #undef TARGET_CLASS_MAX_NREGS
9019 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
9020
9021 #undef TARGET_BUILTIN_DECL
9022 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
9023
9024 #undef  TARGET_EXPAND_BUILTIN
9025 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
9026
9027 #undef TARGET_EXPAND_BUILTIN_VA_START
9028 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
9029
9030 #undef TARGET_FOLD_BUILTIN
9031 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
9032
9033 #undef TARGET_FUNCTION_ARG
9034 #define TARGET_FUNCTION_ARG aarch64_function_arg
9035
9036 #undef TARGET_FUNCTION_ARG_ADVANCE
9037 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
9038
9039 #undef TARGET_FUNCTION_ARG_BOUNDARY
9040 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
9041
9042 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
9043 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
9044
9045 #undef TARGET_FUNCTION_VALUE
9046 #define TARGET_FUNCTION_VALUE aarch64_function_value
9047
9048 #undef TARGET_FUNCTION_VALUE_REGNO_P
9049 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
9050
9051 #undef TARGET_FRAME_POINTER_REQUIRED
9052 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
9053
9054 #undef TARGET_GIMPLE_FOLD_BUILTIN
9055 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
9056
9057 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
9058 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
9059
9060 #undef  TARGET_INIT_BUILTINS
9061 #define TARGET_INIT_BUILTINS  aarch64_init_builtins
9062
9063 #undef TARGET_LEGITIMATE_ADDRESS_P
9064 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
9065
9066 #undef TARGET_LEGITIMATE_CONSTANT_P
9067 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
9068
9069 #undef TARGET_LIBGCC_CMP_RETURN_MODE
9070 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
9071
9072 #undef TARGET_LRA_P
9073 #define TARGET_LRA_P aarch64_lra_p
9074
9075 #undef TARGET_MANGLE_TYPE
9076 #define TARGET_MANGLE_TYPE aarch64_mangle_type
9077
9078 #undef TARGET_MEMORY_MOVE_COST
9079 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
9080
9081 #undef TARGET_MUST_PASS_IN_STACK
9082 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
9083
9084 /* This target hook should return true if accesses to volatile bitfields
9085    should use the narrowest mode possible.  It should return false if these
9086    accesses should use the bitfield container type.  */
9087 #undef TARGET_NARROW_VOLATILE_BITFIELD
9088 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
9089
/* Option-override hooks, bound to aarch64_override_options and
   aarch64_override_options_after_change.  The second #define continues
   onto the following line via backslash-newline; it is one definition.  */
9090 #undef  TARGET_OPTION_OVERRIDE
9091 #define TARGET_OPTION_OVERRIDE aarch64_override_options
9092
9093 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
9094 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
9095   aarch64_override_options_after_change
9096
/* Pass-by-reference, reload class selection, secondary reload, shift
   truncation mask, incoming-varargs setup and struct-value location.
   Each TARGET_* macro is first undefined and then bound to the aarch64_*
   name on the right.  Note that the entries here are only approximately
   in alphabetical order.  */
9097 #undef TARGET_PASS_BY_REFERENCE
9098 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
9099
9100 #undef TARGET_PREFERRED_RELOAD_CLASS
9101 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
9102
9103 #undef TARGET_SECONDARY_RELOAD
9104 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
9105
9106 #undef TARGET_SHIFT_TRUNCATION_MASK
9107 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
9108
9109 #undef TARGET_SETUP_INCOMING_VARARGS
9110 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
9111
9112 #undef TARGET_STRUCT_VALUE_RTX
9113 #define TARGET_STRUCT_VALUE_RTX   aarch64_struct_value_rtx
9114
/* Register move cost, return-in-memory and return-in-MSB conventions,
   RTX costs, scheduler issue rate and trampoline initialisation.  Each
   TARGET_* macro is first undefined and then bound to the aarch64_* name
   on the right.  */
9115 #undef TARGET_REGISTER_MOVE_COST
9116 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
9117
9118 #undef TARGET_RETURN_IN_MEMORY
9119 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
9120
9121 #undef TARGET_RETURN_IN_MSB
9122 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
9123
9124 #undef TARGET_RTX_COSTS
9125 #define TARGET_RTX_COSTS aarch64_rtx_costs
9126
9127 #undef TARGET_SCHED_ISSUE_RATE
9128 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
9129
9130 #undef TARGET_TRAMPOLINE_INIT
9131 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
9132
/* Use-blocks-for-constant predicate plus the vector-mode and array-mode
   "supported" predicates.  Each TARGET_* macro is first undefined and
   then bound to the aarch64_* name on the right.  */
9133 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9134 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
9135
9136 #undef TARGET_VECTOR_MODE_SUPPORTED_P
9137 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
9138
9139 #undef TARGET_ARRAY_MODE_SUPPORTED_P
9140 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
9141
/* TARGET_VECTORIZE_* hooks: statement cost, builtin vectorization cost,
   preferred SIMD mode, vectorized builtin lookup and autovectorization
   vector sizes.  Definitions whose replacement would overrun the line
   continue onto the next line via backslash-newline.  */
9142 #undef TARGET_VECTORIZE_ADD_STMT_COST
9143 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
9144
9145 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
9146 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
9147   aarch64_builtin_vectorization_cost
9148
9149 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
9150 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
9151
/* Defined with an empty replacement list, unlike every other macro in
   this list: it expands to nothing, so presumably only its being defined
   is tested -- NOTE(review): confirm where, if anywhere, it is used.  */
9152 #undef TARGET_VECTORIZE_BUILTINS
9153 #define TARGET_VECTORIZE_BUILTINS
9154
9155 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
9156 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
9157   aarch64_builtin_vectorized_function
9158
9159 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
9160 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
9161   aarch64_autovectorize_vector_sizes
9162
9163 /* Section anchor support.  */
9164
/* Unlike the hook macros elsewhere in this list, the two anchor offsets
   are bound to integer constants, not to function names.
   NOTE(review): -256 presumably mirrors the most negative immediate of
   a signed 9-bit unscaled load/store offset -- confirm against the
   addressing-mode checks in this file.  */
9165 #undef TARGET_MIN_ANCHOR_OFFSET
9166 #define TARGET_MIN_ANCHOR_OFFSET -256
9167
9168 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
9169    byte offset; we can do much more for larger data types, but have no way
9170    to determine the size of the access.  We assume accesses are aligned.  */
9171 #undef TARGET_MAX_ANCHOR_OFFSET
9172 #define TARGET_MAX_ANCHOR_OFFSET 4095
9173
/* Vector alignment hooks.  These follow the section-anchor offsets
   directly but are unrelated to them: both are bound to
   aarch64_simd_vector_alignment* names.  */
9174 #undef TARGET_VECTOR_ALIGNMENT
9175 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
9176
9177 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
9178 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
9179   aarch64_simd_vector_alignment_reachable
9180
9181 /* vec_perm support.  */
9182
/* Only the "const ok" query is installed in this list, bound to
   aarch64_vectorize_vec_perm_const_ok.  */
9183 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
9184 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
9185   aarch64_vectorize_vec_perm_const_ok
9186
9187
/* Condition-code and memory-ordering settings.  Only the first of these
   is bound to an aarch64_* function name; the other two are bound to
   constants.  */
9188 #undef TARGET_FIXED_CONDITION_CODE_REGS
9189 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
9190
/* Bound to the literal true.  NOTE(review): presumably declares that the
   target has a relaxed memory ordering -- confirm against the hook's
   documentation.  */
9191 #undef TARGET_RELAXED_ORDERING
9192 #define TARGET_RELAXED_ORDERING true
9193
/* Bound to CC_REGNUM, which is not defined in this part of the file
   (presumably a register-number constant from the machine description
   headers -- TODO confirm).  */
9194 #undef TARGET_FLAGS_REGNUM
9195 #define TARGET_FLAGS_REGNUM CC_REGNUM
9196
/* The target hook vector for this back end.  TARGET_INITIALIZER is not
   defined in this file; it is expected to come from target-def.h
   (included at the top) and to expand using the TARGET_* macros
   redefined earlier in this file.  Because macros are expanded at the
   point of use, this definition has to stay after all of those
   #undef/#define pairs.  */
9197 struct gcc_target targetm = TARGET_INITIALIZER;
9198
9199 #include "gt-aarch64.h"