gcc/config/aarch64/aarch64.c

   1 /* Machine description for AArch64 architecture.
   2    Copyright (C) 2009-2014 Free Software Foundation, Inc.
   3    Contributed by ARM Ltd.
   4
   5    This file is part of GCC.
   6
   7    GCC is free software; you can redistribute it and/or modify it
   8    under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3, or (at your option)
  10    any later version.
  11
  12    GCC is distributed in the hope that it will be useful, but
  13    WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15    General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with GCC; see the file COPYING3.  If not see
  19    <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "insn-codes.h"
  26 #include "rtl.h"
  27 #include "insn-attr.h"
  28 #include "tree.h"
  29 #include "stringpool.h"
  30 #include "stor-layout.h"
  31 #include "calls.h"
  32 #include "varasm.h"
  33 #include "regs.h"
  34 #include "df.h"
  35 #include "hard-reg-set.h"
  36 #include "output.h"
  37 #include "expr.h"
  38 #include "reload.h"
  39 #include "toplev.h"
  40 #include "target.h"
  41 #include "target-def.h"
  42 #include "targhooks.h"
  43 #include "ggc.h"
  44 #include "function.h"
  45 #include "tm_p.h"
  46 #include "recog.h"
  47 #include "langhooks.h"
  48 #include "diagnostic-core.h"
  49 #include "pointer-set.h"
  50 #include "hash-table.h"
  51 #include "vec.h"
  52 #include "basic-block.h"
  53 #include "tree-ssa-alias.h"
  54 #include "internal-fn.h"
  55 #include "gimple-fold.h"
  56 #include "tree-eh.h"
  57 #include "gimple-expr.h"
  58 #include "is-a.h"
  59 #include "gimple.h"
  60 #include "gimplify.h"
  61 #include "optabs.h"
  62 #include "dwarf2.h"
  63 #include "cfgloop.h"
  64 #include "tree-vectorizer.h"
  65 #include "config/arm/aarch-cost-tables.h"
  66 #include "dumpfile.h"
  67
  68 /* Defined for convenience.  */
  69 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
  70
  71 /* Classifies an address.
  72
  73    ADDRESS_REG_IMM
  74        A simple base register plus immediate offset.
  75
  76    ADDRESS_REG_WB
  77        A base register indexed by immediate offset with writeback.
  78
  79    ADDRESS_REG_REG
  80        A base register indexed by (optionally scaled) register.
  81
  82    ADDRESS_REG_UXTW
  83        A base register indexed by (optionally scaled) zero-extended register.
  84
  85    ADDRESS_REG_SXTW
  86        A base register indexed by (optionally scaled) sign-extended register.
  87
  88    ADDRESS_LO_SUM
  89        A LO_SUM rtx with a base register and "LO12" symbol relocation.
  90
  91    ADDRESS_SYMBOLIC:
  92        A constant symbolic address, in pc-relative literal pool.  */
  93
  94 enum aarch64_address_type {
  95   ADDRESS_REG_IMM,
  96   ADDRESS_REG_WB,
  97   ADDRESS_REG_REG,
  98   ADDRESS_REG_UXTW,
  99   ADDRESS_REG_SXTW,
 100   ADDRESS_LO_SUM,
 101   ADDRESS_SYMBOLIC
 102 };
 103
 104 struct aarch64_address_info {
 105   enum aarch64_address_type type;
 106   rtx base;
 107   rtx offset;
 108   int shift;
 109   enum aarch64_symbol_type symbol_type;
 110 };
 111
 112 struct simd_immediate_info
 113 {
 114   rtx value;
 115   int shift;
 116   int element_width;
 117   bool mvn;
 118   bool msl;
 119 };
 120
 121 /* The current code model.  */
 122 enum aarch64_code_model aarch64_cmodel;
 123
 124 #ifdef HAVE_AS_TLS
 125 #undef TARGET_HAVE_TLS
 126 #define TARGET_HAVE_TLS 1
 127 #endif
 128
 129 static bool aarch64_lra_p (void);
 130 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
 131 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
 132                                                      const_tree,
 133                                                      enum machine_mode *, int *,
 134                                                      bool *);
 135 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
 136 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
 137 static void aarch64_override_options_after_change (void);
 138 static bool aarch64_vector_mode_supported_p (enum machine_mode);
 139 static unsigned bit_count (unsigned HOST_WIDE_INT);
 140 static bool aarch64_const_vec_all_same_int_p (rtx,
 141                                               HOST_WIDE_INT, HOST_WIDE_INT);
 142
 143 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
 144                                                  const unsigned char *sel);
 145 static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
 146
 147 /* The processor for which instructions should be scheduled.  */
 148 enum aarch64_processor aarch64_tune = cortexa53;
 149
 150 /* The current tuning set.  */
 151 const struct tune_params *aarch64_tune_params;
 152
 153 /* Mask to specify which instructions we are allowed to generate.  */
 154 unsigned long aarch64_isa_flags = 0;
 155
 156 /* Mask to specify which instruction scheduling options should be used.  */
 157 unsigned long aarch64_tune_flags = 0;
 158
 159 /* Tuning parameters.  */
 160
 161 #if HAVE_DESIGNATED_INITIALIZERS
 162 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
 163 #else
 164 #define NAMED_PARAM(NAME, VAL) (VAL)
 165 #endif
 166
 167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 168 __extension__
 169 #endif
 170
 171 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 172 __extension__
 173 #endif
 174 static const struct cpu_addrcost_table generic_addrcost_table =
 175 {
 176 #if HAVE_DESIGNATED_INITIALIZERS
 177   .addr_scale_costs =
 178 #endif
 179     {
 180       NAMED_PARAM (qi, 0),
 181       NAMED_PARAM (hi, 0),
 182       NAMED_PARAM (si, 0),
 183       NAMED_PARAM (ti, 0),
 184     },
 185   NAMED_PARAM (pre_modify, 0),
 186   NAMED_PARAM (post_modify, 0),
 187   NAMED_PARAM (register_offset, 0),
 188   NAMED_PARAM (register_extend, 0),
 189   NAMED_PARAM (imm_offset, 0)
 190 };
 191
 192 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 193 __extension__
 194 #endif
 195 static const struct cpu_addrcost_table cortexa57_addrcost_table =
 196 {
 197 #if HAVE_DESIGNATED_INITIALIZERS
 198   .addr_scale_costs =
 199 #endif
 200     {
 201       NAMED_PARAM (qi, 0),
 202       NAMED_PARAM (hi, 1),
 203       NAMED_PARAM (si, 0),
 204       NAMED_PARAM (ti, 1),
 205     },
 206   NAMED_PARAM (pre_modify, 0),
 207   NAMED_PARAM (post_modify, 0),
 208   NAMED_PARAM (register_offset, 0),
 209   NAMED_PARAM (register_extend, 0),
 210   NAMED_PARAM (imm_offset, 0),
 211 };
 212
 213 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 214 __extension__
 215 #endif
 216 static const struct cpu_regmove_cost generic_regmove_cost =
 217 {
 218   NAMED_PARAM (GP2GP, 1),
 219   NAMED_PARAM (GP2FP, 2),
 220   NAMED_PARAM (FP2GP, 2),
 221   /* We currently do not provide direct support for TFmode Q->Q move.
 222      Therefore we need to raise the cost above 2 in order to have
 223      reload handle the situation.  */
 224   NAMED_PARAM (FP2FP, 4)
 225 };
 226
 227 /* Generic costs for vector insn classes.  */
 228 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 229 __extension__
 230 #endif
 231 static const struct cpu_vector_cost generic_vector_cost =
 232 {
 233   NAMED_PARAM (scalar_stmt_cost, 1),
 234   NAMED_PARAM (scalar_load_cost, 1),
 235   NAMED_PARAM (scalar_store_cost, 1),
 236   NAMED_PARAM (vec_stmt_cost, 1),
 237   NAMED_PARAM (vec_to_scalar_cost, 1),
 238   NAMED_PARAM (scalar_to_vec_cost, 1),
 239   NAMED_PARAM (vec_align_load_cost, 1),
 240   NAMED_PARAM (vec_unalign_load_cost, 1),
 241   NAMED_PARAM (vec_unalign_store_cost, 1),
 242   NAMED_PARAM (vec_store_cost, 1),
 243   NAMED_PARAM (cond_taken_branch_cost, 3),
 244   NAMED_PARAM (cond_not_taken_branch_cost, 1)
 245 };
 246
 247 /* Generic costs for vector insn classes.  */
 248 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 249 __extension__
 250 #endif
 251 static const struct cpu_vector_cost cortexa57_vector_cost =
 252 {
 253   NAMED_PARAM (scalar_stmt_cost, 1),
 254   NAMED_PARAM (scalar_load_cost, 4),
 255   NAMED_PARAM (scalar_store_cost, 1),
 256   NAMED_PARAM (vec_stmt_cost, 3),
 257   NAMED_PARAM (vec_to_scalar_cost, 8),
 258   NAMED_PARAM (scalar_to_vec_cost, 8),
 259   NAMED_PARAM (vec_align_load_cost, 5),
 260   NAMED_PARAM (vec_unalign_load_cost, 5),
 261   NAMED_PARAM (vec_unalign_store_cost, 1),
 262   NAMED_PARAM (vec_store_cost, 1),
 263   NAMED_PARAM (cond_taken_branch_cost, 1),
 264   NAMED_PARAM (cond_not_taken_branch_cost, 1)
 265 };
 266
 267 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 268 __extension__
 269 #endif
 270 static const struct tune_params generic_tunings =
 271 {
 272   &cortexa57_extra_costs,
 273   &generic_addrcost_table,
 274   &generic_regmove_cost,
 275   &generic_vector_cost,
 276   NAMED_PARAM (memmov_cost, 4),
 277   NAMED_PARAM (issue_rate, 2)
 278 };
 279
 280 static const struct tune_params cortexa53_tunings =
 281 {
 282   &cortexa53_extra_costs,
 283   &generic_addrcost_table,
 284   &generic_regmove_cost,
 285   &generic_vector_cost,
 286   NAMED_PARAM (memmov_cost, 4),
 287   NAMED_PARAM (issue_rate, 2)
 288 };
 289
 290 static const struct tune_params cortexa57_tunings =
 291 {
 292   &cortexa57_extra_costs,
 293   &cortexa57_addrcost_table,
 294   &generic_regmove_cost,
 295   &cortexa57_vector_cost,
 296   NAMED_PARAM (memmov_cost, 4),
 297   NAMED_PARAM (issue_rate, 3)
 298 };
 299
 300 /* A processor implementing AArch64.  */
 301 struct processor
 302 {
 303   const char *const name;
 304   enum aarch64_processor core;
 305   const char *arch;
 306   const unsigned long flags;
 307   const struct tune_params *const tune;
 308 };
 309
 310 /* Processor cores implementing AArch64.  */
 311 static const struct processor all_cores[] =
 312 {
 313 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
 314   {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
 315 #include "aarch64-cores.def"
 316 #undef AARCH64_CORE
 317   {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
 318   {NULL, aarch64_none, NULL, 0, NULL}
 319 };
 320
 321 /* Architectures implementing AArch64.  */
 322 static const struct processor all_architectures[] =
 323 {
 324 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
 325   {NAME, CORE, #ARCH, FLAGS, NULL},
 326 #include "aarch64-arches.def"
 327 #undef AARCH64_ARCH
 328   {NULL, aarch64_none, NULL, 0, NULL}
 329 };
 330
 331 /* Target specification.  These are populated as commandline arguments
 332    are processed, or NULL if not specified.  */
 333 static const struct processor *selected_arch;
 334 static const struct processor *selected_cpu;
 335 static const struct processor *selected_tune;
 336
 337 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
 338
 339 /* An ISA extension in the co-processor and main instruction set space.  */
 340 struct aarch64_option_extension
 341 {
 342   const char *const name;
 343   const unsigned long flags_on;
 344   const unsigned long flags_off;
 345 };
 346
 347 /* ISA extensions in AArch64.  */
 348 static const struct aarch64_option_extension all_extensions[] =
 349 {
 350 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
 351   {NAME, FLAGS_ON, FLAGS_OFF},
 352 #include "aarch64-option-extensions.def"
 353 #undef AARCH64_OPT_EXTENSION
 354   {NULL, 0, 0}
 355 };
 356
 357 /* Used to track the size of an address when generating a pre/post
 358    increment address.  */
 359 static enum machine_mode aarch64_memory_reference_mode;
 360
 361 /* Used to force GTY into this file.  */
 362 static GTY(()) int gty_dummy;
 363
 364 /* A table of valid AArch64 "bitmask immediate" values for
 365    logical instructions.  */
 366
 367 #define AARCH64_NUM_BITMASKS  5334
 368 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
 369
 370 typedef enum aarch64_cond_code
 371 {
 372   AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
 373   AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
 374   AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
 375 }
 376 aarch64_cc;
 377
 378 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
 379
 380 /* The condition codes of the processor, and the inverse function.  */
 381 static const char * const aarch64_condition_codes[] =
 382 {
 383   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
 384   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
 385 };
 386
 387 /* Provide a mapping from gcc register numbers to dwarf register numbers.  */
 388 unsigned
 389 aarch64_dbx_register_number (unsigned regno)
 390 {
 391    if (GP_REGNUM_P (regno))
 392      return AARCH64_DWARF_R0 + regno - R0_REGNUM;
 393    else if (regno == SP_REGNUM)
 394      return AARCH64_DWARF_SP;
 395    else if (FP_REGNUM_P (regno))
 396      return AARCH64_DWARF_V0 + regno - V0_REGNUM;
 397
 398    /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
 399       equivalent DWARF register.  */
 400    return DWARF_FRAME_REGISTERS;
 401 }
 402
 403 /* Return TRUE if MODE is any of the large INT modes.  */
 404 static bool
 405 aarch64_vect_struct_mode_p (enum machine_mode mode)
 406 {
 407   return mode == OImode || mode == CImode || mode == XImode;
 408 }
 409
 410 /* Return TRUE if MODE is any of the vector modes.  */
 411 static bool
 412 aarch64_vector_mode_p (enum machine_mode mode)
 413 {
 414   return aarch64_vector_mode_supported_p (mode)
 415          || aarch64_vect_struct_mode_p (mode);
 416 }
 417
 418 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
 419 static bool
 420 aarch64_array_mode_supported_p (enum machine_mode mode,
 421                                 unsigned HOST_WIDE_INT nelems)
 422 {
 423   if (TARGET_SIMD
 424       && AARCH64_VALID_SIMD_QREG_MODE (mode)
 425       && (nelems >= 2 && nelems <= 4))
 426     return true;
 427
 428   return false;
 429 }
 430
 431 /* Implement HARD_REGNO_NREGS.  */
 432
 433 int
 434 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
 435 {
 436   switch (aarch64_regno_regclass (regno))
 437     {
 438     case FP_REGS:
 439     case FP_LO_REGS:
 440       return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
 441     default:
 442       return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
 443     }
 444   gcc_unreachable ();
 445 }
 446
 447 /* Implement HARD_REGNO_MODE_OK.  */
 448
 449 int
 450 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
 451 {
 452   if (GET_MODE_CLASS (mode) == MODE_CC)
 453     return regno == CC_REGNUM;
 454
 455   if (regno == SP_REGNUM)
 456     /* The purpose of comparing with ptr_mode is to support the
 457        global register variable associated with the stack pointer
 458        register via the syntax of asm ("wsp") in ILP32.  */
 459     return mode == Pmode || mode == ptr_mode;
 460
 461   if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
 462     return mode == Pmode;
 463
 464   if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
 465     return 1;
 466
 467   if (FP_REGNUM_P (regno))
 468     {
 469       if (aarch64_vect_struct_mode_p (mode))
 470         return
 471           (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
 472       else
 473         return 1;
 474     }
 475
 476   return 0;
 477 }
 478
 479 /* Return true if calls to DECL should be treated as
 480    long-calls (ie called via a register).  */
 481 static bool
 482 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
 483 {
 484   return false;
 485 }
 486
 487 /* Return true if calls to symbol-ref SYM should be treated as
 488    long-calls (ie called via a register).  */
 489 bool
 490 aarch64_is_long_call_p (rtx sym)
 491 {
 492   return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
 493 }
 494
 495 /* Return true if the offsets to a zero/sign-extract operation
 496    represent an expression that matches an extend operation.  The
 497    operands represent the paramters from
 498
 499    (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
 500 bool
 501 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
 502                                 rtx extract_imm)
 503 {
 504   HOST_WIDE_INT mult_val, extract_val;
 505
 506   if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
 507     return false;
 508
 509   mult_val = INTVAL (mult_imm);
 510   extract_val = INTVAL (extract_imm);
 511
 512   if (extract_val > 8
 513       && extract_val < GET_MODE_BITSIZE (mode)
 514       && exact_log2 (extract_val & ~7) > 0
 515       && (extract_val & 7) <= 4
 516       && mult_val == (1 << (extract_val & 7)))
 517     return true;
 518
 519   return false;
 520 }
 521
 522 /* Emit an insn that's a simple single-set.  Both the operands must be
 523    known to be valid.  */
 524 inline static rtx
 525 emit_set_insn (rtx x, rtx y)
 526 {
 527   return emit_insn (gen_rtx_SET (VOIDmode, x, y));
 528 }
 529
 530 /* X and Y are two things to compare using CODE.  Emit the compare insn and
 531    return the rtx for register 0 in the proper mode.  */
 532 rtx
 533 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
 534 {
 535   enum machine_mode mode = SELECT_CC_MODE (code, x, y);
 536   rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
 537
 538   emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
 539   return cc_reg;
 540 }
 541
 542 /* Build the SYMBOL_REF for __tls_get_addr.  */
 543
 544 static GTY(()) rtx tls_get_addr_libfunc;
 545
 546 rtx
 547 aarch64_tls_get_addr (void)
 548 {
 549   if (!tls_get_addr_libfunc)
 550     tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
 551   return tls_get_addr_libfunc;
 552 }
 553
 554 /* Return the TLS model to use for ADDR.  */
 555
 556 static enum tls_model
 557 tls_symbolic_operand_type (rtx addr)
 558 {
 559   enum tls_model tls_kind = TLS_MODEL_NONE;
 560   rtx sym, addend;
 561
 562   if (GET_CODE (addr) == CONST)
 563     {
 564       split_const (addr, &sym, &addend);
 565       if (GET_CODE (sym) == SYMBOL_REF)
 566         tls_kind = SYMBOL_REF_TLS_MODEL (sym);
 567     }
 568   else if (GET_CODE (addr) == SYMBOL_REF)
 569     tls_kind = SYMBOL_REF_TLS_MODEL (addr);
 570
 571   return tls_kind;
 572 }
 573
 574 /* We'll allow lo_sum's in addresses in our legitimate addresses
 575    so that combine would take care of combining addresses where
 576    necessary, but for generation purposes, we'll generate the address
 577    as :
 578    RTL                               Absolute
 579    tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo12:foo
 581                                      nop
 582
 583    PIC                               TLS
 584    adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
 585    ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
 586                                      bl   __tls_get_addr
 587                                      nop
 588
 589    Load TLS symbol, depending on TLS mechanism and TLS access model.
 590
 591    Global Dynamic - Traditional TLS:
 592    adrp tmp, :tlsgd:imm
 593    add  dest, tmp, #:tlsgd_lo12:imm
 594    bl   __tls_get_addr
 595
 596    Global Dynamic - TLS Descriptors:
 597    adrp dest, :tlsdesc:imm
 598    ldr  tmp, [dest, #:tlsdesc_lo12:imm]
 599    add  dest, dest, #:tlsdesc_lo12:imm
 600    blr  tmp
 601    mrs  tp, tpidr_el0
 602    add  dest, dest, tp
 603
 604    Initial Exec:
 605    mrs  tp, tpidr_el0
 606    adrp tmp, :gottprel:imm
 607    ldr  dest, [tmp, #:gottprel_lo12:imm]
 608    add  dest, dest, tp
 609
 610    Local Exec:
 611    mrs  tp, tpidr_el0
 612    add  t0, tp, #:tprel_hi12:imm
 613    add  t0, #:tprel_lo12_nc:imm
 614 */
 615
 616 static void
 617 aarch64_load_symref_appropriately (rtx dest, rtx imm,
 618                                    enum aarch64_symbol_type type)
 619 {
 620   switch (type)
 621     {
 622     case SYMBOL_SMALL_ABSOLUTE:
 623       {
 624         /* In ILP32, the mode of dest can be either SImode or DImode.  */
 625         rtx tmp_reg = dest;
 626         enum machine_mode mode = GET_MODE (dest);
 627
 628         gcc_assert (mode == Pmode || mode == ptr_mode);
 629
 630         if (can_create_pseudo_p ())
 631           tmp_reg = gen_reg_rtx (mode);
 632
 633         emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
 634         emit_insn (gen_add_losym (dest, tmp_reg, imm));
 635         return;
 636       }
 637
 638     case SYMBOL_TINY_ABSOLUTE:
 639       emit_insn (gen_rtx_SET (Pmode, dest, imm));
 640       return;
 641
 642     case SYMBOL_SMALL_GOT:
 643       {
 644         /* In ILP32, the mode of dest can be either SImode or DImode,
 645            while the got entry is always of SImode size.  The mode of
 646            dest depends on how dest is used: if dest is assigned to a
 647            pointer (e.g. in the memory), it has SImode; it may have
 648            DImode if dest is dereferenced to access the memeory.
 649            This is why we have to handle three different ldr_got_small
 650            patterns here (two patterns for ILP32).  */
 651         rtx tmp_reg = dest;
 652         enum machine_mode mode = GET_MODE (dest);
 653
 654         if (can_create_pseudo_p ())
 655           tmp_reg = gen_reg_rtx (mode);
 656
 657         emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
 658         if (mode == ptr_mode)
 659           {
 660             if (mode == DImode)
 661               emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
 662             else
 663               emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
 664           }
 665         else
 666           {
 667             gcc_assert (mode == Pmode);
 668             emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
 669           }
 670
 671         return;
 672       }
 673
 674     case SYMBOL_SMALL_TLSGD:
 675       {
 676         rtx insns;
 677         rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
 678
 679         start_sequence ();
 680         emit_call_insn (gen_tlsgd_small (result, imm));
 681         insns = get_insns ();
 682         end_sequence ();
 683
 684         RTL_CONST_CALL_P (insns) = 1;
 685         emit_libcall_block (insns, dest, result, imm);
 686         return;
 687       }
 688
 689     case SYMBOL_SMALL_TLSDESC:
 690       {
 691         rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
 692         rtx tp;
 693
 694         emit_insn (gen_tlsdesc_small (imm));
 695         tp = aarch64_load_tp (NULL);
 696         emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
 697         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 698         return;
 699       }
 700
 701     case SYMBOL_SMALL_GOTTPREL:
 702       {
 703         rtx tmp_reg = gen_reg_rtx (Pmode);
 704         rtx tp = aarch64_load_tp (NULL);
 705         emit_insn (gen_tlsie_small (tmp_reg, imm));
 706         emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
 707         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 708         return;
 709       }
 710
 711     case SYMBOL_SMALL_TPREL:
 712       {
 713         rtx tp = aarch64_load_tp (NULL);
 714         emit_insn (gen_tlsle_small (dest, tp, imm));
 715         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 716         return;
 717       }
 718
 719     case SYMBOL_TINY_GOT:
 720       emit_insn (gen_ldr_got_tiny (dest, imm));
 721       return;
 722
 723     default:
 724       gcc_unreachable ();
 725     }
 726 }
 727
 728 /* Emit a move from SRC to DEST.  Assume that the move expanders can
 729    handle all moves if !can_create_pseudo_p ().  The distinction is
 730    important because, unlike emit_move_insn, the move expanders know
 731    how to force Pmode objects into the constant pool even when the
 732    constant pool address is not itself legitimate.  */
 733 static rtx
 734 aarch64_emit_move (rtx dest, rtx src)
 735 {
 736   return (can_create_pseudo_p ()
 737           ? emit_move_insn (dest, src)
 738           : emit_move_insn_1 (dest, src));
 739 }
 740
 741 /* Split a 128-bit move operation into two 64-bit move operations,
 742    taking care to handle partial overlap of register to register
 743    copies.  Special cases are needed when moving between GP regs and
 744    FP regs.  SRC can be a register, constant or memory; DST a register
 745    or memory.  If either operand is memory it must not have any side
 746    effects.  */
 747 void
 748 aarch64_split_128bit_move (rtx dst, rtx src)
 749 {
 750   rtx dst_lo, dst_hi;
 751   rtx src_lo, src_hi;
 752
 753   enum machine_mode mode = GET_MODE (dst);
 754
 755   gcc_assert (mode == TImode || mode == TFmode);
 756   gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
 757   gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
 758
 759   if (REG_P (dst) && REG_P (src))
 760     {
 761       int src_regno = REGNO (src);
 762       int dst_regno = REGNO (dst);
 763
 764       /* Handle FP <-> GP regs.  */
 765       if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
 766         {
 767           src_lo = gen_lowpart (word_mode, src);
 768           src_hi = gen_highpart (word_mode, src);
 769
 770           if (mode == TImode)
 771             {
 772               emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
 773               emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
 774             }
 775           else
 776             {
 777               emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
 778               emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
 779             }
 780           return;
 781         }
 782       else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
 783         {
 784           dst_lo = gen_lowpart (word_mode, dst);
 785           dst_hi = gen_highpart (word_mode, dst);
 786
 787           if (mode == TImode)
 788             {
 789               emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
 790               emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
 791             }
 792           else
 793             {
 794               emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
 795               emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
 796             }
 797           return;
 798         }
 799     }
 800
 801   dst_lo = gen_lowpart (word_mode, dst);
 802   dst_hi = gen_highpart (word_mode, dst);
 803   src_lo = gen_lowpart (word_mode, src);
 804   src_hi = gen_highpart_mode (word_mode, mode, src);
 805
 806   /* At most one pairing may overlap.  */
 807   if (reg_overlap_mentioned_p (dst_lo, src_hi))
 808     {
 809       aarch64_emit_move (dst_hi, src_hi);
 810       aarch64_emit_move (dst_lo, src_lo);
 811     }
 812   else
 813     {
 814       aarch64_emit_move (dst_lo, src_lo);
 815       aarch64_emit_move (dst_hi, src_hi);
 816     }
 817 }
 818
 819 bool
 820 aarch64_split_128bit_move_p (rtx dst, rtx src)
 821 {
 822   return (! REG_P (src)
 823           || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
 824 }
 825
 826 /* Split a complex SIMD combine.  */
 827
 828 void
 829 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
 830 {
 831   enum machine_mode src_mode = GET_MODE (src1);
 832   enum machine_mode dst_mode = GET_MODE (dst);
 833
 834   gcc_assert (VECTOR_MODE_P (dst_mode));
 835
 836   if (REG_P (dst) && REG_P (src1) && REG_P (src2))
 837     {
 838       rtx (*gen) (rtx, rtx, rtx);
 839
 840       switch (src_mode)
 841         {
 842         case V8QImode:
 843           gen = gen_aarch64_simd_combinev8qi;
 844           break;
 845         case V4HImode:
 846           gen = gen_aarch64_simd_combinev4hi;
 847           break;
 848         case V2SImode:
 849           gen = gen_aarch64_simd_combinev2si;
 850           break;
 851         case V2SFmode:
 852           gen = gen_aarch64_simd_combinev2sf;
 853           break;
 854         case DImode:
 855           gen = gen_aarch64_simd_combinedi;
 856           break;
 857         case DFmode:
 858           gen = gen_aarch64_simd_combinedf;
 859           break;
 860         default:
 861           gcc_unreachable ();
 862         }
 863
 864       emit_insn (gen (dst, src1, src2));
 865       return;
 866     }
 867 }
 868
 869 /* Split a complex SIMD move.  */
 870
 871 void
 872 aarch64_split_simd_move (rtx dst, rtx src)
 873 {
 874   enum machine_mode src_mode = GET_MODE (src);
 875   enum machine_mode dst_mode = GET_MODE (dst);
 876
 877   gcc_assert (VECTOR_MODE_P (dst_mode));
 878
 879   if (REG_P (dst) && REG_P (src))
 880     {
 881       rtx (*gen) (rtx, rtx);
 882
 883       gcc_assert (VECTOR_MODE_P (src_mode));
 884
 885       switch (src_mode)
 886         {
 887         case V16QImode:
 888           gen = gen_aarch64_split_simd_movv16qi;
 889           break;
 890         case V8HImode:
 891           gen = gen_aarch64_split_simd_movv8hi;
 892           break;
 893         case V4SImode:
 894           gen = gen_aarch64_split_simd_movv4si;
 895           break;
 896         case V2DImode:
 897           gen = gen_aarch64_split_simd_movv2di;
 898           break;
 899         case V4SFmode:
 900           gen = gen_aarch64_split_simd_movv4sf;
 901           break;
 902         case V2DFmode:
 903           gen = gen_aarch64_split_simd_movv2df;
 904           break;
 905         default:
 906           gcc_unreachable ();
 907         }
 908
 909       emit_insn (gen (dst, src));
 910       return;
 911     }
 912 }
 913
 914 static rtx
 915 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
 916 {
 917   if (can_create_pseudo_p ())
 918     return force_reg (mode, value);
 919   else
 920     {
 921       x = aarch64_emit_move (x, value);
 922       return x;
 923     }
 924 }
 925
 926
 927 static rtx
 928 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
 929 {
 930   if (!aarch64_plus_immediate (GEN_INT (offset), mode))
 931     {
 932       rtx high;
 933       /* Load the full offset into a register.  This
 934          might be improvable in the future.  */
 935       high = GEN_INT (offset);
 936       offset = 0;
 937       high = aarch64_force_temporary (mode, temp, high);
 938       reg = aarch64_force_temporary (mode, temp,
 939                                      gen_rtx_PLUS (mode, high, reg));
 940     }
 941   return plus_constant (mode, reg, offset);
 942 }
 943
 944 void
 945 aarch64_expand_mov_immediate (rtx dest, rtx imm)
 946 {
 947   enum machine_mode mode = GET_MODE (dest);
 948   unsigned HOST_WIDE_INT mask;
 949   int i;
 950   bool first;
 951   unsigned HOST_WIDE_INT val;
 952   bool subtargets;
 953   rtx subtarget;
 954   int one_match, zero_match;
 955
 956   gcc_assert (mode == SImode || mode == DImode);
 957
 958   /* Check on what type of symbol it is.  */
 959   if (GET_CODE (imm) == SYMBOL_REF
 960       || GET_CODE (imm) == LABEL_REF
 961       || GET_CODE (imm) == CONST)
 962     {
 963       rtx mem, base, offset;
 964       enum aarch64_symbol_type sty;
 965
 966       /* If we have (const (plus symbol offset)), separate out the offset
 967          before we start classifying the symbol.  */
 968       split_const (imm, &base, &offset);
 969
 970       sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
 971       switch (sty)
 972         {
 973         case SYMBOL_FORCE_TO_MEM:
 974           if (offset != const0_rtx
 975               && targetm.cannot_force_const_mem (mode, imm))
 976             {
 977               gcc_assert (can_create_pseudo_p ());
 978               base = aarch64_force_temporary (mode, dest, base);
 979               base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
 980               aarch64_emit_move (dest, base);
 981               return;
 982             }
 983           mem = force_const_mem (ptr_mode, imm);
 984           gcc_assert (mem);
 985           if (mode != ptr_mode)
 986             mem = gen_rtx_ZERO_EXTEND (mode, mem);
 987           emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
 988           return;
 989
 990         case SYMBOL_SMALL_TLSGD:
 991         case SYMBOL_SMALL_TLSDESC:
 992         case SYMBOL_SMALL_GOTTPREL:
 993         case SYMBOL_SMALL_GOT:
 994         case SYMBOL_TINY_GOT:
 995           if (offset != const0_rtx)
 996             {
 997               gcc_assert(can_create_pseudo_p ());
 998               base = aarch64_force_temporary (mode, dest, base);
 999               base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1000               aarch64_emit_move (dest, base);
1001               return;
1002             }
1003           /* FALLTHRU */
1004
1005         case SYMBOL_SMALL_TPREL:
1006         case SYMBOL_SMALL_ABSOLUTE:
1007         case SYMBOL_TINY_ABSOLUTE:
1008           aarch64_load_symref_appropriately (dest, imm, sty);
1009           return;
1010
1011         default:
1012           gcc_unreachable ();
1013         }
1014     }
1015
1016   if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1017     {
1018       emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1019       return;
1020     }
1021
1022   if (!CONST_INT_P (imm))
1023     {
1024       if (GET_CODE (imm) == HIGH)
1025         emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1026       else
1027         {
1028           rtx mem = force_const_mem (mode, imm);
1029           gcc_assert (mem);
1030           emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1031         }
1032
1033       return;
1034     }
1035
1036   if (mode == SImode)
1037     {
1038       /* We know we can't do this in 1 insn, and we must be able to do it
1039          in two; so don't mess around looking for sequences that don't buy
1040          us anything.  */
1041       emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1042       emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1043                                  GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1044       return;
1045     }
1046
1047   /* Remaining cases are all for DImode.  */
1048
1049   val = INTVAL (imm);
1050   subtargets = optimize && can_create_pseudo_p ();
1051
1052   one_match = 0;
1053   zero_match = 0;
1054   mask = 0xffff;
1055
1056   for (i = 0; i < 64; i += 16, mask <<= 16)
1057     {
1058       if ((val & mask) == 0)
1059         zero_match++;
1060       else if ((val & mask) == mask)
1061         one_match++;
1062     }
1063
1064   if (one_match == 2)
1065     {
1066       mask = 0xffff;
1067       for (i = 0; i < 64; i += 16, mask <<= 16)
1068         {
1069           if ((val & mask) != mask)
1070             {
1071               emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1072               emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1073                                          GEN_INT ((val >> i) & 0xffff)));
1074               return;
1075             }
1076         }
1077       gcc_unreachable ();
1078     }
1079
1080   if (zero_match == 2)
1081     goto simple_sequence;
1082
1083   mask = 0x0ffff0000UL;
1084   for (i = 16; i < 64; i += 16, mask <<= 16)
1085     {
1086       HOST_WIDE_INT comp = mask & ~(mask - 1);
1087
1088       if (aarch64_uimm12_shift (val - (val & mask)))
1089         {
1090           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1091
1092           emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1093           emit_insn (gen_adddi3 (dest, subtarget,
1094                                  GEN_INT (val - (val & mask))));
1095           return;
1096         }
1097       else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1098         {
1099           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1100
1101           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1102                                   GEN_INT ((val + comp) & mask)));
1103           emit_insn (gen_adddi3 (dest, subtarget,
1104                                  GEN_INT (val - ((val + comp) & mask))));
1105           return;
1106         }
1107       else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1108         {
1109           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1110
1111           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1112                                   GEN_INT ((val - comp) | ~mask)));
1113           emit_insn (gen_adddi3 (dest, subtarget,
1114                                  GEN_INT (val - ((val - comp) | ~mask))));
1115           return;
1116         }
1117       else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1118         {
1119           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1120
1121           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1122                                   GEN_INT (val | ~mask)));
1123           emit_insn (gen_adddi3 (dest, subtarget,
1124                                  GEN_INT (val - (val | ~mask))));
1125           return;
1126         }
1127     }
1128
1129   /* See if we can do it by arithmetically combining two
1130      immediates.  */
1131   for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1132     {
1133       int j;
1134       mask = 0xffff;
1135
1136       if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1137           || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1138         {
1139           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1140           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1141                                   GEN_INT (aarch64_bitmasks[i])));
1142           emit_insn (gen_adddi3 (dest, subtarget,
1143                                  GEN_INT (val - aarch64_bitmasks[i])));
1144           return;
1145         }
1146
1147       for (j = 0; j < 64; j += 16, mask <<= 16)
1148         {
1149           if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1150             {
1151               emit_insn (gen_rtx_SET (VOIDmode, dest,
1152                                       GEN_INT (aarch64_bitmasks[i])));
1153               emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1154                                          GEN_INT ((val >> j) & 0xffff)));
1155               return;
1156             }
1157         }
1158     }
1159
1160   /* See if we can do it by logically combining two immediates.  */
1161   for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1162     {
1163       if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1164         {
1165           int j;
1166
1167           for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1168             if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1169               {
1170                 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1171                 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1172                                         GEN_INT (aarch64_bitmasks[i])));
1173                 emit_insn (gen_iordi3 (dest, subtarget,
1174                                        GEN_INT (aarch64_bitmasks[j])));
1175                 return;
1176               }
1177         }
1178       else if ((val & aarch64_bitmasks[i]) == val)
1179         {
1180           int j;
1181
1182           for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1183             if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1184               {
1185
1186                 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1187                 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1188                                         GEN_INT (aarch64_bitmasks[j])));
1189                 emit_insn (gen_anddi3 (dest, subtarget,
1190                                        GEN_INT (aarch64_bitmasks[i])));
1191                 return;
1192               }
1193         }
1194     }
1195
1196  simple_sequence:
1197   first = true;
1198   mask = 0xffff;
1199   for (i = 0; i < 64; i += 16, mask <<= 16)
1200     {
1201       if ((val & mask) != 0)
1202         {
1203           if (first)
1204             {
1205               emit_insn (gen_rtx_SET (VOIDmode, dest,
1206                                       GEN_INT (val & mask)));
1207               first = false;
1208             }
1209           else
1210             emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1211                                        GEN_INT ((val >> i) & 0xffff)));
1212         }
1213     }
1214 }
1215
1216 static bool
1217 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1218 {
1219   /* Indirect calls are not currently supported.  */
1220   if (decl == NULL)
1221     return false;
1222
1223   /* Cannot tail-call to long-calls, since these are outside of the
1224      range of a branch instruction (we could handle this if we added
1225      support for indirect tail-calls.  */
1226   if (aarch64_decl_is_long_call_p (decl))
1227     return false;
1228
1229   return true;
1230 }
1231
1232 /* Implement TARGET_PASS_BY_REFERENCE.  */
1233
1234 static bool
1235 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1236                            enum machine_mode mode,
1237                            const_tree type,
1238                            bool named ATTRIBUTE_UNUSED)
1239 {
1240   HOST_WIDE_INT size;
1241   enum machine_mode dummymode;
1242   int nregs;
1243
1244   /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
1245   size = (mode == BLKmode && type)
1246     ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1247
1248   /* Aggregates are passed by reference based on their size.  */
1249   if (type && AGGREGATE_TYPE_P (type))
1250     {
1251       size = int_size_in_bytes (type);
1252     }
1253
1254   /* Variable sized arguments are always returned by reference.  */
1255   if (size < 0)
1256     return true;
1257
1258   /* Can this be a candidate to be passed in fp/simd register(s)?  */
1259   if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1260                                                &dummymode, &nregs,
1261                                                NULL))
1262     return false;
1263
1264   /* Arguments which are variable sized or larger than 2 registers are
1265      passed by reference unless they are a homogenous floating point
1266      aggregate.  */
1267   return size > 2 * UNITS_PER_WORD;
1268 }
1269
1270 /* Return TRUE if VALTYPE is padded to its least significant bits.  */
1271 static bool
1272 aarch64_return_in_msb (const_tree valtype)
1273 {
1274   enum machine_mode dummy_mode;
1275   int dummy_int;
1276
1277   /* Never happens in little-endian mode.  */
1278   if (!BYTES_BIG_ENDIAN)
1279     return false;
1280
1281   /* Only composite types smaller than or equal to 16 bytes can
1282      be potentially returned in registers.  */
1283   if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1284       || int_size_in_bytes (valtype) <= 0
1285       || int_size_in_bytes (valtype) > 16)
1286     return false;
1287
1288   /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1289      or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1290      is always passed/returned in the least significant bits of fp/simd
1291      register(s).  */
1292   if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1293                                                &dummy_mode, &dummy_int, NULL))
1294     return false;
1295
1296   return true;
1297 }
1298
1299 /* Implement TARGET_FUNCTION_VALUE.
1300    Define how to find the value returned by a function.  */
1301
1302 static rtx
1303 aarch64_function_value (const_tree type, const_tree func,
1304                         bool outgoing ATTRIBUTE_UNUSED)
1305 {
1306   enum machine_mode mode;
1307   int unsignedp;
1308   int count;
1309   enum machine_mode ag_mode;
1310
1311   mode = TYPE_MODE (type);
1312   if (INTEGRAL_TYPE_P (type))
1313     mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1314
1315   if (aarch64_return_in_msb (type))
1316     {
1317       HOST_WIDE_INT size = int_size_in_bytes (type);
1318
1319       if (size % UNITS_PER_WORD != 0)
1320         {
1321           size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1322           mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1323         }
1324     }
1325
1326   if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1327                                                &ag_mode, &count, NULL))
1328     {
1329       if (!aarch64_composite_type_p (type, mode))
1330         {
1331           gcc_assert (count == 1 && mode == ag_mode);
1332           return gen_rtx_REG (mode, V0_REGNUM);
1333         }
1334       else
1335         {
1336           int i;
1337           rtx par;
1338
1339           par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1340           for (i = 0; i < count; i++)
1341             {
1342               rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1343               tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1344                                        GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1345               XVECEXP (par, 0, i) = tmp;
1346             }
1347           return par;
1348         }
1349     }
1350   else
1351     return gen_rtx_REG (mode, R0_REGNUM);
1352 }
1353
1354 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1355    Return true if REGNO is the number of a hard register in which the values
1356    of called function may come back.  */
1357
1358 static bool
1359 aarch64_function_value_regno_p (const unsigned int regno)
1360 {
1361   /* Maximum of 16 bytes can be returned in the general registers.  Examples
1362      of 16-byte return values are: 128-bit integers and 16-byte small
1363      structures (excluding homogeneous floating-point aggregates).  */
1364   if (regno == R0_REGNUM || regno == R1_REGNUM)
1365     return true;
1366
1367   /* Up to four fp/simd registers can return a function value, e.g. a
1368      homogeneous floating-point aggregate having four members.  */
1369   if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1370     return !TARGET_GENERAL_REGS_ONLY;
1371
1372   return false;
1373 }
1374
1375 /* Implement TARGET_RETURN_IN_MEMORY.
1376
1377    If the type T of the result of a function is such that
1378      void func (T arg)
1379    would require that arg be passed as a value in a register (or set of
1380    registers) according to the parameter passing rules, then the result
1381    is returned in the same registers as would be used for such an
1382    argument.  */
1383
1384 static bool
1385 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1386 {
1387   HOST_WIDE_INT size;
1388   enum machine_mode ag_mode;
1389   int count;
1390
1391   if (!AGGREGATE_TYPE_P (type)
1392       && TREE_CODE (type) != COMPLEX_TYPE
1393       && TREE_CODE (type) != VECTOR_TYPE)
1394     /* Simple scalar types always returned in registers.  */
1395     return false;
1396
1397   if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1398                                                type,
1399                                                &ag_mode,
1400                                                &count,
1401                                                NULL))
1402     return false;
1403
1404   /* Types larger than 2 registers returned in memory.  */
1405   size = int_size_in_bytes (type);
1406   return (size < 0 || size > 2 * UNITS_PER_WORD);
1407 }
1408
1409 static bool
1410 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1411                                const_tree type, int *nregs)
1412 {
1413   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1414   return aarch64_vfp_is_call_or_return_candidate (mode,
1415                                                   type,
1416                                                   &pcum->aapcs_vfp_rmode,
1417                                                   nregs,
1418                                                   NULL);
1419 }
1420
1421 /* Given MODE and TYPE of a function argument, return the alignment in
1422    bits.  The idea is to suppress any stronger alignment requested by
1423    the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1424    This is a helper function for local use only.  */
1425
1426 static unsigned int
1427 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1428 {
1429   unsigned int alignment;
1430
1431   if (type)
1432     {
1433       if (!integer_zerop (TYPE_SIZE (type)))
1434         {
1435           if (TYPE_MODE (type) == mode)
1436             alignment = TYPE_ALIGN (type);
1437           else
1438             alignment = GET_MODE_ALIGNMENT (mode);
1439         }
1440       else
1441         alignment = 0;
1442     }
1443   else
1444     alignment = GET_MODE_ALIGNMENT (mode);
1445
1446   return alignment;
1447 }
1448
1449 /* Layout a function argument according to the AAPCS64 rules.  The rule
1450    numbers refer to the rule numbers in the AAPCS64.  */
1451
1452 static void
1453 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1454                     const_tree type,
1455                     bool named ATTRIBUTE_UNUSED)
1456 {
1457   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1458   int ncrn, nvrn, nregs;
1459   bool allocate_ncrn, allocate_nvrn;
1460   HOST_WIDE_INT size;
1461
1462   /* We need to do this once per argument.  */
1463   if (pcum->aapcs_arg_processed)
1464     return;
1465
1466   pcum->aapcs_arg_processed = true;
1467
1468   /* Size in bytes, rounded to the nearest multiple of 8 bytes.  */
1469   size
1470     = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1471                         UNITS_PER_WORD);
1472
1473   allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1474   allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1475                                                  mode,
1476                                                  type,
1477                                                  &nregs);
1478
1479   /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1480      The following code thus handles passing by SIMD/FP registers first.  */
1481
1482   nvrn = pcum->aapcs_nvrn;
1483
1484   /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
1485      and homogenous short-vector aggregates (HVA).  */
1486   if (allocate_nvrn)
1487     {
1488       if (nvrn + nregs <= NUM_FP_ARG_REGS)
1489         {
1490           pcum->aapcs_nextnvrn = nvrn + nregs;
1491           if (!aarch64_composite_type_p (type, mode))
1492             {
1493               gcc_assert (nregs == 1);
1494               pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1495             }
1496           else
1497             {
1498               rtx par;
1499               int i;
1500               par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1501               for (i = 0; i < nregs; i++)
1502                 {
1503                   rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1504                                          V0_REGNUM + nvrn + i);
1505                   tmp = gen_rtx_EXPR_LIST
1506                     (VOIDmode, tmp,
1507                      GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1508                   XVECEXP (par, 0, i) = tmp;
1509                 }
1510               pcum->aapcs_reg = par;
1511             }
1512           return;
1513         }
1514       else
1515         {
1516           /* C.3 NSRN is set to 8.  */
1517           pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1518           goto on_stack;
1519         }
1520     }
1521
1522   ncrn = pcum->aapcs_ncrn;
1523   nregs = size / UNITS_PER_WORD;
1524
1525   /* C6 - C9.  though the sign and zero extension semantics are
1526      handled elsewhere.  This is the case where the argument fits
1527      entirely general registers.  */
1528   if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1529     {
1530       unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1531
1532       gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1533
1534       /* C.8 if the argument has an alignment of 16 then the NGRN is
1535          rounded up to the next even number.  */
1536       if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1537         {
1538           ++ncrn;
1539           gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1540         }
1541       /* NREGS can be 0 when e.g. an empty structure is to be passed.
1542          A reg is still generated for it, but the caller should be smart
1543          enough not to use it.  */
1544       if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1545         {
1546           pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1547         }
1548       else
1549         {
1550           rtx par;
1551           int i;
1552
1553           par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1554           for (i = 0; i < nregs; i++)
1555             {
1556               rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1557               tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1558                                        GEN_INT (i * UNITS_PER_WORD));
1559               XVECEXP (par, 0, i) = tmp;
1560             }
1561           pcum->aapcs_reg = par;
1562         }
1563
1564       pcum->aapcs_nextncrn = ncrn + nregs;
1565       return;
1566     }
1567
1568   /* C.11  */
1569   pcum->aapcs_nextncrn = NUM_ARG_REGS;
1570
1571   /* The argument is passed on stack; record the needed number of words for
1572      this argument and align the total size if necessary.  */
1573 on_stack:
1574   pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1575   if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1576     pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1577                                                16 / UNITS_PER_WORD);
1578   return;
1579 }
1580
1581 /* Implement TARGET_FUNCTION_ARG.  */
1582
1583 static rtx
1584 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1585                       const_tree type, bool named)
1586 {
1587   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1588   gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1589
1590   if (mode == VOIDmode)
1591     return NULL_RTX;
1592
1593   aarch64_layout_arg (pcum_v, mode, type, named);
1594   return pcum->aapcs_reg;
1595 }
1596
1597 void
1598 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1599                            const_tree fntype ATTRIBUTE_UNUSED,
1600                            rtx libname ATTRIBUTE_UNUSED,
1601                            const_tree fndecl ATTRIBUTE_UNUSED,
1602                            unsigned n_named ATTRIBUTE_UNUSED)
1603 {
1604   pcum->aapcs_ncrn = 0;
1605   pcum->aapcs_nvrn = 0;
1606   pcum->aapcs_nextncrn = 0;
1607   pcum->aapcs_nextnvrn = 0;
1608   pcum->pcs_variant = ARM_PCS_AAPCS64;
1609   pcum->aapcs_reg = NULL_RTX;
1610   pcum->aapcs_arg_processed = false;
1611   pcum->aapcs_stack_words = 0;
1612   pcum->aapcs_stack_size = 0;
1613
1614   return;
1615 }
1616
1617 static void
1618 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1619                               enum machine_mode mode,
1620                               const_tree type,
1621                               bool named)
1622 {
1623   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1624   if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1625     {
1626       aarch64_layout_arg (pcum_v, mode, type, named);
1627       gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1628                   != (pcum->aapcs_stack_words != 0));
1629       pcum->aapcs_arg_processed = false;
1630       pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1631       pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1632       pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1633       pcum->aapcs_stack_words = 0;
1634       pcum->aapcs_reg = NULL_RTX;
1635     }
1636 }
1637
1638 bool
1639 aarch64_function_arg_regno_p (unsigned regno)
1640 {
1641   return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1642           || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1643 }
1644
1645 /* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
1646    PARM_BOUNDARY bits of alignment, but will be given anything up
1647    to STACK_BOUNDARY bits if the type requires it.  This makes sure
1648    that both before and after the layout of each argument, the Next
1649    Stacked Argument Address (NSAA) will have a minimum alignment of
1650    8 bytes.  */
1651
1652 static unsigned int
1653 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1654 {
1655   unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1656
1657   if (alignment < PARM_BOUNDARY)
1658     alignment = PARM_BOUNDARY;
1659   if (alignment > STACK_BOUNDARY)
1660     alignment = STACK_BOUNDARY;
1661   return alignment;
1662 }
1663
1664 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1665
1666    Return true if an argument passed on the stack should be padded upwards,
1667    i.e. if the least-significant byte of the stack slot has useful data.
1668
1669    Small aggregate types are placed in the lowest memory address.
1670
1671    The related parameter passing rules are B.4, C.3, C.5 and C.14.  */
1672
1673 bool
1674 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1675 {
1676   /* On little-endian targets, the least significant byte of every stack
1677      argument is passed at the lowest byte address of the stack slot.  */
1678   if (!BYTES_BIG_ENDIAN)
1679     return true;
1680
1681   /* Otherwise, integral, floating-point and pointer types are padded downward:
1682      the least significant byte of a stack argument is passed at the highest
1683      byte address of the stack slot.  */
1684   if (type
1685       ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1686          || POINTER_TYPE_P (type))
1687       : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1688     return false;
1689
1690   /* Everything else padded upward, i.e. data in first byte of stack slot.  */
1691   return true;
1692 }
1693
1694 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1695
1696    It specifies padding for the last (may also be the only)
1697    element of a block move between registers and memory.  If
1698    assuming the block is in the memory, padding upward means that
1699    the last element is padded after its highest significant byte,
1700    while in downward padding, the last element is padded at the
1701    its least significant byte side.
1702
1703    Small aggregates and small complex types are always padded
1704    upwards.
1705
1706    We don't need to worry about homogeneous floating-point or
1707    short-vector aggregates; their move is not affected by the
1708    padding direction determined here.  Regardless of endianness,
1709    each element of such an aggregate is put in the least
1710    significant bits of a fp/simd register.
1711
1712    Return !BYTES_BIG_ENDIAN if the least significant byte of the
1713    register has useful data, and return the opposite if the most
1714    significant byte does.  */
1715
1716 bool
1717 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1718                      bool first ATTRIBUTE_UNUSED)
1719 {
1720
1721   /* Small composite types are always padded upward.  */
1722   if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1723     {
1724       HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1725                             : GET_MODE_SIZE (mode));
1726       if (size < 2 * UNITS_PER_WORD)
1727         return true;
1728     }
1729
1730   /* Otherwise, use the default padding.  */
1731   return !BYTES_BIG_ENDIAN;
1732 }
1733
1734 static enum machine_mode
1735 aarch64_libgcc_cmp_return_mode (void)
1736 {
1737   return SImode;
1738 }
1739
1740 static bool
1741 aarch64_frame_pointer_required (void)
1742 {
1743   /* If the function contains dynamic stack allocations, we need to
1744      use the frame pointer to access the static parts of the frame.  */
1745   if (cfun->calls_alloca)
1746     return true;
1747
1748   /* In aarch64_override_options_after_change
1749      flag_omit_leaf_frame_pointer turns off the frame pointer by
1750      default.  Turn it back on now if we've not got a leaf
1751      function.  */
1752   if (flag_omit_leaf_frame_pointer
1753       && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1754     return true;
1755
1756   return false;
1757 }
1758
1759 /* Mark the registers that need to be saved by the callee and calculate
1760    the size of the callee-saved registers area and frame record (both FP
1761    and LR may be omitted).  */
1762 static void
1763 aarch64_layout_frame (void)
1764 {
1765   HOST_WIDE_INT offset = 0;
1766   int regno;
1767
1768   if (reload_completed && cfun->machine->frame.laid_out)
1769     return;
1770
1771   cfun->machine->frame.fp_lr_offset = 0;
1772
1773   /* First mark all the registers that really need to be saved...  */
1774   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1775     cfun->machine->frame.reg_offset[regno] = -1;
1776
1777   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1778     cfun->machine->frame.reg_offset[regno] = -1;
1779
1780   /* ... that includes the eh data registers (if needed)...  */
1781   if (crtl->calls_eh_return)
1782     for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1783       cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1784
1785   /* ... and any callee saved register that dataflow says is live.  */
1786   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1787     if (df_regs_ever_live_p (regno)
1788         && !call_used_regs[regno])
1789       cfun->machine->frame.reg_offset[regno] = 0;
1790
1791   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1792     if (df_regs_ever_live_p (regno)
1793         && !call_used_regs[regno])
1794       cfun->machine->frame.reg_offset[regno] = 0;
1795
1796   if (frame_pointer_needed)
1797     {
1798       cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1799       cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1800       cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1801     }
1802
1803   /* Now assign stack slots for them.  */
1804   for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1805     if (cfun->machine->frame.reg_offset[regno] != -1)
1806       {
1807         cfun->machine->frame.reg_offset[regno] = offset;
1808         offset += UNITS_PER_WORD;
1809       }
1810
1811   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1812     if (cfun->machine->frame.reg_offset[regno] != -1)
1813       {
1814         cfun->machine->frame.reg_offset[regno] = offset;
1815         offset += UNITS_PER_WORD;
1816       }
1817
1818   if (frame_pointer_needed)
1819     {
1820       cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1821       offset += UNITS_PER_WORD;
1822       cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1823     }
1824
1825   if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1826     {
1827       cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1828       offset += UNITS_PER_WORD;
1829       cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1830     }
1831
1832   cfun->machine->frame.padding0 =
1833     (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1834   offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1835
1836   cfun->machine->frame.saved_regs_size = offset;
1837   cfun->machine->frame.laid_out = true;
1838 }
1839
1840 /* Make the last instruction frame-related and note that it performs
1841    the operation described by FRAME_PATTERN.
        The instruction acted on is whatever get_last_insn returns at the
        time of the call, so this must be called right after emitting the
        insn that FRAME_PATTERN describes.  FRAME_PATTERN itself is also
        flagged as frame-related.  */
1842
1843 static void
1844 aarch64_set_frame_expr (rtx frame_pattern)
1845 {
1846   rtx insn;
1847
1848   insn = get_last_insn ();
1849   RTX_FRAME_RELATED_P (insn) = 1;
1850   RTX_FRAME_RELATED_P (frame_pattern) = 1;
       /* Prepend a REG_FRAME_RELATED_EXPR note carrying FRAME_PATTERN to
          the note list of the insn.  */
1851   REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1852                                       frame_pattern,
1853                                       REG_NOTES (insn));
1854 }
1855
     /* Return true if register REGNO has a save slot in the frame of the
        current function, i.e. its frame.reg_offset entry differs from -1.  */
1856 static bool
1857 aarch64_register_saved_on_entry (int regno)
1858 {
1859   return cfun->machine->frame.reg_offset[regno] != -1;
1860 }
1861
1862
     /* Emit insns that save (RESTORE false) or restore (RESTORE true) every
        V register for which aarch64_register_saved_on_entry holds.

        Slots are addressed relative to BASE_RTX.  The first slot is at
        START_OFFSET and each register processed advances the offset by
        INCREMENT.  When a further saved V register follows the current
        one, both are handled by a single store-pair / load-pair insn;
        a register left over on its own is moved singly.

        Every emitted insn is marked frame-related, and each restore gets
        a REG_CFA_RESTORE note for the register(s) it reloads.  */
1863 static void
1864 aarch64_save_or_restore_fprs (int start_offset, int increment,
1865                               bool restore, rtx base_rtx)
1866
1867 {
1868   unsigned regno;
1869   unsigned regno2;
1870   rtx insn;
       /* Slots are built with gen_frame_mem when a frame pointer is in
          use and with plain gen_rtx_MEM otherwise.  */
1871   rtx (*gen_mem_ref)(enum machine_mode, rtx)
1872     = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1873
1874
1875   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1876     {
1877       if (aarch64_register_saved_on_entry (regno))
1878         {
1879           rtx mem;
1880           mem = gen_mem_ref (DFmode,
1881                              plus_constant (Pmode,
1882                                             base_rtx,
1883                                             start_offset));
1884
               /* Look for the next saved V register above REGNO, so the
                  two can share one pair insn.  */
1885           for (regno2 = regno + 1;
1886                regno2 <= V31_REGNUM
1887                  && !aarch64_register_saved_on_entry (regno2);
1888                regno2++)
1889             {
1890               /* Empty loop.  */
1891             }
1892           if (regno2 <= V31_REGNUM &&
1893               aarch64_register_saved_on_entry (regno2))
1894             {
1895               rtx mem2;
1896               /* Next highest register to be saved.  */
1897               mem2 = gen_mem_ref (DFmode,
1898                                   plus_constant
1899                                   (Pmode,
1900                                    base_rtx,
1901                                    start_offset + increment));
1902               if (restore == false)
1903                 {
1904                   insn = emit_insn
1905                     ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1906                                         mem2, gen_rtx_REG (DFmode, regno2)));
1907
1908                 }
1909               else
1910                 {
1911                   insn = emit_insn
1912                     ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1913                                        gen_rtx_REG (DFmode, regno2), mem2));
1914
1915                   add_reg_note (insn, REG_CFA_RESTORE,
1916                                 gen_rtx_REG (DFmode, regno));
1917                   add_reg_note (insn, REG_CFA_RESTORE,
1918                                 gen_rtx_REG (DFmode, regno2));
1919                 }
1920
1921               /* The first part of a frame-related parallel insn
1922                  is always assumed to be relevant to the frame
1923                  calculations; subsequent parts are only
1924                  frame-related if explicitly marked.  */
1925               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
                   /* REGNO2 has been dealt with; the outer loop resumes
                      after it.  */
1926               regno = regno2;
1927               start_offset += increment * 2;
1928             }
1929           else
1930             {
1931               if (restore == false)
1932                 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1933               else
1934                 {
1935                   insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
                       /* Describe the restored register in DFmode, the
                          mode it was actually reloaded in, exactly as
                          the paired path above does.  */
1936                   add_reg_note (insn, REG_CFA_RESTORE,
1937                                 gen_rtx_REG (DFmode, regno));
1938                 }
1939               start_offset += increment;
1940             }
1941           RTX_FRAME_RELATED_P (insn) = 1;
1942         }
1943     }
1944
1945 }
1946
1947
1948 /* Emit insns that save (RESTORE false) or restore (RESTORE true) the
1949    callee-saved registers of the current function.  OFFSET is the
        offset from the stack pointer at which the saves and restores have
        to happen.

        General registers are processed first, from R0 up to R28 when a
        frame pointer is needed and up to R30 otherwise.  Registers with
        a save slot are handled pairwise with store-pair / load-pair
        insns where possible.  The V registers follow, via
        aarch64_save_or_restore_fprs, continuing at the offset reached
        after the last general register.  */
1950 static void
1951 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1952                                             bool restore)
1953 {
1954   rtx insn;
1955   rtx base_rtx = stack_pointer_rtx;
1956   HOST_WIDE_INT start_offset = offset;
1957   HOST_WIDE_INT increment = UNITS_PER_WORD;
1958   rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
       /* With a frame pointer, R29 and R30 are left out here; the
          prologue and epilogue store and load that pair themselves.  */
1959   unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1960   unsigned regno;
1961   unsigned regno2;
1962
1963   for (regno = R0_REGNUM; regno <= limit; regno++)
1964     {
1965       if (aarch64_register_saved_on_entry (regno))
1966         {
1967           rtx mem;
1968           mem = gen_mem_ref (Pmode,
1969                              plus_constant (Pmode,
1970                                             base_rtx,
1971                                             start_offset));
1972
               /* Look for the next saved register above REGNO, so the
                  two can share one pair insn.  */
1973           for (regno2 = regno + 1;
1974                regno2 <= limit
1975                  && !aarch64_register_saved_on_entry (regno2);
1976                regno2++)
1977             {
1978               /* Empty loop.  */
1979             }
1980           if (regno2 <= limit &&
1981               aarch64_register_saved_on_entry (regno2))
1982             {
1983               rtx mem2;
1984               /* Next highest register to be saved.  */
1985               mem2 = gen_mem_ref (Pmode,
1986                                   plus_constant
1987                                   (Pmode,
1988                                    base_rtx,
1989                                    start_offset + increment));
1990               if (restore == false)
1991                 {
1992                   insn = emit_insn
1993                     ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1994                                         mem2, gen_rtx_REG (DImode, regno2)));
1995
1996                 }
1997               else
1998                 {
1999                   insn = emit_insn
2000                     ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
2001                                      gen_rtx_REG (DImode, regno2), mem2));
2002
2003                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2004                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
2005                 }
2006
2007                   /* The first part of a frame-related parallel insn
2008                      is always assumed to be relevant to the frame
2009                      calculations; subsequent parts are only
2010                      frame-related if explicitly marked.  */
2011               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
2012                                             1)) = 1;
                   /* REGNO2 has been dealt with; the outer loop resumes
                      after it.  */
2013               regno = regno2;
2014               start_offset += increment * 2;
2015             }
2016           else
2017             {
                   /* No partner register: move this one on its own.  */
2018               if (restore == false)
2019                 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
2020               else
2021                 {
2022                   insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
2023                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2024                 }
2025               start_offset += increment;
2026             }
2027           RTX_FRAME_RELATED_P (insn) = 1;
2028         }
2029     }
2030
       /* The V registers occupy the slots following the general ones.  */
2031   aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
2032
2033 }
2034
2035 /* AArch64 stack frames generated by this compiler look like:
2036
2037         +-------------------------------+
2038         |                               |
2039         |  incoming stack arguments     |
2040         |                               |
2041         +-------------------------------+ <-- arg_pointer_rtx
2042         |                               |
2043         |  callee-allocated save area   |
2044         |  for register varargs         |
2045         |                               |
2046         +-------------------------------+ <-- frame_pointer_rtx
2047         |                               |
2048         |  local variables              |
2049         |                               |
2050         +-------------------------------+
2051         |  padding0                     | \
2052         +-------------------------------+  |
2053         |                               |  |
2054         |                               |  |
2055         |  callee-saved registers       |  | frame.saved_regs_size
2056         |                               |  |
2057         +-------------------------------+  |
2058         |  LR'                          |  |
2059         +-------------------------------+  |
2060         |  FP'                          | /
2061       P +-------------------------------+ <-- hard_frame_pointer_rtx
2062         |  dynamic allocation           |
2063         +-------------------------------+
2064         |                               |
2065         |  outgoing stack arguments     |
2066         |                               |
2067         +-------------------------------+ <-- stack_pointer_rtx
2068
2069    Dynamic stack allocations such as alloca insert data at point P.
2070    They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2071    hard_frame_pointer_rtx unchanged.  */
2072
2073 /* Generate the prologue instructions for entry into a function.
2074    Establish the stack frame by decreasing the stack pointer with a
2075    properly calculated size and, if necessary, create a frame record
2076    filled with the values of LR and previous frame pointer.  The
2077    current FP is also set up if it is in use.  */
2078
2079 void
2080 aarch64_expand_prologue (void)
2081 {
2082   /* sub sp, sp, #<frame_size>
2083      stp {fp, lr}, [sp, #<frame_size> - 16]
2084      add fp, sp, #<frame_size> - hardfp_offset
2085      stp {cs_reg}, [fp, #-16] etc.
2086
2087      sub sp, sp, <final_adjustment_if_any>
2088   */
2089   HOST_WIDE_INT original_frame_size;    /* local variables + vararg save */
2090   HOST_WIDE_INT frame_size, offset;
2091   HOST_WIDE_INT fp_offset;              /* FP offset from SP */
2092   rtx insn;
2093
2094   aarch64_layout_frame ();
2095   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
       /* NOTE(review): both conjuncts state the same condition, namely
          that a non-zero saved_varargs_size implies cfun->stdarg.  */
2096   gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2097               && (cfun->stdarg || !cfun->machine->saved_varargs_size));
       /* Whole frame: locals and vararg save area, register save area and
          outgoing arguments, rounded up to the stack boundary.  */
2098   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2099                 + crtl->outgoing_args_size);
2100   offset = frame_size = AARCH64_ROUND_UP (frame_size,
2101                                           STACK_BOUNDARY / BITS_PER_UNIT);
2102
2103   if (flag_stack_usage_info)
2104     current_function_static_stack_size = frame_size;
2105
       /* What is left of the rounded frame once the locals/vararg area and
          the register save area have been taken off.  */
2106   fp_offset = (offset
2107                - original_frame_size
2108                - cfun->machine->frame.saved_regs_size);
2109
2110   /* Store pairs and load pairs have a range only -512 to 504.  */
2111   if (offset >= 512)
2112     {
2113       /* When the frame has a large size, an initial decrease is done on
2114          the stack pointer to jump over the callee-allocated save area for
2115          register varargs, the local variable area and/or the callee-saved
2116          register area.  This will allow the pre-index write-back
2117          store pair instructions to be used for setting up the stack frame
2118          efficiently.  */
           /* OFFSET becomes the part allocated together with the frame
              record: locals plus saved registers if that stays below 512,
              otherwise the saved registers alone.  */
2119       offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2120       if (offset >= 512)
2121         offset = cfun->machine->frame.saved_regs_size;
2122
           /* FRAME_SIZE becomes the initial decrease; the outgoing
              argument area is allocated separately at the end of this
              function.  */
2123       frame_size -= (offset + crtl->outgoing_args_size);
2124       fp_offset = 0;
2125
2126       if (frame_size >= 0x1000000)
2127         {
               /* Load the negated adjustment into IP0 and add that
                  register to SP; the frame note describes the net
                  effect as SP = SP - frame_size.  */
2128           rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2129           emit_move_insn (op0, GEN_INT (-frame_size));
2130           emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2131           aarch64_set_frame_expr (gen_rtx_SET
2132                                   (Pmode, stack_pointer_rtx,
2133                                    plus_constant (Pmode,
2134                                                   stack_pointer_rtx,
2135                                                   -frame_size)));
2136         }
2137       else if (frame_size > 0)
2138         {
               /* Split the adjustment into the bits above the low 12 and
                  the low 12 bits, emitting an add for each non-zero
                  part.  */
2139           if ((frame_size & 0xfff) != frame_size)
2140             {
2141               insn = emit_insn (gen_add2_insn
2142                                 (stack_pointer_rtx,
2143                                  GEN_INT (-(frame_size
2144                                             & ~(HOST_WIDE_INT)0xfff))));
2145               RTX_FRAME_RELATED_P (insn) = 1;
2146             }
2147           if ((frame_size & 0xfff) != 0)
2148             {
2149               insn = emit_insn (gen_add2_insn
2150                                 (stack_pointer_rtx,
2151                                  GEN_INT (-(frame_size
2152                                             & (HOST_WIDE_INT)0xfff))));
2153               RTX_FRAME_RELATED_P (insn) = 1;
2154             }
2155         }
2156     }
2157   else
         /* -1 records that the small-frame path was taken; it is tested
            by the `frame_size > -1' check at the end of this function.  */
2158     frame_size = -1;
2159
2160   if (offset > 0)
2161     {
2162       /* Save the frame pointer and lr if the frame pointer is needed
2163          first.  Make the frame pointer point to the location of the
2164          old frame pointer on the stack.  */
2165       if (frame_pointer_needed)
2166         {
2167           rtx mem_fp, mem_lr;
2168
2169           if (fp_offset)
2170             {
                   /* The frame record does not sit at the new SP: lower
                      SP by OFFSET first, then store FP and LR at
                      SP + fp_offset.  */
2171               insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2172                                                GEN_INT (-offset)));
2173               RTX_FRAME_RELATED_P (insn) = 1;
2174               aarch64_set_frame_expr (gen_rtx_SET
2175                                       (Pmode, stack_pointer_rtx,
2176                                        gen_rtx_MINUS (Pmode,
2177                                                       stack_pointer_rtx,
2178                                                       GEN_INT (offset))));
2179               mem_fp = gen_frame_mem (DImode,
2180                                       plus_constant (Pmode,
2181                                                      stack_pointer_rtx,
2182                                                      fp_offset));
2183               mem_lr = gen_frame_mem (DImode,
2184                                       plus_constant (Pmode,
2185                                                      stack_pointer_rtx,
2186                                                      fp_offset
2187                                                      + UNITS_PER_WORD));
2188               insn = emit_insn (gen_store_pairdi (mem_fp,
2189                                                   hard_frame_pointer_rtx,
2190                                                   mem_lr,
2191                                                   gen_rtx_REG (DImode,
2192                                                                LR_REGNUM)));
2193             }
2194           else
2195             {
                   /* The frame record sits at the new SP: one write-back
                      store pair both lowers SP by OFFSET and stores FP
                      and LR.  Element 2 of the resulting parallel is
                      marked here, element 1 below.  */
2196               insn = emit_insn (gen_storewb_pairdi_di
2197                                 (stack_pointer_rtx, stack_pointer_rtx,
2198                                  hard_frame_pointer_rtx,
2199                                  gen_rtx_REG (DImode, LR_REGNUM),
2200                                  GEN_INT (-offset),
2201                                  GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2202               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2203             }
2204
2205           /* The first part of a frame-related parallel insn is always
2206              assumed to be relevant to the frame calculations;
2207              subsequent parts are only frame-related if explicitly
2208              marked.  */
2209           RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2210           RTX_FRAME_RELATED_P (insn) = 1;
2211
2212           /* Set up frame pointer to point to the location of the
2213              previous frame pointer on the stack.  */
2214           insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2215                                            stack_pointer_rtx,
2216                                            GEN_INT (fp_offset)));
2217           aarch64_set_frame_expr (gen_rtx_SET
2218                                   (Pmode, hard_frame_pointer_rtx,
2219                                    plus_constant (Pmode,
2220                                                   stack_pointer_rtx,
2221                                                   fp_offset)));
2222           RTX_FRAME_RELATED_P (insn) = 1;
               /* NOTE(review): presumably the stack_tie insn keeps later
                  stack accesses from being moved across the FP set-up;
                  confirm against the stack_tie pattern.  */
2223           insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2224                                            hard_frame_pointer_rtx));
2225         }
2226       else
2227         {
               /* No frame record: just lower SP by OFFSET.  */
2228           insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2229                                            GEN_INT (-offset)));
2230           RTX_FRAME_RELATED_P (insn) = 1;
2231         }
2232
           /* Save the callee-saved registers starting at
              fp_offset + frame.hardfp_offset from SP; the second argument
              (0) selects saving rather than restoring.  */
2233       aarch64_save_or_restore_callee_save_registers
2234         (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2235     }
2236
2237   /* when offset >= 512,
2238      sub sp, sp, #<outgoing_args_size> */
2239   if (frame_size > -1)
2240     {
2241       if (crtl->outgoing_args_size > 0)
2242         {
2243           insn = emit_insn (gen_add2_insn
2244                             (stack_pointer_rtx,
2245                              GEN_INT (- crtl->outgoing_args_size)));
2246           RTX_FRAME_RELATED_P (insn) = 1;
2247         }
2248     }
2249 }
2250
2251 /* Generate the epilogue instructions for returning from a function.
        The frame is taken down in the reverse order of the prologue:
        outgoing arguments, callee-saved registers, the frame record and
        finally any remaining large adjustment.  When FOR_SIBCALL is true
        no return jump is emitted.  */
2252 void
2253 aarch64_expand_epilogue (bool for_sibcall)
2254 {
2255   HOST_WIDE_INT original_frame_size, frame_size, offset;
2256   HOST_WIDE_INT fp_offset;
2257   rtx insn;
2258   rtx cfa_reg;
2259
       /* Recompute the same sizes as the prologue.  */
2260   aarch64_layout_frame ();
2261   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2262   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2263                 + crtl->outgoing_args_size);
2264   offset = frame_size = AARCH64_ROUND_UP (frame_size,
2265                                           STACK_BOUNDARY / BITS_PER_UNIT);
2266
2267   fp_offset = (offset
2268                - original_frame_size
2269                - cfun->machine->frame.saved_regs_size);
2270
2271   cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2272
2273   /* Store pairs and load pairs have a range only -512 to 504.  */
2274   if (offset >= 512)
2275     {
           /* Same split as in the prologue: OFFSET is the part released
              together with the frame record, FRAME_SIZE the remaining
              adjustment undone at the end of this function.  */
2276       offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2277       if (offset >= 512)
2278         offset = cfun->machine->frame.saved_regs_size;
2279
2280       frame_size -= (offset + crtl->outgoing_args_size);
2281       fp_offset = 0;
           /* Without a frame pointer the outgoing argument area is popped
              explicitly; with one, SP is reloaded from FP below.  */
2282       if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2283         {
2284           insn = emit_insn (gen_add2_insn
2285                             (stack_pointer_rtx,
2286                              GEN_INT (crtl->outgoing_args_size)));
2287           RTX_FRAME_RELATED_P (insn) = 1;
2288         }
2289     }
2290   else
         /* -1 records that the small-frame path was taken; it is tested
            by the `frame_size > -1' check near the end of this function.  */
2291     frame_size = -1;
2292
2293   /* If there were outgoing arguments or we've done dynamic stack
2294      allocation, then restore the stack pointer from the frame
2295      pointer.  This is at most one insn and more efficient than using
2296      GCC's internal mechanism.  */
2297   if (frame_pointer_needed
2298       && (crtl->outgoing_args_size || cfun->calls_alloca))
2299     {
2300       insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2301                                        hard_frame_pointer_rtx,
2302                                        GEN_INT (- fp_offset)));
2303       RTX_FRAME_RELATED_P (insn) = 1;
2304       /* As SP is set to (FP - fp_offset), according to the rules in
2305          dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2306          from the value of SP from now on.  */
2307       cfa_reg = stack_pointer_rtx;
2308     }
2309
       /* Reload the callee-saved registers from the slots the prologue
          used; the second argument (1) selects restoring.  */
2310   aarch64_save_or_restore_callee_save_registers
2311     (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2312
2313   /* Restore the frame pointer and lr if the frame pointer is needed.  */
2314   if (offset > 0)
2315     {
2316       if (frame_pointer_needed)
2317         {
2318           rtx mem_fp, mem_lr;
2319
2320           if (fp_offset)
2321             {
                   /* The frame record is at SP + fp_offset: load FP and
                      LR from there; SP is raised by OFFSET afterwards
                      (see below).  */
2322               mem_fp = gen_frame_mem (DImode,
2323                                       plus_constant (Pmode,
2324                                                      stack_pointer_rtx,
2325                                                      fp_offset));
2326               mem_lr = gen_frame_mem (DImode,
2327                                       plus_constant (Pmode,
2328                                                      stack_pointer_rtx,
2329                                                      fp_offset
2330                                                      + UNITS_PER_WORD));
2331               insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2332                                                  mem_fp,
2333                                                  gen_rtx_REG (DImode,
2334                                                               LR_REGNUM),
2335                                                  mem_lr));
2336             }
2337           else
2338             {
                   /* The frame record is at SP: one write-back load pair
                      reloads FP and LR and raises SP by OFFSET.  The
                      REG_CFA_ADJUST_CFA note describes the new SP as
                      CFA_REG + OFFSET.  */
2339               insn = emit_insn (gen_loadwb_pairdi_di
2340                                 (stack_pointer_rtx,
2341                                  stack_pointer_rtx,
2342                                  hard_frame_pointer_rtx,
2343                                  gen_rtx_REG (DImode, LR_REGNUM),
2344                                  GEN_INT (offset),
2345                                  GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2346               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2347               add_reg_note (insn, REG_CFA_ADJUST_CFA,
2348                             (gen_rtx_SET (Pmode, stack_pointer_rtx,
2349                                           plus_constant (Pmode, cfa_reg,
2350                                                          offset))));
2351             }
2352
2353           /* The first part of a frame-related parallel insn
2354              is always assumed to be relevant to the frame
2355              calculations; subsequent parts are only
2356              frame-related if explicitly marked.  */
2357           RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2358           RTX_FRAME_RELATED_P (insn) = 1;
2359           add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2360           add_reg_note (insn, REG_CFA_RESTORE,
2361                         gen_rtx_REG (DImode, LR_REGNUM));
2362
2363           if (fp_offset)
2364             {
                   /* The plain load pair above did not touch SP, so
                      release OFFSET here.  */
2365               insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2366                                                GEN_INT (offset)));
2367               RTX_FRAME_RELATED_P (insn) = 1;
2368             }
2369         }
2370       else
2371         {
               /* No frame record: just raise SP by OFFSET.  */
2372           insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2373                                            GEN_INT (offset)));
2374           RTX_FRAME_RELATED_P (insn) = 1;
2375         }
2376     }
2377
2378   /* Stack adjustment for exception handler.  */
2379   if (crtl->calls_eh_return)
2380     {
2381       /* We need to unwind the stack by the offset computed by
2382          EH_RETURN_STACKADJ_RTX.  However, at this point the CFA is
2383          based on SP.  Ideally we would update the SP and define the
2384          CFA along the lines of:
2385
2386          SP = SP + EH_RETURN_STACKADJ_RTX
2387          (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2388
2389          However the dwarf emitter only understands a constant
2390          register offset.
2391
2392          The solution chosen here is to use the otherwise unused IP0
2393          as a temporary register to hold the current SP value.  The
2394          CFA is described using IP0 then SP is modified.  */
2395
2396       rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2397
2398       insn = emit_move_insn (ip0, stack_pointer_rtx);
2399       add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2400       RTX_FRAME_RELATED_P (insn) = 1;
2401
2402       emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2403
2404       /* Ensure the assignment to IP0 does not get optimized away.  */
2405       emit_use (ip0);
2406     }
2407
       /* Large-frame path only: undo the initial decrease made by the
          prologue.  */
2408   if (frame_size > -1)
2409     {
2410       if (frame_size >= 0x1000000)
2411         {
               /* Load the adjustment into IP0 and add that register to
                  SP; the frame note describes the net effect as
                  SP = SP + frame_size.  */
2412           rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2413           emit_move_insn (op0, GEN_INT (frame_size));
2414           emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2415           aarch64_set_frame_expr (gen_rtx_SET
2416                                   (Pmode, stack_pointer_rtx,
2417                                    plus_constant (Pmode,
2418                                                   stack_pointer_rtx,
2419                                                   frame_size)));
2420         }
2421       else if (frame_size > 0)
2422         {
               /* Split the adjustment into its low 12 bits and the bits
                  above, emitting an add for each non-zero part.  */
2423           if ((frame_size & 0xfff) != 0)
2424             {
2425               insn = emit_insn (gen_add2_insn
2426                                 (stack_pointer_rtx,
2427                                  GEN_INT ((frame_size
2428                                            & (HOST_WIDE_INT) 0xfff))));
2429               RTX_FRAME_RELATED_P (insn) = 1;
2430             }
2431           if ((frame_size & 0xfff) != frame_size)
2432             {
2433               insn = emit_insn (gen_add2_insn
2434                                 (stack_pointer_rtx,
2435                                  GEN_INT ((frame_size
2436                                            & ~ (HOST_WIDE_INT) 0xfff))));
2437               RTX_FRAME_RELATED_P (insn) = 1;
2438             }
2439         }
2440
             /* NOTE(review): this note is attached to whatever insn was
                emitted last, also when frame_size is 0 and neither branch
                above emitted anything; confirm that is intended.  */
2441         aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2442                                              plus_constant (Pmode,
2443                                                             stack_pointer_rtx,
2444                                                             offset)));
2445     }
2446
       /* NOTE(review): presumably this keeps the restored LR from being
          treated as dead before the return, by analogy with the use of
          IP0 above; confirm.  */
2447   emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2448   if (!for_sibcall)
2449     emit_jump_insn (ret_rtx);
2450 }
2451
2452 /* Return the place to copy the exception unwinding return address to.
2453    This will probably be a stack slot, but could (in theory) be the
2454    return register.  */
2455 rtx
2456 aarch64_final_eh_return_addr (void)
2457 {
2458   HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
       /* Recompute the same sizes as the prologue and epilogue.  */
2459   aarch64_layout_frame ();
2460   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2461   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2462                 + crtl->outgoing_args_size);
2463   offset = frame_size = AARCH64_ROUND_UP (frame_size,
2464                                           STACK_BOUNDARY / BITS_PER_UNIT);
2465   fp_offset = offset
2466     - original_frame_size
2467     - cfun->machine->frame.saved_regs_size;
2468
       /* LR has no save slot, so the return address stays in LR itself.  */
2469   if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2470     return gen_rtx_REG (DImode, LR_REGNUM);
2471
2472   /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
2473      result in a store to save LR introduced by builtin_eh_return () being
2474      incorrectly deleted because the alias is not detected.
2475      So in the calculation of the address to copy the exception unwinding
2476      return address to, we note 2 cases.
2477      If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2478      we return a SP-relative location since all the addresses are SP-relative
2479      in this case.  This prevents the store from being optimized away.
2480      If the fp_offset is not 0, then the addresses will be FP-relative and
2481      therefore we return a FP-relative location.  */
2482
2483   if (frame_pointer_needed)
2484     {
           /* In both cases the slot is one word above the base register,
              next to the saved frame pointer.  */
2485       if (fp_offset)
2486         return gen_frame_mem (DImode,
2487                               plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2488       else
2489         return gen_frame_mem (DImode,
2490                               plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2491     }
2492
2493   /* If FP is not needed, we calculate the location of LR, which would be
2494      at the top of the saved registers block.  */
2495
2496   return gen_frame_mem (DImode,
2497                         plus_constant (Pmode,
2498                                        stack_pointer_rtx,
2499                                        fp_offset
2500                                        + cfun->machine->frame.saved_regs_size
2501                                        - 2 * UNITS_PER_WORD));
2502 }
2503
2504 /* Possibly output code to build up a constant in a register.  For
2505    the benefit of the costs infrastructure, returns the number of
2506    instructions which would be emitted.  GENERATE inhibits or
2507    enables code generation.
        REGNUM is the number of the register to load and VAL the constant.
        The count returned is the same whether or not GENERATE is set.  */
2508
2509 static int
2510 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2511 {
2512   int insns = 0;
2513
       /* A bitmask immediate is loaded with a single move.  */
2514   if (aarch64_bitmask_imm (val, DImode))
2515     {
2516       if (generate)
2517         emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2518       insns = 1;
2519     }
2520   else
2521     {
2522       int i;
2523       int ncount = 0;
2524       int zcount = 0;
2525       HOST_WIDE_INT valp = val >> 16;
2526       HOST_WIDE_INT valm;
2527       HOST_WIDE_INT tval;
2528
           /* Examine the three 16-bit chunks above the lowest one:
              ZCOUNT counts those that are not 0, NCOUNT those that are
              not 0xffff.  */
2529       for (i = 16; i < 64; i += 16)
2530         {
2531           valm = (valp & 0xffff);
2532
2533           if (valm != 0)
2534             ++ zcount;
2535
2536           if (valm != 0xffff)
2537             ++ ncount;
2538
2539           valp >>= 16;
2540         }
2541
2542       /* zcount contains the number of additional MOVK instructions
2543          required if the constant is built up with an initial MOVZ instruction,
2544          while ncount is the number of MOVK instructions required if starting
2545          with a MOVN instruction.  Choose the sequence that yields the fewest
2546          number of instructions, preferring MOVZ instructions when they are both
2547          the same.  */
2548       if (ncount < zcount)
2549         {
               /* Start from the low chunk with all upper bits set; upper
                  chunks equal to 0xffff then need no further insn.  */
2550           if (generate)
2551             emit_move_insn (gen_rtx_REG (Pmode, regnum),
2552                             GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2553           tval = 0xffff;
2554           insns++;
2555         }
2556       else
2557         {
               /* Start from the low chunk with all upper bits clear;
                  upper chunks equal to 0 then need no further insn.  */
2558           if (generate)
2559             emit_move_insn (gen_rtx_REG (Pmode, regnum),
2560                             GEN_INT (val & 0xffff));
2561           tval = 0;
2562           insns++;
2563         }
2564
2565       val >>= 16;
2566
           /* Insert every upper chunk that differs from the fill value
              TVAL at its bit position I.  */
2567       for (i = 16; i < 64; i += 16)
2568         {
2569           if ((val & 0xffff) != tval)
2570             {
2571               if (generate)
2572                 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2573                                            GEN_INT (i),
2574                                            GEN_INT (val & 0xffff)));
2575               insns++;
2576             }
2577           val >>= 16;
2578         }
2579     }
2580   return insns;
2581 }
2582
2583 static void
2584 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2585 {
2586   HOST_WIDE_INT mdelta = delta;
2587   rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2588   rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2589
2590   if (mdelta < 0)
2591     mdelta = -mdelta;
2592
2593   if (mdelta >= 4096 * 4096)
2594     {
2595       (void) aarch64_build_constant (scratchreg, delta, true);
2596       emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2597     }
2598   else if (mdelta > 0)
2599     {
2600       if (mdelta >= 4096)
2601         {
2602           emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2603           rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2604           if (delta < 0)
2605             emit_insn (gen_rtx_SET (Pmode, this_rtx,
2606                                     gen_rtx_MINUS (Pmode, this_rtx, shift)));
2607           else
2608             emit_insn (gen_rtx_SET (Pmode, this_rtx,
2609                                     gen_rtx_PLUS (Pmode, this_rtx, shift)));
2610         }
2611       if (mdelta % 4096 != 0)
2612         {
2613           scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2614           emit_insn (gen_rtx_SET (Pmode, this_rtx,
2615                                   gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2616         }
2617     }
2618 }
2619
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.

   When VCALL_OFFSET is non-zero, a further adjustment is applied after
   DELTA: a pointer is loaded from the (already adjusted) first argument,
   the value stored VCALL_OFFSET bytes past that pointer is loaded, and
   that value is added to the first argument as well.

   The generated insns are written to FILE.  THUNK is unused.  */
static void
aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT delta,
                         HOST_WIDE_INT vcall_offset,
                         tree function)
{
  /* The this pointer is always in x0.  Note that this differs from
     Arm where the this pointer may be bumped to r1 if r0 is required
     to return a pointer to an aggregate.  On AArch64 a result value
     pointer will be in x8.  */
  int this_regno = R0_REGNUM;
  rtx this_rtx, temp0, temp1, addr, insn, funexp;

  /* Pretend to be a post-reload pass while the thunk is emitted; the
     flag is cleared again at the end of this function.  */
  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  if (vcall_offset == 0)
    /* Simple case: only the constant adjustment is needed.  */
    aarch64_add_constant (this_regno, IP1_REGNUM, delta);
  else
    {
      /* The offset must be a multiple of the pointer size.  */
      gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);

      this_rtx = gen_rtx_REG (Pmode, this_regno);
      temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
      temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);

      /* Apply DELTA.  A small DELTA is folded into the load below as a
         pre-modify address; otherwise it is added explicitly, using IP1
         as the scratch register.  */
      addr = this_rtx;
      if (delta != 0)
        {
          if (delta >= -256 && delta < 256)
            addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
                                       plus_constant (Pmode, this_rtx, delta));
          else
            aarch64_add_constant (this_regno, IP1_REGNUM, delta);
        }

      /* temp0 = the ptr_mode value stored at ADDR (presumably the vtable
         pointer -- confirm), zero-extended when Pmode is wider than
         ptr_mode.  */
      if (Pmode == ptr_mode)
        aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
      else
        aarch64_emit_move (temp0,
                           gen_rtx_ZERO_EXTEND (Pmode,
                                                gen_rtx_MEM (ptr_mode, addr)));

      /* Form the address temp0 + VCALL_OFFSET, either as an immediate
         offset or, when the offset is outside the range tested here, by
         building the constant in IP1 and using a register sum.  */
      if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
          addr = plus_constant (Pmode, temp0, vcall_offset);
      else
        {
          (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
          addr = gen_rtx_PLUS (Pmode, temp0, temp1);
        }

      /* temp1 = the ptr_mode value stored at that address, sign-extended
         when Pmode is wider than ptr_mode.  */
      if (Pmode == ptr_mode)
        aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
      else
        aarch64_emit_move (temp1,
                           gen_rtx_SIGN_EXTEND (Pmode,
                                                gen_rtx_MEM (ptr_mode, addr)));

      /* Finally adjust the this pointer by the loaded value.  */
      emit_insn (gen_add2_insn (this_rtx, temp1));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;

  /* Run the emitted insn sequence through final to write it to FILE.  */
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending to be a post-reload pass.  */
  reload_completed = 0;
}
2703
2704 static int
2705 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2706 {
2707   if (GET_CODE (*x) == SYMBOL_REF)
2708     return SYMBOL_REF_TLS_MODEL (*x) != 0;
2709
2710   /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2711      TLS offsets, not real symbol references.  */
2712   if (GET_CODE (*x) == UNSPEC
2713       && XINT (*x, 1) == UNSPEC_TLS)
2714     return -1;
2715
2716   return 0;
2717 }
2718
2719 static bool
2720 aarch64_tls_referenced_p (rtx x)
2721 {
2722   if (!TARGET_HAVE_TLS)
2723     return false;
2724
2725   return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2726 }
2727
2728
2729 static int
2730 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2731 {
2732   const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2733   const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2734
2735   if (*imm1 < *imm2)
2736     return -1;
2737   if (*imm1 > *imm2)
2738     return +1;
2739   return 0;
2740 }
2741
2742
2743 static void
2744 aarch64_build_bitmask_table (void)
2745 {
2746   unsigned HOST_WIDE_INT mask, imm;
2747   unsigned int log_e, e, s, r;
2748   unsigned int nimms = 0;
2749
2750   for (log_e = 1; log_e <= 6; log_e++)
2751     {
2752       e = 1 << log_e;
2753       if (e == 64)
2754         mask = ~(HOST_WIDE_INT) 0;
2755       else
2756         mask = ((HOST_WIDE_INT) 1 << e) - 1;
2757       for (s = 1; s < e; s++)
2758         {
2759           for (r = 0; r < e; r++)
2760             {
2761               /* set s consecutive bits to 1 (s < 64) */
2762               imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2763               /* rotate right by r */
2764               if (r != 0)
2765                 imm = ((imm >> r) | (imm << (e - r))) & mask;
2766               /* replicate the constant depending on SIMD size */
2767               switch (log_e) {
2768               case 1: imm |= (imm <<  2);
2769               case 2: imm |= (imm <<  4);
2770               case 3: imm |= (imm <<  8);
2771               case 4: imm |= (imm << 16);
2772               case 5: imm |= (imm << 32);
2773               case 6:
2774                 break;
2775               default:
2776                 gcc_unreachable ();
2777               }
2778               gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2779               aarch64_bitmasks[nimms++] = imm;
2780             }
2781         }
2782     }
2783
2784   gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2785   qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2786          aarch64_bitmasks_cmp);
2787 }
2788
2789
2790 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2791    a left shift of 0 or 12 bits.  */
2792 bool
2793 aarch64_uimm12_shift (HOST_WIDE_INT val)
2794 {
2795   return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2796           || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2797           );
2798 }
2799
2800
2801 /* Return true if val is an immediate that can be loaded into a
2802    register by a MOVZ instruction.  */
2803 static bool
2804 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2805 {
2806   if (GET_MODE_SIZE (mode) > 4)
2807     {
2808       if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2809           || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2810         return 1;
2811     }
2812   else
2813     {
2814       /* Ignore sign extension.  */
2815       val &= (HOST_WIDE_INT) 0xffffffff;
2816     }
2817   return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2818           || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2819 }
2820
2821
2822 /* Return true if val is a valid bitmask immediate.  */
2823 bool
2824 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2825 {
2826   if (GET_MODE_SIZE (mode) < 8)
2827     {
2828       /* Replicate bit pattern.  */
2829       val &= (HOST_WIDE_INT) 0xffffffff;
2830       val |= val << 32;
2831     }
2832   return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2833                   sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2834 }
2835
2836
2837 /* Return true if val is an immediate that can be loaded into a
2838    register in a single instruction.  */
2839 bool
2840 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2841 {
2842   if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2843     return 1;
2844   return aarch64_bitmask_imm (val, mode);
2845 }
2846
2847 static bool
2848 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2849 {
2850   rtx base, offset;
2851
2852   if (GET_CODE (x) == HIGH)
2853     return true;
2854
2855   split_const (x, &base, &offset);
2856   if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2857     {
2858       if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2859           != SYMBOL_FORCE_TO_MEM)
2860         return true;
2861       else
2862         /* Avoid generating a 64-bit relocation in ILP32; leave
2863            to aarch64_expand_mov_immediate to handle it properly.  */
2864         return mode != ptr_mode;
2865     }
2866
2867   return aarch64_tls_referenced_p (x);
2868 }
2869
2870 /* Return true if register REGNO is a valid index register.
2871    STRICT_P is true if REG_OK_STRICT is in effect.  */
2872
2873 bool
2874 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2875 {
2876   if (!HARD_REGISTER_NUM_P (regno))
2877     {
2878       if (!strict_p)
2879         return true;
2880
2881       if (!reg_renumber)
2882         return false;
2883
2884       regno = reg_renumber[regno];
2885     }
2886   return GP_REGNUM_P (regno);
2887 }
2888
2889 /* Return true if register REGNO is a valid base register for mode MODE.
2890    STRICT_P is true if REG_OK_STRICT is in effect.  */
2891
2892 bool
2893 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2894 {
2895   if (!HARD_REGISTER_NUM_P (regno))
2896     {
2897       if (!strict_p)
2898         return true;
2899
2900       if (!reg_renumber)
2901         return false;
2902
2903       regno = reg_renumber[regno];
2904     }
2905
2906   /* The fake registers will be eliminated to either the stack or
2907      hard frame pointer, both of which are usually valid base registers.
2908      Reload deals with the cases where the eliminated form isn't valid.  */
2909   return (GP_REGNUM_P (regno)
2910           || regno == SP_REGNUM
2911           || regno == FRAME_POINTER_REGNUM
2912           || regno == ARG_POINTER_REGNUM);
2913 }
2914
2915 /* Return true if X is a valid base register for mode MODE.
2916    STRICT_P is true if REG_OK_STRICT is in effect.  */
2917
2918 static bool
2919 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2920 {
2921   if (!strict_p && GET_CODE (x) == SUBREG)
2922     x = SUBREG_REG (x);
2923
2924   return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2925 }
2926
2927 /* Return true if address offset is a valid index.  If it is, fill in INFO
2928    appropriately.  STRICT_P is true if REG_OK_STRICT is in effect.  */
2929
2930 static bool
2931 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2932                         enum machine_mode mode, bool strict_p)
2933 {
2934   enum aarch64_address_type type;
2935   rtx index;
2936   int shift;
2937
2938   /* (reg:P) */
2939   if ((REG_P (x) || GET_CODE (x) == SUBREG)
2940       && GET_MODE (x) == Pmode)
2941     {
2942       type = ADDRESS_REG_REG;
2943       index = x;
2944       shift = 0;
2945     }
2946   /* (sign_extend:DI (reg:SI)) */
2947   else if ((GET_CODE (x) == SIGN_EXTEND
2948             || GET_CODE (x) == ZERO_EXTEND)
2949            && GET_MODE (x) == DImode
2950            && GET_MODE (XEXP (x, 0)) == SImode)
2951     {
2952       type = (GET_CODE (x) == SIGN_EXTEND)
2953         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2954       index = XEXP (x, 0);
2955       shift = 0;
2956     }
2957   /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2958   else if (GET_CODE (x) == MULT
2959            && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2960                || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2961            && GET_MODE (XEXP (x, 0)) == DImode
2962            && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2963            && CONST_INT_P (XEXP (x, 1)))
2964     {
2965       type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2966         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2967       index = XEXP (XEXP (x, 0), 0);
2968       shift = exact_log2 (INTVAL (XEXP (x, 1)));
2969     }
2970   /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2971   else if (GET_CODE (x) == ASHIFT
2972            && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2973                || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2974            && GET_MODE (XEXP (x, 0)) == DImode
2975            && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2976            && CONST_INT_P (XEXP (x, 1)))
2977     {
2978       type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2979         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2980       index = XEXP (XEXP (x, 0), 0);
2981       shift = INTVAL (XEXP (x, 1));
2982     }
2983   /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2984   else if ((GET_CODE (x) == SIGN_EXTRACT
2985             || GET_CODE (x) == ZERO_EXTRACT)
2986            && GET_MODE (x) == DImode
2987            && GET_CODE (XEXP (x, 0)) == MULT
2988            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2989            && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2990     {
2991       type = (GET_CODE (x) == SIGN_EXTRACT)
2992         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2993       index = XEXP (XEXP (x, 0), 0);
2994       shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2995       if (INTVAL (XEXP (x, 1)) != 32 + shift
2996           || INTVAL (XEXP (x, 2)) != 0)
2997         shift = -1;
2998     }
2999   /* (and:DI (mult:DI (reg:DI) (const_int scale))
3000      (const_int 0xffffffff<<shift)) */
3001   else if (GET_CODE (x) == AND
3002            && GET_MODE (x) == DImode
3003            && GET_CODE (XEXP (x, 0)) == MULT
3004            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3005            && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3006            && CONST_INT_P (XEXP (x, 1)))
3007     {
3008       type = ADDRESS_REG_UXTW;
3009       index = XEXP (XEXP (x, 0), 0);
3010       shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3011       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3012         shift = -1;
3013     }
3014   /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3015   else if ((GET_CODE (x) == SIGN_EXTRACT
3016             || GET_CODE (x) == ZERO_EXTRACT)
3017            && GET_MODE (x) == DImode
3018            && GET_CODE (XEXP (x, 0)) == ASHIFT
3019            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3020            && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3021     {
3022       type = (GET_CODE (x) == SIGN_EXTRACT)
3023         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3024       index = XEXP (XEXP (x, 0), 0);
3025       shift = INTVAL (XEXP (XEXP (x, 0), 1));
3026       if (INTVAL (XEXP (x, 1)) != 32 + shift
3027           || INTVAL (XEXP (x, 2)) != 0)
3028         shift = -1;
3029     }
3030   /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3031      (const_int 0xffffffff<<shift)) */
3032   else if (GET_CODE (x) == AND
3033            && GET_MODE (x) == DImode
3034            && GET_CODE (XEXP (x, 0)) == ASHIFT
3035            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3036            && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3037            && CONST_INT_P (XEXP (x, 1)))
3038     {
3039       type = ADDRESS_REG_UXTW;
3040       index = XEXP (XEXP (x, 0), 0);
3041       shift = INTVAL (XEXP (XEXP (x, 0), 1));
3042       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3043         shift = -1;
3044     }
3045   /* (mult:P (reg:P) (const_int scale)) */
3046   else if (GET_CODE (x) == MULT
3047            && GET_MODE (x) == Pmode
3048            && GET_MODE (XEXP (x, 0)) == Pmode
3049            && CONST_INT_P (XEXP (x, 1)))
3050     {
3051       type = ADDRESS_REG_REG;
3052       index = XEXP (x, 0);
3053       shift = exact_log2 (INTVAL (XEXP (x, 1)));
3054     }
3055   /* (ashift:P (reg:P) (const_int shift)) */
3056   else if (GET_CODE (x) == ASHIFT
3057            && GET_MODE (x) == Pmode
3058            && GET_MODE (XEXP (x, 0)) == Pmode
3059            && CONST_INT_P (XEXP (x, 1)))
3060     {
3061       type = ADDRESS_REG_REG;
3062       index = XEXP (x, 0);
3063       shift = INTVAL (XEXP (x, 1));
3064     }
3065   else
3066     return false;
3067
3068   if (GET_CODE (index) == SUBREG)
3069     index = SUBREG_REG (index);
3070
3071   if ((shift == 0 ||
3072        (shift > 0 && shift <= 3
3073         && (1 << shift) == GET_MODE_SIZE (mode)))
3074       && REG_P (index)
3075       && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3076     {
3077       info->type = type;
3078       info->offset = index;
3079       info->shift = shift;
3080       return true;
3081     }
3082
3083   return false;
3084 }
3085
3086 static inline bool
3087 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3088 {
3089   return (offset >= -64 * GET_MODE_SIZE (mode)
3090           && offset < 64 * GET_MODE_SIZE (mode)
3091           && offset % GET_MODE_SIZE (mode) == 0);
3092 }
3093
3094 static inline bool
3095 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3096                                HOST_WIDE_INT offset)
3097 {
3098   return offset >= -256 && offset < 256;
3099 }
3100
3101 static inline bool
3102 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3103 {
3104   return (offset >= 0
3105           && offset < 4096 * GET_MODE_SIZE (mode)
3106           && offset % GET_MODE_SIZE (mode) == 0);
3107 }
3108
/* Return true if X is a valid address for machine mode MODE.  If it is,
   fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
   effect.  OUTER_CODE is PARALLEL for a load/store pair.

   Note that several cases store into INFO before the address has been
   fully validated, so INFO may have been partially written even when
   false is returned; callers should only rely on INFO after a true
   result.  */

static bool
aarch64_classify_address (struct aarch64_address_info *info,
                          rtx x, enum machine_mode mode,
                          RTX_CODE outer_code, bool strict_p)
{
  enum rtx_code code = GET_CODE (x);
  rtx op0, op1;
  /* Register-indexed (and LO_SUM) forms are not offered to load/store
     pairs or to 16-byte accesses.  */
  bool allow_reg_index_p =
    outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;

  /* Don't support anything other than POST_INC or REG addressing for
     AdvSIMD.  */
  if (aarch64_vector_mode_p (mode)
      && (code != POST_INC && code != REG))
    return false;

  switch (code)
    {
    /* Plain base register, treated as base plus zero offset.  */
    case REG:
    case SUBREG:
      info->type = ADDRESS_REG_IMM;
      info->base = x;
      info->offset = const0_rtx;
      return aarch64_base_register_rtx_p (x, strict_p);

    /* Base plus constant offset, or base plus index register.  */
    case PLUS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      if (GET_MODE_SIZE (mode) != 0
          && CONST_INT_P (op1)
          && aarch64_base_register_rtx_p (op0, strict_p))
        {
          HOST_WIDE_INT offset = INTVAL (op1);

          info->type = ADDRESS_REG_IMM;
          info->base = op0;
          info->offset = op1;

          /* TImode and TFmode values are allowed in both pairs of X
             registers and individual Q registers.  The available
             address modes are:
             X,X: 7-bit signed scaled offset
             Q:   9-bit signed offset
             We conservatively require an offset representable in either mode.
           */
          if (mode == TImode || mode == TFmode)
            return (offset_7bit_signed_scaled_p (mode, offset)
                    && offset_9bit_signed_unscaled_p (mode, offset));

          /* Load/store pairs accept only 4- and 8-byte elements with a
             7-bit signed scaled offset; single accesses accept either
             a 9-bit signed unscaled or a 12-bit unsigned scaled
             offset.  */
          if (outer_code == PARALLEL)
            return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
                    && offset_7bit_signed_scaled_p (mode, offset));
          else
            return (offset_9bit_signed_unscaled_p (mode, offset)
                    || offset_12bit_unsigned_scaled_p (mode, offset));
        }

      if (allow_reg_index_p)
        {
          /* Look for base + (scaled/extended) index register.  */
          if (aarch64_base_register_rtx_p (op0, strict_p)
              && aarch64_classify_index (info, op1, mode, strict_p))
            {
              info->base = op0;
              return true;
            }
          /* Likewise with the operands the other way round.  */
          if (aarch64_base_register_rtx_p (op1, strict_p)
              && aarch64_classify_index (info, op0, mode, strict_p))
            {
              info->base = op1;
              return true;
            }
        }

      return false;

    /* Writeback forms with an implicit increment; no explicit offset
       is recorded.  */
    case POST_INC:
    case POST_DEC:
    case PRE_INC:
    case PRE_DEC:
      info->type = ADDRESS_REG_WB;
      info->base = XEXP (x, 0);
      info->offset = NULL_RTX;
      return aarch64_base_register_rtx_p (info->base, strict_p);

    /* Writeback forms with an explicit (plus base (const_int))
       update expression on the same base register.  */
    case POST_MODIFY:
    case PRE_MODIFY:
      info->type = ADDRESS_REG_WB;
      info->base = XEXP (x, 0);
      if (GET_CODE (XEXP (x, 1)) == PLUS
          && CONST_INT_P (XEXP (XEXP (x, 1), 1))
          && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
          && aarch64_base_register_rtx_p (info->base, strict_p))
        {
          HOST_WIDE_INT offset;
          info->offset = XEXP (XEXP (x, 1), 1);
          offset = INTVAL (info->offset);

          /* TImode and TFmode values are allowed in both pairs of X
             registers and individual Q registers.  The available
             address modes are:
             X,X: 7-bit signed scaled offset
             Q:   9-bit signed offset
             We conservatively require an offset representable in either mode.
           */
          if (mode == TImode || mode == TFmode)
            return (offset_7bit_signed_scaled_p (mode, offset)
                    && offset_9bit_signed_unscaled_p (mode, offset));

          /* Unlike the PLUS case, a single access here accepts only
             the 9-bit signed unscaled offset.  */
          if (outer_code == PARALLEL)
            return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
                    && offset_7bit_signed_scaled_p (mode, offset));
          else
            return offset_9bit_signed_unscaled_p (mode, offset);
        }
      return false;

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      /* load literal: pc-relative constant pool entry.  Only supported
         for SI mode or larger.  */
      info->type = ADDRESS_SYMBOLIC;
      if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
        {
          rtx sym, addend;

          /* Only labels and constant-pool symbols qualify.  */
          split_const (x, &sym, &addend);
          return (GET_CODE (sym) == LABEL_REF
                  || (GET_CODE (sym) == SYMBOL_REF
                      && CONSTANT_POOL_ADDRESS_P (sym)));
        }
      return false;

    /* Base register plus the low part of a small-absolute symbol.  */
    case LO_SUM:
      info->type = ADDRESS_LO_SUM;
      info->base = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      if (allow_reg_index_p
          && aarch64_base_register_rtx_p (info->base, strict_p))
        {
          rtx sym, offs;
          split_const (info->offset, &sym, &offs);
          if (GET_CODE (sym) == SYMBOL_REF
              && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
                  == SYMBOL_SMALL_ABSOLUTE))
            {
              /* The symbol and offset must be aligned to the access size.  */
              unsigned int align;
              unsigned int ref_size;

              /* Determine the symbol's alignment in bits from whichever
                 source of information is available, falling back to
                 byte alignment.  */
              if (CONSTANT_POOL_ADDRESS_P (sym))
                align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
              else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
                {
                  tree exp = SYMBOL_REF_DECL (sym);
                  align = TYPE_ALIGN (TREE_TYPE (exp));
                  align = CONSTANT_ALIGNMENT (exp, align);
                }
              else if (SYMBOL_REF_DECL (sym))
                align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
              else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
                       && SYMBOL_REF_BLOCK (sym) != NULL)
                align = SYMBOL_REF_BLOCK (sym)->alignment;
              else
                align = BITS_PER_UNIT;

              /* A zero-sized mode is checked as if it were DImode.  */
              ref_size = GET_MODE_SIZE (mode);
              if (ref_size == 0)
                ref_size = GET_MODE_SIZE (DImode);

              return ((INTVAL (offs) & (ref_size - 1)) == 0
                      && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
            }
        }
      return false;

    default:
      return false;
    }
}
3294
3295 bool
3296 aarch64_symbolic_address_p (rtx x)
3297 {
3298   rtx offset;
3299
3300   split_const (x, &x, &offset);
3301   return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3302 }
3303
3304 /* Classify the base of symbolic expression X, given that X appears in
3305    context CONTEXT.  */
3306
3307 enum aarch64_symbol_type
3308 aarch64_classify_symbolic_expression (rtx x,
3309                                       enum aarch64_symbol_context context)
3310 {
3311   rtx offset;
3312
3313   split_const (x, &x, &offset);
3314   return aarch64_classify_symbol (x, context);
3315 }
3316
3317
3318 /* Return TRUE if X is a legitimate address for accessing memory in
3319    mode MODE.  */
3320 static bool
3321 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3322 {
3323   struct aarch64_address_info addr;
3324
3325   return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3326 }
3327
3328 /* Return TRUE if X is a legitimate address for accessing memory in
3329    mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
3330    pair operation.  */
3331 bool
3332 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3333                               RTX_CODE outer_code, bool strict_p)
3334 {
3335   struct aarch64_address_info addr;
3336
3337   return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3338 }
3339
3340 /* Return TRUE if rtx X is immediate constant 0.0 */
3341 bool
3342 aarch64_float_const_zero_rtx_p (rtx x)
3343 {
3344   REAL_VALUE_TYPE r;
3345
3346   if (GET_MODE (x) == VOIDmode)
3347     return false;
3348
3349   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3350   if (REAL_VALUE_MINUS_ZERO (r))
3351     return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3352   return REAL_VALUES_EQUAL (r, dconst0);
3353 }
3354
3355 /* Return the fixed registers used for condition codes.  */
3356
3357 static bool
3358 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3359 {
3360   *p1 = CC_REGNUM;
3361   *p2 = INVALID_REGNUM;
3362   return true;
3363 }
3364
3365 enum machine_mode
3366 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3367 {
3368   /* All floating point compares return CCFP if it is an equality
3369      comparison, and CCFPE otherwise.  */
3370   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3371     {
3372       switch (code)
3373         {
3374         case EQ:
3375         case NE:
3376         case UNORDERED:
3377         case ORDERED:
3378         case UNLT:
3379         case UNLE:
3380         case UNGT:
3381         case UNGE:
3382         case UNEQ:
3383         case LTGT:
3384           return CCFPmode;
3385
3386         case LT:
3387         case LE:
3388         case GT:
3389         case GE:
3390           return CCFPEmode;
3391
3392         default:
3393           gcc_unreachable ();
3394         }
3395     }
3396
3397   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3398       && y == const0_rtx
3399       && (code == EQ || code == NE || code == LT || code == GE)
3400       && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3401           || GET_CODE (x) == NEG))
3402     return CC_NZmode;
3403
3404   /* A compare with a shifted operand.  Because of canonicalization,
3405      the comparison will have to be swapped when we emit the assembly
3406      code.  */
3407   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3408       && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3409       && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3410           || GET_CODE (x) == LSHIFTRT
3411           || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3412     return CC_SWPmode;
3413
3414   /* Similarly for a negated operand, but we can only do this for
3415      equalities.  */
3416   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3417       && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3418       && (code == EQ || code == NE)
3419       && GET_CODE (x) == NEG)
3420     return CC_Zmode;
3421
3422   /* A compare of a mode narrower than SI mode against zero can be done
3423      by extending the value in the comparison.  */
3424   if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3425       && y == const0_rtx)
3426     /* Only use sign-extension if we really need it.  */
3427     return ((code == GT || code == GE || code == LE || code == LT)
3428             ? CC_SESWPmode : CC_ZESWPmode);
3429
3430   /* For everything else, return CCmode.  */
3431   return CCmode;
3432 }
3433
3434 static unsigned
3435 aarch64_get_condition_code (rtx x)
3436 {
3437   enum machine_mode mode = GET_MODE (XEXP (x, 0));
3438   enum rtx_code comp_code = GET_CODE (x);
3439
3440   if (GET_MODE_CLASS (mode) != MODE_CC)
3441     mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3442
3443   switch (mode)
3444     {
3445     case CCFPmode:
3446     case CCFPEmode:
3447       switch (comp_code)
3448         {
3449         case GE: return AARCH64_GE;
3450         case GT: return AARCH64_GT;
3451         case LE: return AARCH64_LS;
3452         case LT: return AARCH64_MI;
3453         case NE: return AARCH64_NE;
3454         case EQ: return AARCH64_EQ;
3455         case ORDERED: return AARCH64_VC;
3456         case UNORDERED: return AARCH64_VS;
3457         case UNLT: return AARCH64_LT;
3458         case UNLE: return AARCH64_LE;
3459         case UNGT: return AARCH64_HI;
3460         case UNGE: return AARCH64_PL;
3461         default: gcc_unreachable ();
3462         }
3463       break;
3464
3465     case CCmode:
3466       switch (comp_code)
3467         {
3468         case NE: return AARCH64_NE;
3469         case EQ: return AARCH64_EQ;
3470         case GE: return AARCH64_GE;
3471         case GT: return AARCH64_GT;
3472         case LE: return AARCH64_LE;
3473         case LT: return AARCH64_LT;
3474         case GEU: return AARCH64_CS;
3475         case GTU: return AARCH64_HI;
3476         case LEU: return AARCH64_LS;
3477         case LTU: return AARCH64_CC;
3478         default: gcc_unreachable ();
3479         }
3480       break;
3481
3482     case CC_SWPmode:
3483     case CC_ZESWPmode:
3484     case CC_SESWPmode:
3485       switch (comp_code)
3486         {
3487         case NE: return AARCH64_NE;
3488         case EQ: return AARCH64_EQ;
3489         case GE: return AARCH64_LE;
3490         case GT: return AARCH64_LT;
3491         case LE: return AARCH64_GE;
3492         case LT: return AARCH64_GT;
3493         case GEU: return AARCH64_LS;
3494         case GTU: return AARCH64_CC;
3495         case LEU: return AARCH64_CS;
3496         case LTU: return AARCH64_HI;
3497         default: gcc_unreachable ();
3498         }
3499       break;
3500
3501     case CC_NZmode:
3502       switch (comp_code)
3503         {
3504         case NE: return AARCH64_NE;
3505         case EQ: return AARCH64_EQ;
3506         case GE: return AARCH64_PL;
3507         case LT: return AARCH64_MI;
3508         default: gcc_unreachable ();
3509         }
3510       break;
3511
3512     case CC_Zmode:
3513       switch (comp_code)
3514         {
3515         case NE: return AARCH64_NE;
3516         case EQ: return AARCH64_EQ;
3517         default: gcc_unreachable ();
3518         }
3519       break;
3520
3521     default:
3522       gcc_unreachable ();
3523       break;
3524     }
3525 }
3526
/* Return the number of bits that are set in VALUE.  */

static unsigned
bit_count (unsigned HOST_WIDE_INT value)
{
  unsigned set_bits;

  /* "value &= value - 1" clears the lowest set bit, so the loop body
     runs exactly once per set bit.  */
  for (set_bits = 0; value != 0; value &= value - 1)
    set_bits++;

  return set_bits;
}
3540
3541 void
3542 aarch64_print_operand (FILE *f, rtx x, char code)
3543 {
3544   switch (code)
3545     {
3546     /* An integer or symbol address without a preceding # sign.  */
3547     case 'c':
3548       switch (GET_CODE (x))
3549         {
3550         case CONST_INT:
3551           fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3552           break;
3553
3554         case SYMBOL_REF:
3555           output_addr_const (f, x);
3556           break;
3557
3558         case CONST:
3559           if (GET_CODE (XEXP (x, 0)) == PLUS
3560               && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3561             {
3562               output_addr_const (f, x);
3563               break;
3564             }
3565           /* Fall through.  */
3566
3567         default:
3568           output_operand_lossage ("Unsupported operand for code '%c'", code);
3569         }
3570       break;
3571
3572     case 'e':
3573       /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w.  */
3574       {
3575         int n;
3576
3577         if (GET_CODE (x) != CONST_INT
3578             || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3579           {
3580             output_operand_lossage ("invalid operand for '%%%c'", code);
3581             return;
3582           }
3583
3584         switch (n)
3585           {
3586           case 3:
3587             fputc ('b', f);
3588             break;
3589           case 4:
3590             fputc ('h', f);
3591             break;
3592           case 5:
3593             fputc ('w', f);
3594             break;
3595           default:
3596             output_operand_lossage ("invalid operand for '%%%c'", code);
3597             return;
3598           }
3599       }
3600       break;
3601
3602     case 'p':
3603       {
3604         int n;
3605
3606         /* Print N such that 2^N == X.  */
3607         if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3608           {
3609             output_operand_lossage ("invalid operand for '%%%c'", code);
3610             return;
3611           }
3612
3613         asm_fprintf (f, "%d", n);
3614       }
3615       break;
3616
3617     case 'P':
3618       /* Print the number of non-zero bits in X (a const_int).  */
3619       if (GET_CODE (x) != CONST_INT)
3620         {
3621           output_operand_lossage ("invalid operand for '%%%c'", code);
3622           return;
3623         }
3624
3625       asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3626       break;
3627
3628     case 'H':
3629       /* Print the higher numbered register of a pair (TImode) of regs.  */
3630       if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3631         {
3632           output_operand_lossage ("invalid operand for '%%%c'", code);
3633           return;
3634         }
3635
3636       asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3637       break;
3638
3639     case 'm':
3640       /* Print a condition (eq, ne, etc).  */
3641
3642       /* CONST_TRUE_RTX means always -- that's the default.  */
3643       if (x == const_true_rtx)
3644         return;
3645
3646       if (!COMPARISON_P (x))
3647         {
3648           output_operand_lossage ("invalid operand for '%%%c'", code);
3649           return;
3650         }
3651
3652       fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3653       break;
3654
3655     case 'M':
3656       /* Print the inverse of a condition (eq <-> ne, etc).  */
3657
3658       /* CONST_TRUE_RTX means never -- that's the default.  */
3659       if (x == const_true_rtx)
3660         {
3661           fputs ("nv", f);
3662           return;
3663         }
3664
3665       if (!COMPARISON_P (x))
3666         {
3667           output_operand_lossage ("invalid operand for '%%%c'", code);
3668           return;
3669         }
3670
3671       fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3672                                   (aarch64_get_condition_code (x))], f);
3673       break;
3674
3675     case 'b':
3676     case 'h':
3677     case 's':
3678     case 'd':
3679     case 'q':
3680       /* Print a scalar FP/SIMD register name.  */
3681       if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3682         {
3683           output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3684           return;
3685         }
3686       asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3687       break;
3688
3689     case 'S':
3690     case 'T':
3691     case 'U':
3692     case 'V':
3693       /* Print the first FP/SIMD register name in a list.  */
3694       if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3695         {
3696           output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3697           return;
3698         }
3699       asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3700       break;
3701
3702     case 'X':
3703       /* Print bottom 16 bits of integer constant in hex.  */
3704       if (GET_CODE (x) != CONST_INT)
3705         {
3706           output_operand_lossage ("invalid operand for '%%%c'", code);
3707           return;
3708         }
3709       asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3710       break;
3711
3712     case 'w':
3713     case 'x':
3714       /* Print a general register name or the zero register (32-bit or
3715          64-bit).  */
3716       if (x == const0_rtx
3717           || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3718         {
3719           asm_fprintf (f, "%czr", code);
3720           break;
3721         }
3722
3723       if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3724         {
3725           asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3726           break;
3727         }
3728
3729       if (REG_P (x) && REGNO (x) == SP_REGNUM)
3730         {
3731           asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3732           break;
3733         }
3734
3735       /* Fall through */
3736
3737     case 0:
3738       /* Print a normal operand, if it's a general register, then we
3739          assume DImode.  */
3740       if (x == NULL)
3741         {
3742           output_operand_lossage ("missing operand");
3743           return;
3744         }
3745
3746       switch (GET_CODE (x))
3747         {
3748         case REG:
3749           asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3750           break;
3751
3752         case MEM:
3753           aarch64_memory_reference_mode = GET_MODE (x);
3754           output_address (XEXP (x, 0));
3755           break;
3756
3757         case LABEL_REF:
3758         case SYMBOL_REF:
3759           output_addr_const (asm_out_file, x);
3760           break;
3761
3762         case CONST_INT:
3763           asm_fprintf (f, "%wd", INTVAL (x));
3764           break;
3765
3766         case CONST_VECTOR:
3767           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3768             {
3769               gcc_assert (aarch64_const_vec_all_same_int_p (x,
3770                                                             HOST_WIDE_INT_MIN,
3771                                                             HOST_WIDE_INT_MAX));
3772               asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3773             }
3774           else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3775             {
3776               fputc ('0', f);
3777             }
3778           else
3779             gcc_unreachable ();
3780           break;
3781
3782         case CONST_DOUBLE:
3783           /* CONST_DOUBLE can represent a double-width integer.
3784              In this case, the mode of x is VOIDmode.  */
3785           if (GET_MODE (x) == VOIDmode)
3786             ; /* Do Nothing.  */
3787           else if (aarch64_float_const_zero_rtx_p (x))
3788             {
3789               fputc ('0', f);
3790               break;
3791             }
3792           else if (aarch64_float_const_representable_p (x))
3793             {
3794 #define buf_size 20
3795               char float_buf[buf_size] = {'\0'};
3796               REAL_VALUE_TYPE r;
3797               REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3798               real_to_decimal_for_mode (float_buf, &r,
3799                                         buf_size, buf_size,
3800                                         1, GET_MODE (x));
3801               asm_fprintf (asm_out_file, "%s", float_buf);
3802               break;
3803 #undef buf_size
3804             }
3805           output_operand_lossage ("invalid constant");
3806           return;
3807         default:
3808           output_operand_lossage ("invalid operand");
3809           return;
3810         }
3811       break;
3812
3813     case 'A':
3814       if (GET_CODE (x) == HIGH)
3815         x = XEXP (x, 0);
3816
3817       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3818         {
3819         case SYMBOL_SMALL_GOT:
3820           asm_fprintf (asm_out_file, ":got:");
3821           break;
3822
3823         case SYMBOL_SMALL_TLSGD:
3824           asm_fprintf (asm_out_file, ":tlsgd:");
3825           break;
3826
3827         case SYMBOL_SMALL_TLSDESC:
3828           asm_fprintf (asm_out_file, ":tlsdesc:");
3829           break;
3830
3831         case SYMBOL_SMALL_GOTTPREL:
3832           asm_fprintf (asm_out_file, ":gottprel:");
3833           break;
3834
3835         case SYMBOL_SMALL_TPREL:
3836           asm_fprintf (asm_out_file, ":tprel:");
3837           break;
3838
3839         case SYMBOL_TINY_GOT:
3840           gcc_unreachable ();
3841           break;
3842
3843         default:
3844           break;
3845         }
3846       output_addr_const (asm_out_file, x);
3847       break;
3848
3849     case 'L':
3850       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3851         {
3852         case SYMBOL_SMALL_GOT:
3853           asm_fprintf (asm_out_file, ":lo12:");
3854           break;
3855
3856         case SYMBOL_SMALL_TLSGD:
3857           asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3858           break;
3859
3860         case SYMBOL_SMALL_TLSDESC:
3861           asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3862           break;
3863
3864         case SYMBOL_SMALL_GOTTPREL:
3865           asm_fprintf (asm_out_file, ":gottprel_lo12:");
3866           break;
3867
3868         case SYMBOL_SMALL_TPREL:
3869           asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3870           break;
3871
3872         case SYMBOL_TINY_GOT:
3873           asm_fprintf (asm_out_file, ":got:");
3874           break;
3875
3876         default:
3877           break;
3878         }
3879       output_addr_const (asm_out_file, x);
3880       break;
3881
3882     case 'G':
3883
3884       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3885         {
3886         case SYMBOL_SMALL_TPREL:
3887           asm_fprintf (asm_out_file, ":tprel_hi12:");
3888           break;
3889         default:
3890           break;
3891         }
3892       output_addr_const (asm_out_file, x);
3893       break;
3894
3895     default:
3896       output_operand_lossage ("invalid operand prefix '%%%c'", code);
3897       return;
3898     }
3899 }
3900
3901 void
3902 aarch64_print_operand_address (FILE *f, rtx x)
3903 {
3904   struct aarch64_address_info addr;
3905
3906   if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3907                              MEM, true))
3908     switch (addr.type)
3909       {
3910       case ADDRESS_REG_IMM:
3911         if (addr.offset == const0_rtx)
3912           asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3913         else
3914           asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3915                        INTVAL (addr.offset));
3916         return;
3917
3918       case ADDRESS_REG_REG:
3919         if (addr.shift == 0)
3920           asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3921                        reg_names [REGNO (addr.offset)]);
3922         else
3923           asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3924                        reg_names [REGNO (addr.offset)], addr.shift);
3925         return;
3926
3927       case ADDRESS_REG_UXTW:
3928         if (addr.shift == 0)
3929           asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3930                        REGNO (addr.offset) - R0_REGNUM);
3931         else
3932           asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3933                        REGNO (addr.offset) - R0_REGNUM, addr.shift);
3934         return;
3935
3936       case ADDRESS_REG_SXTW:
3937         if (addr.shift == 0)
3938           asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3939                        REGNO (addr.offset) - R0_REGNUM);
3940         else
3941           asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3942                        REGNO (addr.offset) - R0_REGNUM, addr.shift);
3943         return;
3944
3945       case ADDRESS_REG_WB:
3946         switch (GET_CODE (x))
3947           {
3948           case PRE_INC:
3949             asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3950                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3951             return;
3952           case POST_INC:
3953             asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3954                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3955             return;
3956           case PRE_DEC:
3957             asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3958                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3959             return;
3960           case POST_DEC:
3961             asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3962                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3963             return;
3964           case PRE_MODIFY:
3965             asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3966                          INTVAL (addr.offset));
3967             return;
3968           case POST_MODIFY:
3969             asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3970                          INTVAL (addr.offset));
3971             return;
3972           default:
3973             break;
3974           }
3975         break;
3976
3977       case ADDRESS_LO_SUM:
3978         asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3979         output_addr_const (f, addr.offset);
3980         asm_fprintf (f, "]");
3981         return;
3982
3983       case ADDRESS_SYMBOLIC:
3984         break;
3985       }
3986
3987   output_addr_const (f, x);
3988 }
3989
3990 bool
3991 aarch64_label_mentioned_p (rtx x)
3992 {
3993   const char *fmt;
3994   int i;
3995
3996   if (GET_CODE (x) == LABEL_REF)
3997     return true;
3998
3999   /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4000      referencing instruction, but they are constant offsets, not
4001      symbols.  */
4002   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4003     return false;
4004
4005   fmt = GET_RTX_FORMAT (GET_CODE (x));
4006   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4007     {
4008       if (fmt[i] == 'E')
4009         {
4010           int j;
4011
4012           for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4013             if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4014               return 1;
4015         }
4016       else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4017         return 1;
4018     }
4019
4020   return 0;
4021 }
4022
4023 /* Implement REGNO_REG_CLASS.  */
4024
4025 enum reg_class
4026 aarch64_regno_regclass (unsigned regno)
4027 {
4028   if (GP_REGNUM_P (regno))
4029     return CORE_REGS;
4030
4031   if (regno == SP_REGNUM)
4032     return STACK_REG;
4033
4034   if (regno == FRAME_POINTER_REGNUM
4035       || regno == ARG_POINTER_REGNUM)
4036     return POINTER_REGS;
4037
4038   if (FP_REGNUM_P (regno))
4039     return FP_LO_REGNUM_P (regno) ?  FP_LO_REGS : FP_REGS;
4040
4041   return NO_REGS;
4042 }
4043
4044 /* Try a machine-dependent way of reloading an illegitimate address
4045    operand.  If we find one, push the reload and return the new rtx.  */
4046
4047 rtx
4048 aarch64_legitimize_reload_address (rtx *x_p,
4049                                    enum machine_mode mode,
4050                                    int opnum, int type,
4051                                    int ind_levels ATTRIBUTE_UNUSED)
4052 {
4053   rtx x = *x_p;
4054
4055   /* Do not allow mem (plus (reg, const)) if vector mode.  */
4056   if (aarch64_vector_mode_p (mode)
4057       && GET_CODE (x) == PLUS
4058       && REG_P (XEXP (x, 0))
4059       && CONST_INT_P (XEXP (x, 1)))
4060     {
4061       rtx orig_rtx = x;
4062       x = copy_rtx (x);
4063       push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4064                    BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4065                    opnum, (enum reload_type) type);
4066       return x;
4067     }
4068
4069   /* We must recognize output that we have already generated ourselves.  */
4070   if (GET_CODE (x) == PLUS
4071       && GET_CODE (XEXP (x, 0)) == PLUS
4072       && REG_P (XEXP (XEXP (x, 0), 0))
4073       && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4074       && CONST_INT_P (XEXP (x, 1)))
4075     {
4076       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4077                    BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4078                    opnum, (enum reload_type) type);
4079       return x;
4080     }
4081
4082   /* We wish to handle large displacements off a base register by splitting
4083      the addend across an add and the mem insn.  This can cut the number of
4084      extra insns needed from 3 to 1.  It is only useful for load/store of a
4085      single register with 12 bit offset field.  */
4086   if (GET_CODE (x) == PLUS
4087       && REG_P (XEXP (x, 0))
4088       && CONST_INT_P (XEXP (x, 1))
4089       && HARD_REGISTER_P (XEXP (x, 0))
4090       && mode != TImode
4091       && mode != TFmode
4092       && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4093     {
4094       HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4095       HOST_WIDE_INT low = val & 0xfff;
4096       HOST_WIDE_INT high = val - low;
4097       HOST_WIDE_INT offs;
4098       rtx cst;
4099       enum machine_mode xmode = GET_MODE (x);
4100
4101       /* In ILP32, xmode can be either DImode or SImode.  */
4102       gcc_assert (xmode == DImode || xmode == SImode);
4103
4104       /* Reload non-zero BLKmode offsets.  This is because we cannot ascertain
4105          BLKmode alignment.  */
4106       if (GET_MODE_SIZE (mode) == 0)
4107         return NULL_RTX;
4108
4109       offs = low % GET_MODE_SIZE (mode);
4110
4111       /* Align misaligned offset by adjusting high part to compensate.  */
4112       if (offs != 0)
4113         {
4114           if (aarch64_uimm12_shift (high + offs))
4115             {
4116               /* Align down.  */
4117               low = low - offs;
4118               high = high + offs;
4119             }
4120           else
4121             {
4122               /* Align up.  */
4123               offs = GET_MODE_SIZE (mode) - offs;
4124               low = low + offs;
4125               high = high + (low & 0x1000) - offs;
4126               low &= 0xfff;
4127             }
4128         }
4129
4130       /* Check for overflow.  */
4131       if (high + low != val)
4132         return NULL_RTX;
4133
4134       cst = GEN_INT (high);
4135       if (!aarch64_uimm12_shift (high))
4136         cst = force_const_mem (xmode, cst);
4137
4138       /* Reload high part into base reg, leaving the low part
4139          in the mem instruction.
4140          Note that replacing this gen_rtx_PLUS with plus_constant is
4141          wrong in this case because we rely on the
4142          (plus (plus reg c1) c2) structure being preserved so that
4143          XEXP (*p, 0) in push_reload below uses the correct term.  */
4144       x = gen_rtx_PLUS (xmode,
4145                         gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4146                         GEN_INT (low));
4147
4148       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4149                    BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4150                    opnum, (enum reload_type) type);
4151       return x;
4152     }
4153
4154   return NULL_RTX;
4155 }
4156
4157
4158 static reg_class_t
4159 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4160                           reg_class_t rclass,
4161                           enum machine_mode mode,
4162                           secondary_reload_info *sri)
4163 {
4164   /* Without the TARGET_SIMD instructions we cannot move a Q register
4165      to a Q register directly.  We need a scratch.  */
4166   if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4167       && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4168       && reg_class_subset_p (rclass, FP_REGS))
4169     {
4170       if (mode == TFmode)
4171         sri->icode = CODE_FOR_aarch64_reload_movtf;
4172       else if (mode == TImode)
4173         sri->icode = CODE_FOR_aarch64_reload_movti;
4174       return NO_REGS;
4175     }
4176
4177   /* A TFmode or TImode memory access should be handled via an FP_REGS
4178      because AArch64 has richer addressing modes for LDR/STR instructions
4179      than LDP/STP instructions.  */
4180   if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4181       && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4182     return FP_REGS;
4183
4184   if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4185       return CORE_REGS;
4186
4187   return NO_REGS;
4188 }
4189
4190 static bool
4191 aarch64_can_eliminate (const int from, const int to)
4192 {
4193   /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4194      HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */
4195
4196   if (frame_pointer_needed)
4197     {
4198       if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4199         return true;
4200       if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4201         return false;
4202       if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4203           && !cfun->calls_alloca)
4204         return true;
4205       if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4206         return true;
4207
4208       return false;
4209     }
4210
4211   return true;
4212 }
4213
4214 HOST_WIDE_INT
4215 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4216 {
4217   HOST_WIDE_INT frame_size;
4218   HOST_WIDE_INT offset;
4219
4220   aarch64_layout_frame ();
4221   frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4222                 + crtl->outgoing_args_size
4223                 + cfun->machine->saved_varargs_size);
4224
4225    frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4226    offset = frame_size;
4227
4228    if (to == HARD_FRAME_POINTER_REGNUM)
4229      {
4230        if (from == ARG_POINTER_REGNUM)
4231          return offset - crtl->outgoing_args_size;
4232
4233        if (from == FRAME_POINTER_REGNUM)
4234          return cfun->machine->frame.saved_regs_size + get_frame_size ();
4235      }
4236
4237    if (to == STACK_POINTER_REGNUM)
4238      {
4239        if (from == FRAME_POINTER_REGNUM)
4240          {
4241            HOST_WIDE_INT elim = crtl->outgoing_args_size
4242                               + cfun->machine->frame.saved_regs_size
4243                               + get_frame_size ()
4244                               - cfun->machine->frame.fp_lr_offset;
4245            elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4246            return elim;
4247          }
4248      }
4249
4250    return offset;
4251 }
4252
4253
4254 /* Implement RETURN_ADDR_RTX.  We do not support moving back to a
4255    previous frame.  */
4256
4257 rtx
4258 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4259 {
4260   if (count != 0)
4261     return const0_rtx;
4262   return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4263 }
4264
4265
4266 static void
4267 aarch64_asm_trampoline_template (FILE *f)
4268 {
4269   if (TARGET_ILP32)
4270     {
4271       asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4272       asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4273     }
4274   else
4275     {
4276       asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4277       asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4278     }
4279   asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4280   assemble_aligned_integer (4, const0_rtx);
4281   assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4282   assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4283 }
4284
4285 static void
4286 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4287 {
4288   rtx fnaddr, mem, a_tramp;
4289   const int tramp_code_sz = 16;
4290
4291   /* Don't need to copy the trailing D-words, we fill those in below.  */
4292   emit_block_move (m_tramp, assemble_trampoline_template (),
4293                    GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4294   mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4295   fnaddr = XEXP (DECL_RTL (fndecl), 0);
4296   if (GET_MODE (fnaddr) != ptr_mode)
4297     fnaddr = convert_memory_address (ptr_mode, fnaddr);
4298   emit_move_insn (mem, fnaddr);
4299
4300   mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4301   emit_move_insn (mem, chain_value);
4302
4303   /* XXX We should really define a "clear_cache" pattern and use
4304      gen_clear_cache().  */
4305   a_tramp = XEXP (m_tramp, 0);
4306   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4307                      LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4308                      plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4309                      ptr_mode);
4310 }
4311
4312 static unsigned char
4313 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4314 {
4315   switch (regclass)
4316     {
4317     case CORE_REGS:
4318     case POINTER_REGS:
4319     case GENERAL_REGS:
4320     case ALL_REGS:
4321     case FP_REGS:
4322     case FP_LO_REGS:
4323       return
4324         aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4325                                        (GET_MODE_SIZE (mode) + 7) / 8;
4326     case STACK_REG:
4327       return 1;
4328
4329     case NO_REGS:
4330       return 0;
4331
4332     default:
4333       break;
4334     }
4335   gcc_unreachable ();
4336 }
4337
4338 static reg_class_t
4339 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4340 {
4341   if (regclass == POINTER_REGS)
4342     return GENERAL_REGS;
4343
4344   if (regclass == STACK_REG)
4345     {
4346       if (REG_P(x)
4347           && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4348           return regclass;
4349
4350       return NO_REGS;
4351     }
4352
4353   /* If it's an integer immediate that MOVI can't handle, then
4354      FP_REGS is not an option, so we return NO_REGS instead.  */
4355   if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4356       && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4357     return NO_REGS;
4358
4359   /* Register eliminiation can result in a request for
4360      SP+constant->FP_REGS.  We cannot support such operations which
4361      use SP as source and an FP_REG as destination, so reject out
4362      right now.  */
4363   if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4364     {
4365       rtx lhs = XEXP (x, 0);
4366
4367       /* Look through a possible SUBREG introduced by ILP32.  */
4368       if (GET_CODE (lhs) == SUBREG)
4369         lhs = SUBREG_REG (lhs);
4370
4371       gcc_assert (REG_P (lhs));
4372       gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4373                                       POINTER_REGS));
4374       return NO_REGS;
4375     }
4376
4377   return regclass;
4378 }
4379
/* Write a reference to the label NAME to F, using asm_fprintf's %U
   directive in front of the name.  */

void
aarch64_asm_output_labelref (FILE *f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}
4385
4386 static void
4387 aarch64_elf_asm_constructor (rtx symbol, int priority)
4388 {
4389   if (priority == DEFAULT_INIT_PRIORITY)
4390     default_ctor_section_asm_out_constructor (symbol, priority);
4391   else
4392     {
4393       section *s;
4394       char buf[18];
4395       snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4396       s = get_section (buf, SECTION_WRITE, NULL);
4397       switch_to_section (s);
4398       assemble_align (POINTER_SIZE);
4399       assemble_aligned_integer (POINTER_BYTES, symbol);
4400     }
4401 }
4402
4403 static void
4404 aarch64_elf_asm_destructor (rtx symbol, int priority)
4405 {
4406   if (priority == DEFAULT_INIT_PRIORITY)
4407     default_dtor_section_asm_out_destructor (symbol, priority);
4408   else
4409     {
4410       section *s;
4411       char buf[18];
4412       snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4413       s = get_section (buf, SECTION_WRITE, NULL);
4414       switch_to_section (s);
4415       assemble_align (POINTER_SIZE);
4416       assemble_aligned_integer (POINTER_BYTES, symbol);
4417     }
4418 }
4419
4420 const char*
4421 aarch64_output_casesi (rtx *operands)
4422 {
4423   char buf[100];
4424   char label[100];
4425   rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4426   int index;
4427   static const char *const patterns[4][2] =
4428   {
4429     {
4430       "ldrb\t%w3, [%0,%w1,uxtw]",
4431       "add\t%3, %4, %w3, sxtb #2"
4432     },
4433     {
4434       "ldrh\t%w3, [%0,%w1,uxtw #1]",
4435       "add\t%3, %4, %w3, sxth #2"
4436     },
4437     {
4438       "ldr\t%w3, [%0,%w1,uxtw #2]",
4439       "add\t%3, %4, %w3, sxtw #2"
4440     },
4441     /* We assume that DImode is only generated when not optimizing and
4442        that we don't really need 64-bit address offsets.  That would
4443        imply an object file with 8GB of code in a single function!  */
4444     {
4445       "ldr\t%w3, [%0,%w1,uxtw #2]",
4446       "add\t%3, %4, %w3, sxtw #2"
4447     }
4448   };
4449
4450   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4451
4452   index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4453
4454   gcc_assert (index >= 0 && index <= 3);
4455
4456   /* Need to implement table size reduction, by chaning the code below.  */
4457   output_asm_insn (patterns[index][0], operands);
4458   ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4459   snprintf (buf, sizeof (buf),
4460             "adr\t%%4, %s", targetm.strip_name_encoding (label));
4461   output_asm_insn (buf, operands);
4462   output_asm_insn (patterns[index][1], operands);
4463   output_asm_insn ("br\t%3", operands);
4464   assemble_label (asm_out_file, label);
4465   return "";
4466 }
4467
4468
/* Return the width in bits (8, 16 or 32) of the field selected by MASK
   when MASK is exactly an 8-, 16- or 32-bit all-ones field shifted left
   by SHIFT, i.e. when the shift/mask pair suits a UXTB, UXTH or UXTW
   extend operator.  Return 0 if MASK has any other value or SHIFT is
   outside the range 0..3.  */

int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  int width;

  /* Only shift amounts 0..3 are considered.  */
  if (shift < 0 || shift > 3)
    return 0;

  for (width = 8; width <= 32; width *= 2)
    {
      HOST_WIDE_INT field = (((HOST_WIDE_INT)1U << width) - 1) << shift;

      if (field == mask)
        return width;
    }

  return 0;
}
4488
4489 static bool
4490 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4491                                    const_rtx x ATTRIBUTE_UNUSED)
4492 {
4493   /* We can't use blocks for constants when we're using a per-function
4494      constant pool.  */
4495   return false;
4496 }
4497
4498 static section *
4499 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4500                             rtx x ATTRIBUTE_UNUSED,
4501                             unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4502 {
4503   /* Force all constant pool entries into the current function section.  */
4504   return function_section (current_function_decl);
4505 }
4506
4507
4508 /* Costs.  */
4509
4510 /* Helper function for rtx cost calculation.  Strip a shift expression
4511    from X.  Returns the inner operand if successful, or the original
4512    expression on failure.  */
4513 static rtx
4514 aarch64_strip_shift (rtx x)
4515 {
4516   rtx op = x;
4517
4518   if ((GET_CODE (op) == ASHIFT
4519        || GET_CODE (op) == ASHIFTRT
4520        || GET_CODE (op) == LSHIFTRT)
4521       && CONST_INT_P (XEXP (op, 1)))
4522     return XEXP (op, 0);
4523
4524   if (GET_CODE (op) == MULT
4525       && CONST_INT_P (XEXP (op, 1))
4526       && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4527     return XEXP (op, 0);
4528
4529   return x;
4530 }
4531
4532 /* Helper function for rtx cost calculation.  Strip an extend
4533    expression from X.  Returns the inner operand if successful, or the
4534    original expression on failure.  We deal with a number of possible
4535    canonicalization variations here.  */
4536 static rtx
4537 aarch64_strip_extend (rtx x)
4538 {
4539   rtx op = x;
4540
4541   /* Zero and sign extraction of a widened value.  */
4542   if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4543       && XEXP (op, 2) == const0_rtx
4544       && GET_CODE (XEXP (op, 0)) == MULT
4545       && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4546                                          XEXP (op, 1)))
4547     return XEXP (XEXP (op, 0), 0);
4548
4549   /* It can also be represented (for zero-extend) as an AND with an
4550      immediate.  */
4551   if (GET_CODE (op) == AND
4552       && GET_CODE (XEXP (op, 0)) == MULT
4553       && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4554       && CONST_INT_P (XEXP (op, 1))
4555       && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4556                            INTVAL (XEXP (op, 1))) != 0)
4557     return XEXP (XEXP (op, 0), 0);
4558
4559   /* Now handle extended register, as this may also have an optional
4560      left shift by 1..4.  */
4561   if (GET_CODE (op) == ASHIFT
4562       && CONST_INT_P (XEXP (op, 1))
4563       && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4564     op = XEXP (op, 0);
4565
4566   if (GET_CODE (op) == ZERO_EXTEND
4567       || GET_CODE (op) == SIGN_EXTEND)
4568     op = XEXP (op, 0);
4569
4570   if (op != x)
4571     return op;
4572
4573   return x;
4574 }
4575
4576 /* Helper function for rtx cost calculation.  Calculate the cost of
4577    a MULT, which may be part of a multiply-accumulate rtx.  Return
4578    the calculated cost of the expression, recursing manually in to
4579    operands where needed.  */
4580
4581 static int
4582 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4583 {
4584   rtx op0, op1;
4585   const struct cpu_cost_table *extra_cost
4586     = aarch64_tune_params->insn_extra_cost;
4587   int cost = 0;
4588   bool maybe_fma = (outer == PLUS || outer == MINUS);
4589   enum machine_mode mode = GET_MODE (x);
4590
4591   gcc_checking_assert (code == MULT);
4592
4593   op0 = XEXP (x, 0);
4594   op1 = XEXP (x, 1);
4595
4596   if (VECTOR_MODE_P (mode))
4597     mode = GET_MODE_INNER (mode);
4598
4599   /* Integer multiply/fma.  */
4600   if (GET_MODE_CLASS (mode) == MODE_INT)
4601     {
4602       /* The multiply will be canonicalized as a shift, cost it as such.  */
4603       if (CONST_INT_P (op1)
4604           && exact_log2 (INTVAL (op1)) > 0)
4605         {
4606           if (speed)
4607             {
4608               if (maybe_fma)
4609                 /* ADD (shifted register).  */
4610                 cost += extra_cost->alu.arith_shift;
4611               else
4612                 /* LSL (immediate).  */
4613                 cost += extra_cost->alu.shift;
4614             }
4615
4616           cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4617
4618           return cost;
4619         }
4620
4621       /* Integer multiplies or FMAs have zero/sign extending variants.  */
4622       if ((GET_CODE (op0) == ZERO_EXTEND
4623            && GET_CODE (op1) == ZERO_EXTEND)
4624           || (GET_CODE (op0) == SIGN_EXTEND
4625               && GET_CODE (op1) == SIGN_EXTEND))
4626         {
4627           cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4628                   + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4629
4630           if (speed)
4631             {
4632               if (maybe_fma)
4633                 /* MADD/SMADDL/UMADDL.  */
4634                 cost += extra_cost->mult[0].extend_add;
4635               else
4636                 /* MUL/SMULL/UMULL.  */
4637                 cost += extra_cost->mult[0].extend;
4638             }
4639
4640           return cost;
4641         }
4642
4643       /* This is either an integer multiply or an FMA.  In both cases
4644          we want to recurse and cost the operands.  */
4645       cost += rtx_cost (op0, MULT, 0, speed)
4646               + rtx_cost (op1, MULT, 1, speed);
4647
4648       if (speed)
4649         {
4650           if (maybe_fma)
4651             /* MADD.  */
4652             cost += extra_cost->mult[mode == DImode].add;
4653           else
4654             /* MUL.  */
4655             cost += extra_cost->mult[mode == DImode].simple;
4656         }
4657
4658       return cost;
4659     }
4660   else
4661     {
4662       if (speed)
4663         {
4664           /* Floating-point FMA can also support negations of the
4665              operands.  */
4666           if (GET_CODE (op0) == NEG)
4667             {
4668               maybe_fma = true;
4669               op0 = XEXP (op0, 0);
4670             }
4671           if (GET_CODE (op1) == NEG)
4672             {
4673               maybe_fma = true;
4674               op1 = XEXP (op1, 0);
4675             }
4676
4677           if (maybe_fma)
4678             /* FMADD/FNMADD/FNMSUB/FMSUB.  */
4679             cost += extra_cost->fp[mode == DFmode].fma;
4680           else
4681             /* FMUL.  */
4682             cost += extra_cost->fp[mode == DFmode].mult;
4683         }
4684
4685       cost += rtx_cost (op0, MULT, 0, speed)
4686               + rtx_cost (op1, MULT, 1, speed);
4687       return cost;
4688     }
4689 }
4690
4691 static int
4692 aarch64_address_cost (rtx x,
4693                       enum machine_mode mode,
4694                       addr_space_t as ATTRIBUTE_UNUSED,
4695                       bool speed)
4696 {
4697   enum rtx_code c = GET_CODE (x);
4698   const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4699   struct aarch64_address_info info;
4700   int cost = 0;
4701   info.shift = 0;
4702
4703   if (!aarch64_classify_address (&info, x, mode, c, false))
4704     {
4705       if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4706         {
4707           /* This is a CONST or SYMBOL ref which will be split
4708              in a different way depending on the code model in use.
4709              Cost it through the generic infrastructure.  */
4710           int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4711           /* Divide through by the cost of one instruction to
4712              bring it to the same units as the address costs.  */
4713           cost_symbol_ref /= COSTS_N_INSNS (1);
4714           /* The cost is then the cost of preparing the address,
4715              followed by an immediate (possibly 0) offset.  */
4716           return cost_symbol_ref + addr_cost->imm_offset;
4717         }
4718       else
4719         {
4720           /* This is most likely a jump table from a case
4721              statement.  */
4722           return addr_cost->register_offset;
4723         }
4724     }
4725
4726   switch (info.type)
4727     {
4728       case ADDRESS_LO_SUM:
4729       case ADDRESS_SYMBOLIC:
4730       case ADDRESS_REG_IMM:
4731         cost += addr_cost->imm_offset;
4732         break;
4733
4734       case ADDRESS_REG_WB:
4735         if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4736           cost += addr_cost->pre_modify;
4737         else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4738           cost += addr_cost->post_modify;
4739         else
4740           gcc_unreachable ();
4741
4742         break;
4743
4744       case ADDRESS_REG_REG:
4745         cost += addr_cost->register_offset;
4746         break;
4747
4748       case ADDRESS_REG_UXTW:
4749       case ADDRESS_REG_SXTW:
4750         cost += addr_cost->register_extend;
4751         break;
4752
4753       default:
4754         gcc_unreachable ();
4755     }
4756
4757
4758   if (info.shift > 0)
4759     {
4760       /* For the sake of calculating the cost of the shifted register
4761          component, we can treat same sized modes in the same way.  */
4762       switch (GET_MODE_BITSIZE (mode))
4763         {
4764           case 16:
4765             cost += addr_cost->addr_scale_costs.hi;
4766             break;
4767
4768           case 32:
4769             cost += addr_cost->addr_scale_costs.si;
4770             break;
4771
4772           case 64:
4773             cost += addr_cost->addr_scale_costs.di;
4774             break;
4775
4776           /* We can't tell, or this is a 128-bit vector.  */
4777           default:
4778             cost += addr_cost->addr_scale_costs.ti;
4779             break;
4780         }
4781     }
4782
4783   return cost;
4784 }
4785
4786 /* Calculate the cost of calculating X, storing it in *COST.  Result
4787    is true if the total cost of the operation has now been calculated.  */
4788 static bool
4789 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4790                    int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4791 {
4792   rtx op0, op1;
4793   const struct cpu_cost_table *extra_cost
4794     = aarch64_tune_params->insn_extra_cost;
4795   enum machine_mode mode = GET_MODE (x);
4796
4797   /* By default, assume that everything has equivalent cost to the
4798      cheapest instruction.  Any additional costs are applied as a delta
4799      above this default.  */
4800   *cost = COSTS_N_INSNS (1);
4801
4802   /* TODO: The cost infrastructure currently does not handle
4803      vector operations.  Assume that all vector operations
4804      are equally expensive.  */
4805   if (VECTOR_MODE_P (mode))
4806     {
4807       if (speed)
4808         *cost += extra_cost->vect.alu;
4809       return true;
4810     }
4811
4812   switch (code)
4813     {
4814     case SET:
4815       /* The cost depends entirely on the operands to SET.  */
4816       *cost = 0;
4817       op0 = SET_DEST (x);
4818       op1 = SET_SRC (x);
4819
4820       switch (GET_CODE (op0))
4821         {
4822         case MEM:
4823           if (speed)
4824             {
4825               rtx address = XEXP (op0, 0);
4826               if (GET_MODE_CLASS (mode) == MODE_INT)
4827                 *cost += extra_cost->ldst.store;
4828               else if (mode == SFmode)
4829                 *cost += extra_cost->ldst.storef;
4830               else if (mode == DFmode)
4831                 *cost += extra_cost->ldst.stored;
4832
4833               *cost +=
4834                 COSTS_N_INSNS (aarch64_address_cost (address, mode,
4835                                                      0, speed));
4836             }
4837
4838           *cost += rtx_cost (op1, SET, 1, speed);
4839           return true;
4840
4841         case SUBREG:
4842           if (! REG_P (SUBREG_REG (op0)))
4843             *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4844
4845           /* Fall through.  */
4846         case REG:
4847           /* const0_rtx is in general free, but we will use an
4848              instruction to set a register to 0.  */
4849           if (REG_P (op1) || op1 == const0_rtx)
4850             {
4851               /* The cost is 1 per register copied.  */
4852               int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
4853                               / UNITS_PER_WORD;
4854               *cost = COSTS_N_INSNS (n_minus_1 + 1);
4855             }
4856           else
4857             /* Cost is just the cost of the RHS of the set.  */
4858             *cost += rtx_cost (op1, SET, 1, speed);
4859           return true;
4860
4861         case ZERO_EXTRACT:
4862         case SIGN_EXTRACT:
4863           /* Bit-field insertion.  Strip any redundant widening of
4864              the RHS to meet the width of the target.  */
4865           if (GET_CODE (op1) == SUBREG)
4866             op1 = SUBREG_REG (op1);
4867           if ((GET_CODE (op1) == ZERO_EXTEND
4868                || GET_CODE (op1) == SIGN_EXTEND)
4869               && GET_CODE (XEXP (op0, 1)) == CONST_INT
4870               && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4871                   >= INTVAL (XEXP (op0, 1))))
4872             op1 = XEXP (op1, 0);
4873
4874           if (CONST_INT_P (op1))
4875             {
4876               /* MOV immediate is assumed to always be cheap.  */
4877               *cost = COSTS_N_INSNS (1);
4878             }
4879           else
4880             {
4881               /* BFM.  */
4882               if (speed)
4883                 *cost += extra_cost->alu.bfi;
4884               *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
4885             }
4886
4887           return true;
4888
4889         default:
4890           /* We can't make sense of this, assume default cost.  */
4891           *cost = COSTS_N_INSNS (1);
4892           break;
4893         }
4894       return false;
4895
4896     case CONST_INT:
4897       /* If an instruction can incorporate a constant within the
4898          instruction, the instruction's expression avoids calling
4899          rtx_cost() on the constant.  If rtx_cost() is called on a
4900          constant, then it is usually because the constant must be
4901          moved into a register by one or more instructions.
4902
4903          The exception is constant 0, which can be expressed
4904          as XZR/WZR and is therefore free.  The exception to this is
4905          if we have (set (reg) (const0_rtx)) in which case we must cost
4906          the move.  However, we can catch that when we cost the SET, so
4907          we don't need to consider that here.  */
4908       if (x == const0_rtx)
4909         *cost = 0;
4910       else
4911         {
4912           /* To an approximation, building any other constant is
4913              proportionally expensive to the number of instructions
4914              required to build that constant.  This is true whether we
4915              are compiling for SPEED or otherwise.  */
4916           *cost = COSTS_N_INSNS (aarch64_build_constant (0,
4917                                                          INTVAL (x),
4918                                                          false));
4919         }
4920       return true;
4921
4922     case CONST_DOUBLE:
4923       if (speed)
4924         {
4925           /* mov[df,sf]_aarch64.  */
4926           if (aarch64_float_const_representable_p (x))
4927             /* FMOV (scalar immediate).  */
4928             *cost += extra_cost->fp[mode == DFmode].fpconst;
4929           else if (!aarch64_float_const_zero_rtx_p (x))
4930             {
4931               /* This will be a load from memory.  */
4932               if (mode == DFmode)
4933                 *cost += extra_cost->ldst.loadd;
4934               else
4935                 *cost += extra_cost->ldst.loadf;
4936             }
4937           else
4938             /* Otherwise this is +0.0.  We get this using MOVI d0, #0
4939                or MOV v0.s[0], wzr - neither of which are modeled by the
4940                cost tables.  Just use the default cost.  */
4941             {
4942             }
4943         }
4944
4945       return true;
4946
4947     case MEM:
4948       if (speed)
4949         {
4950           /* For loads we want the base cost of a load, plus an
4951              approximation for the additional cost of the addressing
4952              mode.  */
4953           rtx address = XEXP (x, 0);
4954           if (GET_MODE_CLASS (mode) == MODE_INT)
4955             *cost += extra_cost->ldst.load;
4956           else if (mode == SFmode)
4957             *cost += extra_cost->ldst.loadf;
4958           else if (mode == DFmode)
4959             *cost += extra_cost->ldst.loadd;
4960
4961           *cost +=
4962                 COSTS_N_INSNS (aarch64_address_cost (address, mode,
4963                                                      0, speed));
4964         }
4965
4966       return true;
4967
4968     case NEG:
4969       op0 = XEXP (x, 0);
4970
4971       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4972        {
4973           if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
4974               || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
4975             {
4976               /* CSETM.  */
4977               *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
4978               return true;
4979             }
4980
4981           /* Cost this as SUB wzr, X.  */
4982           op0 = CONST0_RTX (GET_MODE (x));
4983           op1 = XEXP (x, 0);
4984           goto cost_minus;
4985         }
4986
4987       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
4988         {
4989           /* Support (neg(fma...)) as a single instruction only if
4990              sign of zeros is unimportant.  This matches the decision
4991              making in aarch64.md.  */
4992           if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
4993             {
4994               /* FNMADD.  */
4995               *cost = rtx_cost (op0, NEG, 0, speed);
4996               return true;
4997             }
4998           if (speed)
4999             /* FNEG.  */
5000             *cost += extra_cost->fp[mode == DFmode].neg;
5001           return false;
5002         }
5003
5004       return false;
5005
5006     case COMPARE:
5007       op0 = XEXP (x, 0);
5008       op1 = XEXP (x, 1);
5009
5010       if (op1 == const0_rtx
5011           && GET_CODE (op0) == AND)
5012         {
5013           x = op0;
5014           goto cost_logic;
5015         }
5016
5017       /* Comparisons can work if the order is swapped.
5018          Canonicalization puts the more complex operation first, but
5019          we want it in op1.  */
5020       if (! (REG_P (op0)
5021              || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5022         {
5023           op0 = XEXP (x, 1);
5024           op1 = XEXP (x, 0);
5025         }
5026       goto cost_minus;
5027
5028     case MINUS:
5029       {
5030         op0 = XEXP (x, 0);
5031         op1 = XEXP (x, 1);
5032
5033 cost_minus:
5034         /* Detect valid immediates.  */
5035         if ((GET_MODE_CLASS (mode) == MODE_INT
5036              || (GET_MODE_CLASS (mode) == MODE_CC
5037                  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5038             && CONST_INT_P (op1)
5039             && aarch64_uimm12_shift (INTVAL (op1)))
5040           {
5041             *cost += rtx_cost (op0, MINUS, 0, speed);
5042
5043             if (speed)
5044               /* SUB(S) (immediate).  */
5045               *cost += extra_cost->alu.arith;
5046             return true;
5047
5048           }
5049
5050         rtx new_op1 = aarch64_strip_extend (op1);
5051
5052         /* Cost this as an FMA-alike operation.  */
5053         if ((GET_CODE (new_op1) == MULT
5054              || GET_CODE (new_op1) == ASHIFT)
5055             && code != COMPARE)
5056           {
5057             *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5058                                             (enum rtx_code) code,
5059                                             speed);
5060             *cost += rtx_cost (op0, MINUS, 0, speed);
5061             return true;
5062           }
5063
5064         *cost += rtx_cost (new_op1, MINUS, 1, speed);
5065
5066         if (speed)
5067           {
5068             if (GET_MODE_CLASS (mode) == MODE_INT)
5069               /* SUB(S).  */
5070               *cost += extra_cost->alu.arith;
5071             else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5072               /* FSUB.  */
5073               *cost += extra_cost->fp[mode == DFmode].addsub;
5074           }
5075         return true;
5076       }
5077
5078     case PLUS:
5079       {
5080         rtx new_op0;
5081
5082         op0 = XEXP (x, 0);
5083         op1 = XEXP (x, 1);
5084
5085         if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5086             || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5087           {
5088             /* CSINC.  */
5089             *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5090             *cost += rtx_cost (op1, PLUS, 1, speed);
5091             return true;
5092           }
5093
5094         if (GET_MODE_CLASS (mode) == MODE_INT
5095             && CONST_INT_P (op1)
5096             && aarch64_uimm12_shift (INTVAL (op1)))
5097           {
5098             *cost += rtx_cost (op0, PLUS, 0, speed);
5099
5100             if (speed)
5101               /* ADD (immediate).  */
5102               *cost += extra_cost->alu.arith;
5103             return true;
5104           }
5105
5106         /* Strip any extend, leave shifts behind as we will
5107            cost them through mult_cost.  */
5108         new_op0 = aarch64_strip_extend (op0);
5109
5110         if (GET_CODE (new_op0) == MULT
5111             || GET_CODE (new_op0) == ASHIFT)
5112           {
5113             *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5114                                             speed);
5115             *cost += rtx_cost (op1, PLUS, 1, speed);
5116             return true;
5117           }
5118
5119         *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5120                   + rtx_cost (op1, PLUS, 1, speed));
5121
5122         if (speed)
5123           {
5124             if (GET_MODE_CLASS (mode) == MODE_INT)
5125               /* ADD.  */
5126               *cost += extra_cost->alu.arith;
5127             else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5128               /* FADD.  */
5129               *cost += extra_cost->fp[mode == DFmode].addsub;
5130           }
5131         return true;
5132       }
5133
5134     case IOR:
5135     case XOR:
5136     case AND:
5137     cost_logic:
5138       op0 = XEXP (x, 0);
5139       op1 = XEXP (x, 1);
5140
5141       if (code == AND
5142           && GET_CODE (op0) == MULT
5143           && CONST_INT_P (XEXP (op0, 1))
5144           && CONST_INT_P (op1)
5145           && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5146                                INTVAL (op1)) != 0)
5147         {
5148           /* This is a UBFM/SBFM.  */
5149           *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5150           if (speed)
5151             *cost += extra_cost->alu.bfx;
5152           return true;
5153         }
5154
5155       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5156         {
5157           /* We possibly get the immediate for free, this is not
5158              modelled.  */
5159           if (CONST_INT_P (op1)
5160               && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5161             {
5162               *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5163
5164               if (speed)
5165                 *cost += extra_cost->alu.logical;
5166
5167               return true;
5168             }
5169           else
5170             {
5171               rtx new_op0 = op0;
5172
5173               /* Handle ORN, EON, or BIC.  */
5174               if (GET_CODE (op0) == NOT)
5175                 op0 = XEXP (op0, 0);
5176
5177               new_op0 = aarch64_strip_shift (op0);
5178
5179               /* If we had a shift on op0 then this is a logical-shift-
5180                  by-register/immediate operation.  Otherwise, this is just
5181                  a logical operation.  */
5182               if (speed)
5183                 {
5184                   if (new_op0 != op0)
5185                     {
5186                       /* Shift by immediate.  */
5187                       if (CONST_INT_P (XEXP (op0, 1)))
5188                         *cost += extra_cost->alu.log_shift;
5189                       else
5190                         *cost += extra_cost->alu.log_shift_reg;
5191                     }
5192                   else
5193                     *cost += extra_cost->alu.logical;
5194                 }
5195
5196               /* In both cases we want to cost both operands.  */
5197               *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5198                        + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5199
5200               return true;
5201             }
5202         }
5203       return false;
5204
5205     case NOT:
5206       /* MVN.  */
5207       if (speed)
5208         *cost += extra_cost->alu.logical;
5209
5210       /* The logical instruction could have the shifted register form,
5211          but the cost is the same if the shift is processed as a separate
5212          instruction, so we don't bother with it here.  */
5213       return false;
5214
5215     case ZERO_EXTEND:
5216       if ((GET_MODE (x) == DImode
5217            && GET_MODE (XEXP (x, 0)) == SImode)
5218           || GET_CODE (XEXP (x, 0)) == MEM)
5219         {
5220           *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5221           return true;
5222         }
5223       return false;
5224
5225     case SIGN_EXTEND:
5226       if (GET_CODE (XEXP (x, 0)) == MEM)
5227         {
5228           *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
5229           return true;
5230         }
5231       return false;
5232
5233     case ROTATE:
5234       if (!CONST_INT_P (XEXP (x, 1)))
5235         *cost += COSTS_N_INSNS (2);
5236       /* Fall through.  */
5237     case ROTATERT:
5238     case LSHIFTRT:
5239     case ASHIFT:
5240     case ASHIFTRT:
5241
5242       /* Shifting by a register often takes an extra cycle.  */
5243       if (speed && !CONST_INT_P (XEXP (x, 1)))
5244         *cost += extra_cost->alu.arith_shift_reg;
5245
5246       *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
5247       return true;
5248
5249     case HIGH:
5250       if (!CONSTANT_P (XEXP (x, 0)))
5251         *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
5252       return true;
5253
5254     case LO_SUM:
5255       if (!CONSTANT_P (XEXP (x, 1)))
5256         *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
5257       *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
5258       return true;
5259
5260     case ZERO_EXTRACT:
5261     case SIGN_EXTRACT:
5262       *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
5263       return true;
5264
5265     case MULT:
5266       *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5267       /* aarch64_rtx_mult_cost always handles recursion to its
5268          operands.  */
5269       return true;
5270
5271     case MOD:
5272     case UMOD:
5273       *cost = COSTS_N_INSNS (2);
5274       if (speed)
5275         {
5276           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5277             *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5278                       + extra_cost->mult[GET_MODE (x) == DImode].idiv);
5279           else if (GET_MODE (x) == DFmode)
5280             *cost += (extra_cost->fp[1].mult
5281                       + extra_cost->fp[1].div);
5282           else if (GET_MODE (x) == SFmode)
5283             *cost += (extra_cost->fp[0].mult
5284                       + extra_cost->fp[0].div);
5285         }
5286       return false;  /* All arguments need to be in registers.  */
5287
5288     case DIV:
5289     case UDIV:
5290       *cost = COSTS_N_INSNS (1);
5291       if (speed)
5292         {
5293           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5294             *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
5295           else if (GET_MODE (x) == DFmode)
5296             *cost += extra_cost->fp[1].div;
5297           else if (GET_MODE (x) == SFmode)
5298             *cost += extra_cost->fp[0].div;
5299         }
5300       return false;  /* All arguments need to be in registers.  */
5301
5302     default:
5303       break;
5304     }
5305   return false;
5306 }
5307
5308 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
5309    calculated for X.  This cost is stored in *COST.  Returns true
5310    if the total cost of X was calculated.  */
5311 static bool
5312 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
5313                    int param, int *cost, bool speed)
5314 {
5315   bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
5316
5317   if (dump_file && (dump_flags & TDF_DETAILS))
5318     {
5319       print_rtl_single (dump_file, x);
5320       fprintf (dump_file, "\n%s cost: %d (%s)\n",
5321                speed ? "Hot" : "Cold",
5322                *cost, result ? "final" : "partial");
5323     }
5324
5325   return result;
5326 }
5327
5328 static int
5329 aarch64_register_move_cost (enum machine_mode mode,
5330                             reg_class_t from_i, reg_class_t to_i)
5331 {
5332   enum reg_class from = (enum reg_class) from_i;
5333   enum reg_class to = (enum reg_class) to_i;
5334   const struct cpu_regmove_cost *regmove_cost
5335     = aarch64_tune_params->regmove_cost;
5336
5337   /* Moving between GPR and stack cost is the same as GP2GP.  */
5338   if ((from == GENERAL_REGS && to == STACK_REG)
5339       || (to == GENERAL_REGS && from == STACK_REG))
5340     return regmove_cost->GP2GP;
5341
5342   /* To/From the stack register, we move via the gprs.  */
5343   if (to == STACK_REG || from == STACK_REG)
5344     return aarch64_register_move_cost (mode, from, GENERAL_REGS)
5345             + aarch64_register_move_cost (mode, GENERAL_REGS, to);
5346
5347   if (from == GENERAL_REGS && to == GENERAL_REGS)
5348     return regmove_cost->GP2GP;
5349   else if (from == GENERAL_REGS)
5350     return regmove_cost->GP2FP;
5351   else if (to == GENERAL_REGS)
5352     return regmove_cost->FP2GP;
5353
5354   /* When AdvSIMD instructions are disabled it is not possible to move
5355      a 128-bit value directly between Q registers.  This is handled in
5356      secondary reload.  A general register is used as a scratch to move
5357      the upper DI value and the lower DI value is moved directly,
5358      hence the cost is the sum of three moves. */
5359   if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 128)
5360     return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
5361
5362   return regmove_cost->FP2FP;
5363 }
5364
5365 static int
5366 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5367                           reg_class_t rclass ATTRIBUTE_UNUSED,
5368                           bool in ATTRIBUTE_UNUSED)
5369 {
5370   return aarch64_tune_params->memmov_cost;
5371 }
5372
5373 /* Return the number of instructions that can be issued per cycle.  */
5374 static int
5375 aarch64_sched_issue_rate (void)
5376 {
5377   return aarch64_tune_params->issue_rate;
5378 }
5379
5380 /* Vectorizer cost model target hooks.  */
5381
5382 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
5383 static int
5384 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5385                                     tree vectype,
5386                                     int misalign ATTRIBUTE_UNUSED)
5387 {
5388   unsigned elements;
5389
5390   switch (type_of_cost)
5391     {
5392       case scalar_stmt:
5393         return aarch64_tune_params->vec_costs->scalar_stmt_cost;
5394
5395       case scalar_load:
5396         return aarch64_tune_params->vec_costs->scalar_load_cost;
5397
5398       case scalar_store:
5399         return aarch64_tune_params->vec_costs->scalar_store_cost;
5400
5401       case vector_stmt:
5402         return aarch64_tune_params->vec_costs->vec_stmt_cost;
5403
5404       case vector_load:
5405         return aarch64_tune_params->vec_costs->vec_align_load_cost;
5406
5407       case vector_store:
5408         return aarch64_tune_params->vec_costs->vec_store_cost;
5409
5410       case vec_to_scalar:
5411         return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
5412
5413       case scalar_to_vec:
5414         return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
5415
5416       case unaligned_load:
5417         return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
5418
5419       case unaligned_store:
5420         return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
5421
5422       case cond_branch_taken:
5423         return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
5424
5425       case cond_branch_not_taken:
5426         return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
5427
5428       case vec_perm:
5429       case vec_promote_demote:
5430         return aarch64_tune_params->vec_costs->vec_stmt_cost;
5431
5432       case vec_construct:
5433         elements = TYPE_VECTOR_SUBPARTS (vectype);
5434         return elements / 2 + 1;
5435
5436       default:
5437         gcc_unreachable ();
5438     }
5439 }
5440
5441 /* Implement targetm.vectorize.add_stmt_cost.  */
5442 static unsigned
5443 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5444                        struct _stmt_vec_info *stmt_info, int misalign,
5445                        enum vect_cost_model_location where)
5446 {
5447   unsigned *cost = (unsigned *) data;
5448   unsigned retval = 0;
5449
5450   if (flag_vect_cost_model)
5451     {
5452       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5453       int stmt_cost =
5454             aarch64_builtin_vectorization_cost (kind, vectype, misalign);
5455
5456       /* Statements in an inner loop relative to the loop being
5457          vectorized are weighted more heavily.  The value here is
5458          a function (linear for now) of the loop nest level.  */
5459       if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5460         {
5461           loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5462           struct loop *loop =  LOOP_VINFO_LOOP (loop_info);
5463           unsigned nest_level = loop_depth (loop);
5464
5465           count *= nest_level;
5466         }
5467
5468       retval = (unsigned) (count * stmt_cost);
5469       cost[where] += retval;
5470     }
5471
5472   return retval;
5473 }
5474
5475 static void initialize_aarch64_code_model (void);
5476
5477 /* Parse the architecture extension string.  */
5478
5479 static void
5480 aarch64_parse_extension (char *str)
5481 {
5482   /* The extension string is parsed left to right.  */
5483   const struct aarch64_option_extension *opt = NULL;
5484
5485   /* Flag to say whether we are adding or removing an extension.  */
5486   int adding_ext = -1;
5487
5488   while (str != NULL && *str != 0)
5489     {
5490       char *ext;
5491       size_t len;
5492
5493       str++;
5494       ext = strchr (str, '+');
5495
5496       if (ext != NULL)
5497         len = ext - str;
5498       else
5499         len = strlen (str);
5500
5501       if (len >= 2 && strncmp (str, "no", 2) == 0)
5502         {
5503           adding_ext = 0;
5504           len -= 2;
5505           str += 2;
5506         }
5507       else if (len > 0)
5508         adding_ext = 1;
5509
5510       if (len == 0)
5511         {
5512           error ("missing feature modifier after %qs", "+no");
5513           return;
5514         }
5515
5516       /* Scan over the extensions table trying to find an exact match.  */
5517       for (opt = all_extensions; opt->name != NULL; opt++)
5518         {
5519           if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5520             {
5521               /* Add or remove the extension.  */
5522               if (adding_ext)
5523                 aarch64_isa_flags |= opt->flags_on;
5524               else
5525                 aarch64_isa_flags &= ~(opt->flags_off);
5526               break;
5527             }
5528         }
5529
5530       if (opt->name == NULL)
5531         {
5532           /* Extension not found in list.  */
5533           error ("unknown feature modifier %qs", str);
5534           return;
5535         }
5536
5537       str = ext;
5538     };
5539
5540   return;
5541 }
5542
5543 /* Parse the ARCH string.  */
5544
5545 static void
5546 aarch64_parse_arch (void)
5547 {
5548   char *ext;
5549   const struct processor *arch;
5550   char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5551   size_t len;
5552
5553   strcpy (str, aarch64_arch_string);
5554
5555   ext = strchr (str, '+');
5556
5557   if (ext != NULL)
5558     len = ext - str;
5559   else
5560     len = strlen (str);
5561
5562   if (len == 0)
5563     {
5564       error ("missing arch name in -march=%qs", str);
5565       return;
5566     }
5567
5568   /* Loop through the list of supported ARCHs to find a match.  */
5569   for (arch = all_architectures; arch->name != NULL; arch++)
5570     {
5571       if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5572         {
5573           selected_arch = arch;
5574           aarch64_isa_flags = selected_arch->flags;
5575
5576           if (!selected_cpu)
5577             selected_cpu = &all_cores[selected_arch->core];
5578
5579           if (ext != NULL)
5580             {
5581               /* ARCH string contains at least one extension.  */
5582               aarch64_parse_extension (ext);
5583             }
5584
5585           if (strcmp (selected_arch->arch, selected_cpu->arch))
5586             {
5587               warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
5588                        selected_cpu->name, selected_arch->name);
5589             }
5590
5591           return;
5592         }
5593     }
5594
5595   /* ARCH name not found in list.  */
5596   error ("unknown value %qs for -march", str);
5597   return;
5598 }
5599
5600 /* Parse the CPU string.  */
5601
5602 static void
5603 aarch64_parse_cpu (void)
5604 {
5605   char *ext;
5606   const struct processor *cpu;
5607   char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5608   size_t len;
5609
5610   strcpy (str, aarch64_cpu_string);
5611
5612   ext = strchr (str, '+');
5613
5614   if (ext != NULL)
5615     len = ext - str;
5616   else
5617     len = strlen (str);
5618
5619   if (len == 0)
5620     {
5621       error ("missing cpu name in -mcpu=%qs", str);
5622       return;
5623     }
5624
5625   /* Loop through the list of supported CPUs to find a match.  */
5626   for (cpu = all_cores; cpu->name != NULL; cpu++)
5627     {
5628       if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5629         {
5630           selected_cpu = cpu;
5631           selected_tune = cpu;
5632           aarch64_isa_flags = selected_cpu->flags;
5633
5634           if (ext != NULL)
5635             {
5636               /* CPU string contains at least one extension.  */
5637               aarch64_parse_extension (ext);
5638             }
5639
5640           return;
5641         }
5642     }
5643
5644   /* CPU name not found in list.  */
5645   error ("unknown value %qs for -mcpu", str);
5646   return;
5647 }
5648
5649 /* Parse the TUNE string.  */
5650
5651 static void
5652 aarch64_parse_tune (void)
5653 {
5654   const struct processor *cpu;
5655   char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5656   strcpy (str, aarch64_tune_string);
5657
5658   /* Loop through the list of supported CPUs to find a match.  */
5659   for (cpu = all_cores; cpu->name != NULL; cpu++)
5660     {
5661       if (strcmp (cpu->name, str) == 0)
5662         {
5663           selected_tune = cpu;
5664           return;
5665         }
5666     }
5667
5668   /* CPU name not found in list.  */
5669   error ("unknown value %qs for -mtune", str);
5670   return;
5671 }
5672
5673
5674 /* Implement TARGET_OPTION_OVERRIDE.  */
5675
5676 static void
5677 aarch64_override_options (void)
5678 {
5679   /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
5680      If either of -march or -mtune is given, they override their
5681      respective component of -mcpu.
5682
5683      So, first parse AARCH64_CPU_STRING, then the others, be careful
5684      with -march as, if -mcpu is not present on the command line, march
5685      must set a sensible default CPU.  */
5686   if (aarch64_cpu_string)
5687     {
5688       aarch64_parse_cpu ();
5689     }
5690
5691   if (aarch64_arch_string)
5692     {
5693       aarch64_parse_arch ();
5694     }
5695
5696   if (aarch64_tune_string)
5697     {
5698       aarch64_parse_tune ();
5699     }
5700
5701 #ifndef HAVE_AS_MABI_OPTION
5702   /* The compiler may have been configured with 2.23.* binutils, which does
5703      not have support for ILP32.  */
5704   if (TARGET_ILP32)
5705     error ("Assembler does not support -mabi=ilp32");
5706 #endif
5707
5708   initialize_aarch64_code_model ();
5709
5710   aarch64_build_bitmask_table ();
5711
5712   /* This target defaults to strict volatile bitfields.  */
5713   if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5714     flag_strict_volatile_bitfields = 1;
5715
5716   /* If the user did not specify a processor, choose the default
5717      one for them.  This will be the CPU set during configuration using
5718      --with-cpu, otherwise it is "generic".  */
5719   if (!selected_cpu)
5720     {
5721       selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5722       aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5723     }
5724
5725   gcc_assert (selected_cpu);
5726
5727   /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
5728   if (!selected_tune)
5729     selected_tune = &all_cores[selected_cpu->core];
5730
5731   aarch64_tune_flags = selected_tune->flags;
5732   aarch64_tune = selected_tune->core;
5733   aarch64_tune_params = selected_tune->tune;
5734
5735   if (aarch64_fix_a53_err835769 == 2)
5736     {
5737 #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
5738       aarch64_fix_a53_err835769 = 1;
5739 #else
5740       aarch64_fix_a53_err835769 = 0;
5741 #endif
5742     }
5743
5744   aarch64_override_options_after_change ();
5745 }
5746
5747 /* Implement targetm.override_options_after_change.  */
5748
5749 static void
5750 aarch64_override_options_after_change (void)
5751 {
5752   if (flag_omit_frame_pointer)
5753     flag_omit_leaf_frame_pointer = false;
5754   else if (flag_omit_leaf_frame_pointer)
5755     flag_omit_frame_pointer = true;
5756 }
5757
/* Allocate and return a zero-initialized machine_function structure
   from garbage-collected memory.  */
static struct machine_function *
aarch64_init_machine_status (void)
{
  return ggc_alloc_cleared_machine_function ();
}
5765
5766 void
5767 aarch64_init_expanders (void)
5768 {
5769   init_machine_status = aarch64_init_machine_status;
5770 }
5771
5772 /* A checking mechanism for the implementation of the various code models.  */
5773 static void
5774 initialize_aarch64_code_model (void)
5775 {
5776    if (flag_pic)
5777      {
5778        switch (aarch64_cmodel_var)
5779          {
5780          case AARCH64_CMODEL_TINY:
5781            aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5782            break;
5783          case AARCH64_CMODEL_SMALL:
5784            aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5785            break;
5786          case AARCH64_CMODEL_LARGE:
5787            sorry ("code model %qs with -f%s", "large",
5788                   flag_pic > 1 ? "PIC" : "pic");
5789          default:
5790            gcc_unreachable ();
5791          }
5792      }
5793    else
5794      aarch64_cmodel = aarch64_cmodel_var;
5795 }
5796
5797 /* Return true if SYMBOL_REF X binds locally.  */
5798
5799 static bool
5800 aarch64_symbol_binds_local_p (const_rtx x)
5801 {
5802   return (SYMBOL_REF_DECL (x)
5803           ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5804           : SYMBOL_REF_LOCAL_P (x));
5805 }
5806
5807 /* Return true if SYMBOL_REF X is thread local */
5808 static bool
5809 aarch64_tls_symbol_p (rtx x)
5810 {
5811   if (! TARGET_HAVE_TLS)
5812     return false;
5813
5814   if (GET_CODE (x) != SYMBOL_REF)
5815     return false;
5816
5817   return SYMBOL_REF_TLS_MODEL (x) != 0;
5818 }
5819
5820 /* Classify a TLS symbol into one of the TLS kinds.  */
5821 enum aarch64_symbol_type
5822 aarch64_classify_tls_symbol (rtx x)
5823 {
5824   enum tls_model tls_kind = tls_symbolic_operand_type (x);
5825
5826   switch (tls_kind)
5827     {
5828     case TLS_MODEL_GLOBAL_DYNAMIC:
5829     case TLS_MODEL_LOCAL_DYNAMIC:
5830       return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5831
5832     case TLS_MODEL_INITIAL_EXEC:
5833       return SYMBOL_SMALL_GOTTPREL;
5834
5835     case TLS_MODEL_LOCAL_EXEC:
5836       return SYMBOL_SMALL_TPREL;
5837
5838     case TLS_MODEL_EMULATED:
5839     case TLS_MODEL_NONE:
5840       return SYMBOL_FORCE_TO_MEM;
5841
5842     default:
5843       gcc_unreachable ();
5844     }
5845 }
5846
5847 /* Return the method that should be used to access SYMBOL_REF or
5848    LABEL_REF X in context CONTEXT.  */
5849
5850 enum aarch64_symbol_type
5851 aarch64_classify_symbol (rtx x,
5852                          enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5853 {
5854   if (GET_CODE (x) == LABEL_REF)
5855     {
5856       switch (aarch64_cmodel)
5857         {
5858         case AARCH64_CMODEL_LARGE:
5859           return SYMBOL_FORCE_TO_MEM;
5860
5861         case AARCH64_CMODEL_TINY_PIC:
5862         case AARCH64_CMODEL_TINY:
5863           return SYMBOL_TINY_ABSOLUTE;
5864
5865         case AARCH64_CMODEL_SMALL_PIC:
5866         case AARCH64_CMODEL_SMALL:
5867           return SYMBOL_SMALL_ABSOLUTE;
5868
5869         default:
5870           gcc_unreachable ();
5871         }
5872     }
5873
5874   if (GET_CODE (x) == SYMBOL_REF)
5875     {
5876       if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5877           return SYMBOL_FORCE_TO_MEM;
5878
5879       if (aarch64_tls_symbol_p (x))
5880         return aarch64_classify_tls_symbol (x);
5881
5882       switch (aarch64_cmodel)
5883         {
5884         case AARCH64_CMODEL_TINY:
5885           if (SYMBOL_REF_WEAK (x))
5886             return SYMBOL_FORCE_TO_MEM;
5887           return SYMBOL_TINY_ABSOLUTE;
5888
5889         case AARCH64_CMODEL_SMALL:
5890           if (SYMBOL_REF_WEAK (x))
5891             return SYMBOL_FORCE_TO_MEM;
5892           return SYMBOL_SMALL_ABSOLUTE;
5893
5894         case AARCH64_CMODEL_TINY_PIC:
5895           if (!aarch64_symbol_binds_local_p (x))
5896             return SYMBOL_TINY_GOT;
5897           return SYMBOL_TINY_ABSOLUTE;
5898
5899         case AARCH64_CMODEL_SMALL_PIC:
5900           if (!aarch64_symbol_binds_local_p (x))
5901             return SYMBOL_SMALL_GOT;
5902           return SYMBOL_SMALL_ABSOLUTE;
5903
5904         default:
5905           gcc_unreachable ();
5906         }
5907     }
5908
5909   /* By default push everything into the constant pool.  */
5910   return SYMBOL_FORCE_TO_MEM;
5911 }
5912
5913 bool
5914 aarch64_constant_address_p (rtx x)
5915 {
5916   return (CONSTANT_P (x) && memory_address_p (DImode, x));
5917 }
5918
5919 bool
5920 aarch64_legitimate_pic_operand_p (rtx x)
5921 {
5922   if (GET_CODE (x) == SYMBOL_REF
5923       || (GET_CODE (x) == CONST
5924           && GET_CODE (XEXP (x, 0)) == PLUS
5925           && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5926      return false;
5927
5928   return true;
5929 }
5930
5931 /* Return true if X holds either a quarter-precision or
5932      floating-point +0.0 constant.  */
5933 static bool
5934 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5935 {
5936   if (!CONST_DOUBLE_P (x))
5937     return false;
5938
5939   /* TODO: We could handle moving 0.0 to a TFmode register,
5940      but first we would like to refactor the movtf_aarch64
5941      to be more amicable to split moves properly and
5942      correctly gate on TARGET_SIMD.  For now - reject all
5943      constants which are not to SFmode or DFmode registers.  */
5944   if (!(mode == SFmode || mode == DFmode))
5945     return false;
5946
5947   if (aarch64_float_const_zero_rtx_p (x))
5948     return true;
5949   return aarch64_float_const_representable_p (x);
5950 }
5951
5952 static bool
5953 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5954 {
5955   /* Do not allow vector struct mode constants.  We could support
5956      0 and -1 easily, but they need support in aarch64-simd.md.  */
5957   if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5958     return false;
5959
5960   /* This could probably go away because
5961      we now decompose CONST_INTs according to expand_mov_immediate.  */
5962   if ((GET_CODE (x) == CONST_VECTOR
5963        && aarch64_simd_valid_immediate (x, mode, false, NULL))
5964       || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5965         return !targetm.cannot_force_const_mem (mode, x);
5966
5967   if (GET_CODE (x) == HIGH
5968       && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5969     return true;
5970
5971   return aarch64_constant_address_p (x);
5972 }
5973
5974 rtx
5975 aarch64_load_tp (rtx target)
5976 {
5977   if (!target
5978       || GET_MODE (target) != Pmode
5979       || !register_operand (target, Pmode))
5980     target = gen_reg_rtx (Pmode);
5981
5982   /* Can return in any reg.  */
5983   emit_insn (gen_aarch64_load_tp_hard (target));
5984   return target;
5985 }
5986
5987 /* On AAPCS systems, this is the "struct __va_list".  */
5988 static GTY(()) tree va_list_type;
5989
5990 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5991    Return the type to use as __builtin_va_list.
5992
5993    AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5994
5995    struct __va_list
5996    {
5997      void *__stack;
5998      void *__gr_top;
5999      void *__vr_top;
6000      int   __gr_offs;
6001      int   __vr_offs;
6002    };  */
6003
6004 static tree
6005 aarch64_build_builtin_va_list (void)
6006 {
6007   tree va_list_name;
6008   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6009
6010   /* Create the type.  */
6011   va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6012   /* Give it the required name.  */
6013   va_list_name = build_decl (BUILTINS_LOCATION,
6014                              TYPE_DECL,
6015                              get_identifier ("__va_list"),
6016                              va_list_type);
6017   DECL_ARTIFICIAL (va_list_name) = 1;
6018   TYPE_NAME (va_list_type) = va_list_name;
6019   TYPE_STUB_DECL (va_list_type) = va_list_name;
6020
6021   /* Create the fields.  */
6022   f_stack = build_decl (BUILTINS_LOCATION,
6023                         FIELD_DECL, get_identifier ("__stack"),
6024                         ptr_type_node);
6025   f_grtop = build_decl (BUILTINS_LOCATION,
6026                         FIELD_DECL, get_identifier ("__gr_top"),
6027                         ptr_type_node);
6028   f_vrtop = build_decl (BUILTINS_LOCATION,
6029                         FIELD_DECL, get_identifier ("__vr_top"),
6030                         ptr_type_node);
6031   f_groff = build_decl (BUILTINS_LOCATION,
6032                         FIELD_DECL, get_identifier ("__gr_offs"),
6033                         integer_type_node);
6034   f_vroff = build_decl (BUILTINS_LOCATION,
6035                         FIELD_DECL, get_identifier ("__vr_offs"),
6036                         integer_type_node);
6037
6038   DECL_ARTIFICIAL (f_stack) = 1;
6039   DECL_ARTIFICIAL (f_grtop) = 1;
6040   DECL_ARTIFICIAL (f_vrtop) = 1;
6041   DECL_ARTIFICIAL (f_groff) = 1;
6042   DECL_ARTIFICIAL (f_vroff) = 1;
6043
6044   DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6045   DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6046   DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6047   DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6048   DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6049
6050   TYPE_FIELDS (va_list_type) = f_stack;
6051   DECL_CHAIN (f_stack) = f_grtop;
6052   DECL_CHAIN (f_grtop) = f_vrtop;
6053   DECL_CHAIN (f_vrtop) = f_groff;
6054   DECL_CHAIN (f_groff) = f_vroff;
6055
6056   /* Compute its layout.  */
6057   layout_type (va_list_type);
6058
6059   return va_list_type;
6060 }
6061
6062 /* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
6063 static void
6064 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6065 {
6066   const CUMULATIVE_ARGS *cum;
6067   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6068   tree stack, grtop, vrtop, groff, vroff;
6069   tree t;
6070   int gr_save_area_size;
6071   int vr_save_area_size;
6072   int vr_offset;
6073
6074   cum = &crtl->args.info;
6075   gr_save_area_size
6076     = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6077   vr_save_area_size
6078     = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6079
6080   if (TARGET_GENERAL_REGS_ONLY)
6081     {
6082       if (cum->aapcs_nvrn > 0)
6083         sorry ("%qs and floating point or vector arguments",
6084                "-mgeneral-regs-only");
6085       vr_save_area_size = 0;
6086     }
6087
6088   f_stack = TYPE_FIELDS (va_list_type_node);
6089   f_grtop = DECL_CHAIN (f_stack);
6090   f_vrtop = DECL_CHAIN (f_grtop);
6091   f_groff = DECL_CHAIN (f_vrtop);
6092   f_vroff = DECL_CHAIN (f_groff);
6093
6094   stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6095                   NULL_TREE);
6096   grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6097                   NULL_TREE);
6098   vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6099                   NULL_TREE);
6100   groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6101                   NULL_TREE);
6102   vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6103                   NULL_TREE);
6104
6105   /* Emit code to initialize STACK, which points to the next varargs stack
6106      argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
6107      by named arguments.  STACK is 8-byte aligned.  */
6108   t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6109   if (cum->aapcs_stack_size > 0)
6110     t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6111   t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6112   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6113
6114   /* Emit code to initialize GRTOP, the top of the GR save area.
6115      virtual_incoming_args_rtx should have been 16 byte aligned.  */
6116   t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6117   t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6118   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6119
6120   /* Emit code to initialize VRTOP, the top of the VR save area.
6121      This address is gr_save_area_bytes below GRTOP, rounded
6122      down to the next 16-byte boundary.  */
6123   t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6124   vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6125                              STACK_BOUNDARY / BITS_PER_UNIT);
6126
6127   if (vr_offset)
6128     t = fold_build_pointer_plus_hwi (t, -vr_offset);
6129   t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6130   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6131
6132   /* Emit code to initialize GROFF, the offset from GRTOP of the
6133      next GPR argument.  */
6134   t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6135               build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6136   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6137
6138   /* Likewise emit code to initialize VROFF, the offset from FTOP
6139      of the next VR argument.  */
6140   t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6141               build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6142   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6143 }
6144
6145 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
6146
6147 static tree
6148 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6149                               gimple_seq *post_p ATTRIBUTE_UNUSED)
6150 {
6151   tree addr;
6152   bool indirect_p;
6153   bool is_ha;           /* is HFA or HVA.  */
6154   bool dw_align;        /* double-word align.  */
6155   enum machine_mode ag_mode = VOIDmode;
6156   int nregs;
6157   enum machine_mode mode;
6158
6159   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6160   tree stack, f_top, f_off, off, arg, roundup, on_stack;
6161   HOST_WIDE_INT size, rsize, adjust, align;
6162   tree t, u, cond1, cond2;
6163
6164   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6165   if (indirect_p)
6166     type = build_pointer_type (type);
6167
6168   mode = TYPE_MODE (type);
6169
6170   f_stack = TYPE_FIELDS (va_list_type_node);
6171   f_grtop = DECL_CHAIN (f_stack);
6172   f_vrtop = DECL_CHAIN (f_grtop);
6173   f_groff = DECL_CHAIN (f_vrtop);
6174   f_vroff = DECL_CHAIN (f_groff);
6175
6176   stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6177                   f_stack, NULL_TREE);
6178   size = int_size_in_bytes (type);
6179   align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6180
6181   dw_align = false;
6182   adjust = 0;
6183   if (aarch64_vfp_is_call_or_return_candidate (mode,
6184                                                type,
6185                                                &ag_mode,
6186                                                &nregs,
6187                                                &is_ha))
6188     {
6189       /* TYPE passed in fp/simd registers.  */
6190       if (TARGET_GENERAL_REGS_ONLY)
6191         sorry ("%qs and floating point or vector arguments",
6192                "-mgeneral-regs-only");
6193
6194       f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6195                       unshare_expr (valist), f_vrtop, NULL_TREE);
6196       f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6197                       unshare_expr (valist), f_vroff, NULL_TREE);
6198
6199       rsize = nregs * UNITS_PER_VREG;
6200
6201       if (is_ha)
6202         {
6203           if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6204             adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6205         }
6206       else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6207                && size < UNITS_PER_VREG)
6208         {
6209           adjust = UNITS_PER_VREG - size;
6210         }
6211     }
6212   else
6213     {
6214       /* TYPE passed in general registers.  */
6215       f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6216                       unshare_expr (valist), f_grtop, NULL_TREE);
6217       f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6218                       unshare_expr (valist), f_groff, NULL_TREE);
6219       rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6220       nregs = rsize / UNITS_PER_WORD;
6221
6222       if (align > 8)
6223         dw_align = true;
6224
6225       if (BLOCK_REG_PADDING (mode, type, 1) == downward
6226           && size < UNITS_PER_WORD)
6227         {
6228           adjust = UNITS_PER_WORD  - size;
6229         }
6230     }
6231
6232   /* Get a local temporary for the field value.  */
6233   off = get_initialized_tmp_var (f_off, pre_p, NULL);
6234
6235   /* Emit code to branch if off >= 0.  */
6236   t = build2 (GE_EXPR, boolean_type_node, off,
6237               build_int_cst (TREE_TYPE (off), 0));
6238   cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6239
6240   if (dw_align)
6241     {
6242       /* Emit: offs = (offs + 15) & -16.  */
6243       t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6244                   build_int_cst (TREE_TYPE (off), 15));
6245       t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6246                   build_int_cst (TREE_TYPE (off), -16));
6247       roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6248     }
6249   else
6250     roundup = NULL;
6251
6252   /* Update ap.__[g|v]r_offs  */
6253   t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6254               build_int_cst (TREE_TYPE (off), rsize));
6255   t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6256
6257   /* String up.  */
6258   if (roundup)
6259     t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6260
6261   /* [cond2] if (ap.__[g|v]r_offs > 0)  */
6262   u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6263               build_int_cst (TREE_TYPE (f_off), 0));
6264   cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6265
6266   /* String up: make sure the assignment happens before the use.  */
6267   t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6268   COND_EXPR_ELSE (cond1) = t;
6269
6270   /* Prepare the trees handling the argument that is passed on the stack;
6271      the top level node will store in ON_STACK.  */
6272   arg = get_initialized_tmp_var (stack, pre_p, NULL);
6273   if (align > 8)
6274     {
6275       /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
6276       t = fold_convert (intDI_type_node, arg);
6277       t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6278                   build_int_cst (TREE_TYPE (t), 15));
6279       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6280                   build_int_cst (TREE_TYPE (t), -16));
6281       t = fold_convert (TREE_TYPE (arg), t);
6282       roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
6283     }
6284   else
6285     roundup = NULL;
6286   /* Advance ap.__stack  */
6287   t = fold_convert (intDI_type_node, arg);
6288   t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6289               build_int_cst (TREE_TYPE (t), size + 7));
6290   t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6291               build_int_cst (TREE_TYPE (t), -8));
6292   t = fold_convert (TREE_TYPE (arg), t);
6293   t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
6294   /* String up roundup and advance.  */
6295   if (roundup)
6296     t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6297   /* String up with arg */
6298   on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
6299   /* Big-endianness related address adjustment.  */
6300   if (BLOCK_REG_PADDING (mode, type, 1) == downward
6301       && size < UNITS_PER_WORD)
6302   {
6303     t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
6304                 size_int (UNITS_PER_WORD - size));
6305     on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
6306   }
6307
6308   COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
6309   COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
6310
6311   /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
6312   t = off;
6313   if (adjust)
6314     t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
6315                 build_int_cst (TREE_TYPE (off), adjust));
6316
6317   t = fold_convert (sizetype, t);
6318   t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
6319
6320   if (is_ha)
6321     {
6322       /* type ha; // treat as "struct {ftype field[n];}"
6323          ... [computing offs]
6324          for (i = 0; i <nregs; ++i, offs += 16)
6325            ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6326          return ha;  */
6327       int i;
6328       tree tmp_ha, field_t, field_ptr_t;
6329
6330       /* Declare a local variable.  */
6331       tmp_ha = create_tmp_var_raw (type, "ha");
6332       gimple_add_tmp_var (tmp_ha);
6333
6334       /* Establish the base type.  */
6335       switch (ag_mode)
6336         {
6337         case SFmode:
6338           field_t = float_type_node;
6339           field_ptr_t = float_ptr_type_node;
6340           break;
6341         case DFmode:
6342           field_t = double_type_node;
6343           field_ptr_t = double_ptr_type_node;
6344           break;
6345         case TFmode:
6346           field_t = long_double_type_node;
6347           field_ptr_t = long_double_ptr_type_node;
6348           break;
6349 /* The half precision and quad precision are not fully supported yet.  Enable
6350    the following code after the support is complete.  Need to find the correct
6351    type node for __fp16 *.  */
6352 #if 0
6353         case HFmode:
6354           field_t = float_type_node;
6355           field_ptr_t = float_ptr_type_node;
6356           break;
6357 #endif
6358         case V2SImode:
6359         case V4SImode:
6360             {
6361               tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
6362               field_t = build_vector_type_for_mode (innertype, ag_mode);
6363               field_ptr_t = build_pointer_type (field_t);
6364             }
6365           break;
6366         default:
6367           gcc_assert (0);
6368         }
6369
6370       /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area  */
6371       tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
6372       addr = t;
6373       t = fold_convert (field_ptr_t, addr);
6374       t = build2 (MODIFY_EXPR, field_t,
6375                   build1 (INDIRECT_REF, field_t, tmp_ha),
6376                   build1 (INDIRECT_REF, field_t, t));
6377
6378       /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
6379       for (i = 1; i < nregs; ++i)
6380         {
6381           addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
6382           u = fold_convert (field_ptr_t, addr);
6383           u = build2 (MODIFY_EXPR, field_t,
6384                       build2 (MEM_REF, field_t, tmp_ha,
6385                               build_int_cst (field_ptr_t,
6386                                              (i *
6387                                               int_size_in_bytes (field_t)))),
6388                       build1 (INDIRECT_REF, field_t, u));
6389           t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
6390         }
6391
6392       u = fold_convert (TREE_TYPE (f_top), tmp_ha);
6393       t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
6394     }
6395
6396   COND_EXPR_ELSE (cond2) = t;
6397   addr = fold_convert (build_pointer_type (type), cond1);
6398   addr = build_va_arg_indirect_ref (addr);
6399
6400   if (indirect_p)
6401     addr = build_va_arg_indirect_ref (addr);
6402
6403   return addr;
6404 }
6405
6406 /* Implement TARGET_SETUP_INCOMING_VARARGS.  */
6407
6408 static void
6409 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
6410                                 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6411                                 int no_rtl)
6412 {
6413   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6414   CUMULATIVE_ARGS local_cum;
6415   int gr_saved, vr_saved;
6416
6417   /* The caller has advanced CUM up to, but not beyond, the last named
6418      argument.  Advance a local copy of CUM past the last "real" named
6419      argument, to find out how many registers are left over.  */
6420   local_cum = *cum;
6421   aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
6422
6423   /* Found out how many registers we need to save.  */
6424   gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
6425   vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
6426
6427   if (TARGET_GENERAL_REGS_ONLY)
6428     {
6429       if (local_cum.aapcs_nvrn > 0)
6430         sorry ("%qs and floating point or vector arguments",
6431                "-mgeneral-regs-only");
6432       vr_saved = 0;
6433     }
6434
6435   if (!no_rtl)
6436     {
6437       if (gr_saved > 0)
6438         {
6439           rtx ptr, mem;
6440
6441           /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
6442           ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
6443                                - gr_saved * UNITS_PER_WORD);
6444           mem = gen_frame_mem (BLKmode, ptr);
6445           set_mem_alias_set (mem, get_varargs_alias_set ());
6446
6447           move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
6448                                mem, gr_saved);
6449         }
6450       if (vr_saved > 0)
6451         {
6452           /* We can't use move_block_from_reg, because it will use
6453              the wrong mode, storing D regs only.  */
6454           enum machine_mode mode = TImode;
6455           int off, i;
6456
6457           /* Set OFF to the offset from virtual_incoming_args_rtx of
6458              the first vector register.  The VR save area lies below
6459              the GR one, and is aligned to 16 bytes.  */
6460           off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6461                                    STACK_BOUNDARY / BITS_PER_UNIT);
6462           off -= vr_saved * UNITS_PER_VREG;
6463
6464           for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
6465             {
6466               rtx ptr, mem;
6467
6468               ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
6469               mem = gen_frame_mem (mode, ptr);
6470               set_mem_alias_set (mem, get_varargs_alias_set ());
6471               aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
6472               off += UNITS_PER_VREG;
6473             }
6474         }
6475     }
6476
6477   /* We don't save the size into *PRETEND_SIZE because we want to avoid
6478      any complication of having crtl->args.pretend_args_size changed.  */
6479   cfun->machine->saved_varargs_size
6480     = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6481                       STACK_BOUNDARY / BITS_PER_UNIT)
6482        + vr_saved * UNITS_PER_VREG);
6483 }
6484
6485 static void
6486 aarch64_conditional_register_usage (void)
6487 {
6488   int i;
6489   if (!TARGET_FLOAT)
6490     {
6491       for (i = V0_REGNUM; i <= V31_REGNUM; i++)
6492         {
6493           fixed_regs[i] = 1;
6494           call_used_regs[i] = 1;
6495         }
6496     }
6497 }
6498
6499 /* Walk down the type tree of TYPE counting consecutive base elements.
6500    If *MODEP is VOIDmode, then set it to the first valid floating point
6501    type.  If a non-floating point type is found, or if a floating point
6502    type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6503    otherwise return the count in the sub-tree.  */
6504 static int
6505 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6506 {
6507   enum machine_mode mode;
6508   HOST_WIDE_INT size;
6509
6510   switch (TREE_CODE (type))
6511     {
6512     case REAL_TYPE:
6513       mode = TYPE_MODE (type);
6514       if (mode != DFmode && mode != SFmode && mode != TFmode)
6515         return -1;
6516
6517       if (*modep == VOIDmode)
6518         *modep = mode;
6519
6520       if (*modep == mode)
6521         return 1;
6522
6523       break;
6524
6525     case COMPLEX_TYPE:
6526       mode = TYPE_MODE (TREE_TYPE (type));
6527       if (mode != DFmode && mode != SFmode && mode != TFmode)
6528         return -1;
6529
6530       if (*modep == VOIDmode)
6531         *modep = mode;
6532
6533       if (*modep == mode)
6534         return 2;
6535
6536       break;
6537
6538     case VECTOR_TYPE:
6539       /* Use V2SImode and V4SImode as representatives of all 64-bit
6540          and 128-bit vector types.  */
6541       size = int_size_in_bytes (type);
6542       switch (size)
6543         {
6544         case 8:
6545           mode = V2SImode;
6546           break;
6547         case 16:
6548           mode = V4SImode;
6549           break;
6550         default:
6551           return -1;
6552         }
6553
6554       if (*modep == VOIDmode)
6555         *modep = mode;
6556
6557       /* Vector modes are considered to be opaque: two vectors are
6558          equivalent for the purposes of being homogeneous aggregates
6559          if they are the same size.  */
6560       if (*modep == mode)
6561         return 1;
6562
6563       break;
6564
6565     case ARRAY_TYPE:
6566       {
6567         int count;
6568         tree index = TYPE_DOMAIN (type);
6569
6570         /* Can't handle incomplete types.  */
6571         if (!COMPLETE_TYPE_P (type))
6572           return -1;
6573
6574         count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6575         if (count == -1
6576             || !index
6577             || !TYPE_MAX_VALUE (index)
6578             || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6579             || !TYPE_MIN_VALUE (index)
6580             || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6581             || count < 0)
6582           return -1;
6583
6584         count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6585                       - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6586
6587         /* There must be no padding.  */
6588         if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6589             || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6590                 != count * GET_MODE_BITSIZE (*modep)))
6591           return -1;
6592
6593         return count;
6594       }
6595
6596     case RECORD_TYPE:
6597       {
6598         int count = 0;
6599         int sub_count;
6600         tree field;
6601
6602         /* Can't handle incomplete types.  */
6603         if (!COMPLETE_TYPE_P (type))
6604           return -1;
6605
6606         for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6607           {
6608             if (TREE_CODE (field) != FIELD_DECL)
6609               continue;
6610
6611             sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6612             if (sub_count < 0)
6613               return -1;
6614             count += sub_count;
6615           }
6616
6617         /* There must be no padding.  */
6618         if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6619             || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6620                 != count * GET_MODE_BITSIZE (*modep)))
6621           return -1;
6622
6623         return count;
6624       }
6625
6626     case UNION_TYPE:
6627     case QUAL_UNION_TYPE:
6628       {
6629         /* These aren't very interesting except in a degenerate case.  */
6630         int count = 0;
6631         int sub_count;
6632         tree field;
6633
6634         /* Can't handle incomplete types.  */
6635         if (!COMPLETE_TYPE_P (type))
6636           return -1;
6637
6638         for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6639           {
6640             if (TREE_CODE (field) != FIELD_DECL)
6641               continue;
6642
6643             sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6644             if (sub_count < 0)
6645               return -1;
6646             count = count > sub_count ? count : sub_count;
6647           }
6648
6649         /* There must be no padding.  */
6650         if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6651             || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6652                 != count * GET_MODE_BITSIZE (*modep)))
6653           return -1;
6654
6655         return count;
6656       }
6657
6658     default:
6659       break;
6660     }
6661
6662   return -1;
6663 }
6664
6665 /* Return true if we use LRA instead of reload pass.  */
6666 static bool
6667 aarch64_lra_p (void)
6668 {
6669   return aarch64_lra_flag;
6670 }
6671
6672 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6673    type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
6674    array types.  The C99 floating-point complex types are also considered
6675    as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
6676    types, which are GCC extensions and out of the scope of AAPCS64, are
6677    treated as composite types here as well.
6678
6679    Note that MODE itself is not sufficient in determining whether a type
6680    is such a composite type or not.  This is because
6681    stor-layout.c:compute_record_mode may have already changed the MODE
6682    (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
6683    structure with only one field may have its MODE set to the mode of the
6684    field.  Also an integer mode whose size matches the size of the
6685    RECORD_TYPE type may be used to substitute the original mode
6686    (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
6687    solely relied on.  */
6688
6689 static bool
6690 aarch64_composite_type_p (const_tree type,
6691                           enum machine_mode mode)
6692 {
6693   if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6694     return true;
6695
6696   if (mode == BLKmode
6697       || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6698       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6699     return true;
6700
6701   return false;
6702 }
6703
6704 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6705    type as described in AAPCS64 \S 4.1.2.
6706
6707    See the comment above aarch64_composite_type_p for the notes on MODE.  */
6708
6709 static bool
6710 aarch64_short_vector_p (const_tree type,
6711                         enum machine_mode mode)
6712 {
6713   HOST_WIDE_INT size = -1;
6714
6715   if (type && TREE_CODE (type) == VECTOR_TYPE)
6716     size = int_size_in_bytes (type);
6717   else if (!aarch64_composite_type_p (type, mode)
6718            && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6719                || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6720     size = GET_MODE_SIZE (mode);
6721
6722   return (size == 8 || size == 16) ? true : false;
6723 }
6724
6725 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6726    shall be passed or returned in simd/fp register(s) (providing these
6727    parameter passing registers are available).
6728
6729    Upon successful return, *COUNT returns the number of needed registers,
6730    *BASE_MODE returns the mode of the individual register and when IS_HAF
6731    is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6732    floating-point aggregate or a homogeneous short-vector aggregate.  */
6733
6734 static bool
6735 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6736                                          const_tree type,
6737                                          enum machine_mode *base_mode,
6738                                          int *count,
6739                                          bool *is_ha)
6740 {
6741   enum machine_mode new_mode = VOIDmode;
6742   bool composite_p = aarch64_composite_type_p (type, mode);
6743
6744   if (is_ha != NULL) *is_ha = false;
6745
6746   if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6747       || aarch64_short_vector_p (type, mode))
6748     {
6749       *count = 1;
6750       new_mode = mode;
6751     }
6752   else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6753     {
6754       if (is_ha != NULL) *is_ha = true;
6755       *count = 2;
6756       new_mode = GET_MODE_INNER (mode);
6757     }
6758   else if (type && composite_p)
6759     {
6760       int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6761
6762       if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6763         {
6764           if (is_ha != NULL) *is_ha = true;
6765           *count = ag_count;
6766         }
6767       else
6768         return false;
6769     }
6770   else
6771     return false;
6772
6773   *base_mode = new_mode;
6774   return true;
6775 }
6776
6777 /* Implement TARGET_STRUCT_VALUE_RTX.  */
6778
6779 static rtx
6780 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6781                           int incoming ATTRIBUTE_UNUSED)
6782 {
6783   return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6784 }
6785
6786 /* Implements target hook vector_mode_supported_p.  */
6787 static bool
6788 aarch64_vector_mode_supported_p (enum machine_mode mode)
6789 {
6790   if (TARGET_SIMD
6791       && (mode == V4SImode  || mode == V8HImode
6792           || mode == V16QImode || mode == V2DImode
6793           || mode == V2SImode  || mode == V4HImode
6794           || mode == V8QImode || mode == V2SFmode
6795           || mode == V4SFmode || mode == V2DFmode
6796           || mode == V1DFmode))
6797     return true;
6798
6799   return false;
6800 }
6801
6802 /* Return appropriate SIMD container
6803    for MODE within a vector of WIDTH bits.  */
6804 static enum machine_mode
6805 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6806 {
6807   gcc_assert (width == 64 || width == 128);
6808   if (TARGET_SIMD)
6809     {
6810       if (width == 128)
6811         switch (mode)
6812           {
6813           case DFmode:
6814             return V2DFmode;
6815           case SFmode:
6816             return V4SFmode;
6817           case SImode:
6818             return V4SImode;
6819           case HImode:
6820             return V8HImode;
6821           case QImode:
6822             return V16QImode;
6823           case DImode:
6824             return V2DImode;
6825           default:
6826             break;
6827           }
6828       else
6829         switch (mode)
6830           {
6831           case SFmode:
6832             return V2SFmode;
6833           case SImode:
6834             return V2SImode;
6835           case HImode:
6836             return V4HImode;
6837           case QImode:
6838             return V8QImode;
6839           default:
6840             break;
6841           }
6842     }
6843   return word_mode;
6844 }
6845
6846 /* Return 128-bit container as the preferred SIMD mode for MODE.  */
6847 static enum machine_mode
6848 aarch64_preferred_simd_mode (enum machine_mode mode)
6849 {
6850   return aarch64_simd_container_mode (mode, 128);
6851 }
6852
/* Return the bitmask of vector sizes, in bytes, that the vectorizer
   should iterate over: 16 and 8.  */
static unsigned int
aarch64_autovectorize_vector_sizes (void)
{
  const unsigned int quad_bytes = 16;
  const unsigned int double_bytes = 8;

  return quad_bytes | double_bytes;
}
6860
6861 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6862    vector types in order to conform to the AAPCS64 (see "Procedure
6863    Call Standard for the ARM 64-bit Architecture", Appendix A).  To
6864    qualify for emission with the mangled names defined in that document,
6865    a vector type must not only be of the correct mode but also be
6866    composed of AdvSIMD vector element types (e.g.
6867    _builtin_aarch64_simd_qi); these types are registered by
6868    aarch64_init_simd_builtins ().  In other words, vector types defined
6869    in other ways e.g. via vector_size attribute will get default
6870    mangled names.  */
6871 typedef struct
6872 {
6873   enum machine_mode mode;
6874   const char *element_type_name;
6875   const char *mangled_name;
6876 } aarch64_simd_mangle_map_entry;
6877
6878 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6879   /* 64-bit containerized types.  */
6880   { V8QImode,  "__builtin_aarch64_simd_qi",     "10__Int8x8_t" },
6881   { V8QImode,  "__builtin_aarch64_simd_uqi",    "11__Uint8x8_t" },
6882   { V4HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x4_t" },
6883   { V4HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x4_t" },
6884   { V2SImode,  "__builtin_aarch64_simd_si",     "11__Int32x2_t" },
6885   { V2SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x2_t" },
6886   { V2SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x2_t" },
6887   { V8QImode,  "__builtin_aarch64_simd_poly8",  "11__Poly8x8_t" },
6888   { V4HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6889   /* 128-bit containerized types.  */
6890   { V16QImode, "__builtin_aarch64_simd_qi",     "11__Int8x16_t" },
6891   { V16QImode, "__builtin_aarch64_simd_uqi",    "12__Uint8x16_t" },
6892   { V8HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x8_t" },
6893   { V8HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x8_t" },
6894   { V4SImode,  "__builtin_aarch64_simd_si",     "11__Int32x4_t" },
6895   { V4SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x4_t" },
6896   { V2DImode,  "__builtin_aarch64_simd_di",     "11__Int64x2_t" },
6897   { V2DImode,  "__builtin_aarch64_simd_udi",    "12__Uint64x2_t" },
6898   { V4SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x4_t" },
6899   { V2DFmode,  "__builtin_aarch64_simd_df",     "13__Float64x2_t" },
6900   { V16QImode, "__builtin_aarch64_simd_poly8",  "12__Poly8x16_t" },
6901   { V8HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6902   { V2DImode,  "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
6903   { VOIDmode, NULL, NULL }
6904 };
6905
6906 /* Implement TARGET_MANGLE_TYPE.  */
6907
6908 static const char *
6909 aarch64_mangle_type (const_tree type)
6910 {
6911   /* The AArch64 ABI documents say that "__va_list" has to be
6912      managled as if it is in the "std" namespace.  */
6913   if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6914     return "St9__va_list";
6915
6916   /* Check the mode of the vector type, and the name of the vector
6917      element type, against the table.  */
6918   if (TREE_CODE (type) == VECTOR_TYPE)
6919     {
6920       aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6921
6922       while (pos->mode != VOIDmode)
6923         {
6924           tree elt_type = TREE_TYPE (type);
6925
6926           if (pos->mode == TYPE_MODE (type)
6927               && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6928               && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6929                           pos->element_type_name))
6930             return pos->mangled_name;
6931
6932           pos++;
6933         }
6934     }
6935
6936   /* Use the default mangling.  */
6937   return NULL;
6938 }
6939
6940 static int
6941 is_mem_p (rtx *x, void *data ATTRIBUTE_UNUSED)
6942 {
6943   return MEM_P (*x);
6944 }
6945
6946 static bool
6947 is_memory_op (rtx mem_insn)
6948 {
6949    rtx pattern = PATTERN (mem_insn);
6950    return for_each_rtx (&pattern, is_mem_p, NULL);
6951 }
6952
6953 /* Find the first rtx before insn that will generate an assembly
6954    instruction.  */
6955
6956 static rtx
6957 aarch64_prev_real_insn (rtx insn)
6958 {
6959   if (!insn)
6960     return NULL;
6961
6962   do
6963     {
6964       insn = prev_real_insn (insn);
6965     }
6966   while (insn && recog_memoized (insn) < 0);
6967
6968   return insn;
6969 }
6970
6971 static bool
6972 is_madd_op (enum attr_type t1)
6973 {
6974   unsigned int i;
6975   /* A number of these may be AArch32 only.  */
6976   enum attr_type mlatypes[] = {
6977     TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
6978     TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
6979     TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS,TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
6980   };
6981
6982   for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
6983     {
6984       if (t1 == mlatypes[i])
6985         return true;
6986     }
6987
6988   return false;
6989 }
6990
6991 /* Check if there is a register dependency between a load and the insn
6992    for which we hold recog_data.  */
6993
6994 static bool
6995 dep_between_memop_and_curr (rtx memop)
6996 {
6997   rtx load_reg;
6998   int opno;
6999
7000   if (!memop)
7001     return false;
7002
7003   if (!REG_P (SET_DEST (memop)))
7004     return false;
7005
7006   load_reg = SET_DEST (memop);
7007   for (opno = 0; opno < recog_data.n_operands; opno++)
7008     {
7009       rtx operand = recog_data.operand[opno];
7010       if (REG_P (operand)
7011           && reg_overlap_mentioned_p (load_reg, operand))
7012         return true;
7013
7014     }
7015   return false;
7016 }
7017
7018 bool
7019 aarch64_madd_needs_nop (rtx insn)
7020 {
7021   enum attr_type attr_type;
7022   rtx prev;
7023   rtx body;
7024
7025   if (!aarch64_fix_a53_err835769)
7026     return false;
7027
7028   if (recog_memoized (insn) < 0)
7029     return false;
7030
7031   attr_type = get_attr_type (insn);
7032   if (!is_madd_op (attr_type))
7033     return false;
7034
7035   prev = aarch64_prev_real_insn (insn);
7036   /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
7037      Restore recog state to INSN to avoid state corruption.  */
7038   extract_constrain_insn_cached (insn);
7039
7040   if (!prev)
7041     return false;
7042
7043   body = single_set (prev);
7044
7045   /* If the previous insn is a memory op and there is no dependency between
7046      it and the madd, emit a nop between them.  If we know the previous insn is
7047      a memory op but body is NULL, emit the nop to be safe, it's probably a
7048      load/store pair insn.  */
7049   if (is_memory_op (prev)
7050       && GET_MODE (recog_data.operand[0]) == DImode
7051       && (!dep_between_memop_and_curr (body)))
7052     return true;
7053
7054   return false;
7055
7056 }
7057
7058 void
7059 aarch64_final_prescan_insn (rtx insn)
7060 {
7061   if (aarch64_madd_needs_nop (insn))
7062     fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
7063 }
7064
7065
/* Return the letter that stands for an element of SIZE bits: 'd' for
   64, 's' for 32, 'h' for 16 and 'b' for 8.  Any other SIZE is treated
   as unreachable.  */
static char
sizetochar (int size)
{
  if (size == 64)
    return 'd';
  if (size == 32)
    return 's';
  if (size == 16)
    return 'h';
  if (size == 8)
    return 'b';

  gcc_unreachable ();
}
7079
7080 /* Return true iff x is a uniform vector of floating-point
7081    constants, and the constant can be represented in
7082    quarter-precision form.  Note, as aarch64_float_const_representable
7083    rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
7084 static bool
7085 aarch64_vect_float_const_representable_p (rtx x)
7086 {
7087   int i = 0;
7088   REAL_VALUE_TYPE r0, ri;
7089   rtx x0, xi;
7090
7091   if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7092     return false;
7093
7094   x0 = CONST_VECTOR_ELT (x, 0);
7095   if (!CONST_DOUBLE_P (x0))
7096     return false;
7097
7098   REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7099
7100   for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7101     {
7102       xi = CONST_VECTOR_ELT (x, i);
7103       if (!CONST_DOUBLE_P (xi))
7104         return false;
7105
7106       REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7107       if (!REAL_VALUES_EQUAL (r0, ri))
7108         return false;
7109     }
7110
7111   return aarch64_float_const_representable_p (x0);
7112 }
7113
7114 /* Return true for valid and false for invalid.  */
7115 bool
7116 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
7117                               struct simd_immediate_info *info)
7118 {
7119 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)  \
7120   matches = 1;                                          \
7121   for (i = 0; i < idx; i += (STRIDE))                   \
7122     if (!(TEST))                                        \
7123       matches = 0;                                      \
7124   if (matches)                                          \
7125     {                                                   \
7126       immtype = (CLASS);                                \
7127       elsize = (ELSIZE);                                \
7128       eshift = (SHIFT);                                 \
7129       emvn = (NEG);                                     \
7130       break;                                            \
7131     }
7132
7133   unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7134   unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7135   unsigned char bytes[16];
7136   int immtype = -1, matches;
7137   unsigned int invmask = inverse ? 0xff : 0;
7138   int eshift, emvn;
7139
7140   if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7141     {
7142       if (! (aarch64_simd_imm_zero_p (op, mode)
7143              || aarch64_vect_float_const_representable_p (op)))
7144         return false;
7145
7146       if (info)
7147         {
7148           info->value = CONST_VECTOR_ELT (op, 0);
7149           info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
7150           info->mvn = false;
7151           info->shift = 0;
7152         }
7153
7154       return true;
7155     }
7156
7157   /* Splat vector constant out into a byte vector.  */
7158   for (i = 0; i < n_elts; i++)
7159     {
7160       /* The vector is provided in gcc endian-neutral fashion.  For aarch64_be,
7161          it must be laid out in the vector register in reverse order.  */
7162       rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
7163       unsigned HOST_WIDE_INT elpart;
7164       unsigned int part, parts;
7165
7166       if (GET_CODE (el) == CONST_INT)
7167         {
7168           elpart = INTVAL (el);
7169           parts = 1;
7170         }
7171       else if (GET_CODE (el) == CONST_DOUBLE)
7172         {
7173           elpart = CONST_DOUBLE_LOW (el);
7174           parts = 2;
7175         }
7176       else
7177         gcc_unreachable ();
7178
7179       for (part = 0; part < parts; part++)
7180         {
7181           unsigned int byte;
7182           for (byte = 0; byte < innersize; byte++)
7183             {
7184               bytes[idx++] = (elpart & 0xff) ^ invmask;
7185               elpart >>= BITS_PER_UNIT;
7186             }
7187           if (GET_CODE (el) == CONST_DOUBLE)
7188             elpart = CONST_DOUBLE_HIGH (el);
7189         }
7190     }
7191
7192   /* Sanity check.  */
7193   gcc_assert (idx == GET_MODE_SIZE (mode));
7194
7195   do
7196     {
7197       CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7198              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7199
7200       CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7201              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7202
7203       CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7204              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7205
7206       CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7207              && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7208
7209       CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7210
7211       CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7212
7213       CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7214              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7215
7216       CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7217              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7218
7219       CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7220              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7221
7222       CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7223              && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7224
7225       CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7226
7227       CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7228
7229       CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7230              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7231
7232       CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7233              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7234
7235       CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7236              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7237
7238       CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7239              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7240
7241       CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7242
7243       CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7244              && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7245     }
7246   while (0);
7247
7248   if (immtype == -1)
7249     return false;
7250
7251   if (info)
7252     {
7253       info->element_width = elsize;
7254       info->mvn = emvn != 0;
7255       info->shift = eshift;
7256
7257       unsigned HOST_WIDE_INT imm = 0;
7258
7259       if (immtype >= 12 && immtype <= 15)
7260         info->msl = true;
7261
7262       /* Un-invert bytes of recognized vector, if necessary.  */
7263       if (invmask != 0)
7264         for (i = 0; i < idx; i++)
7265           bytes[i] ^= invmask;
7266
7267       if (immtype == 17)
7268         {
7269           /* FIXME: Broken on 32-bit H_W_I hosts.  */
7270           gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7271
7272           for (i = 0; i < 8; i++)
7273             imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7274               << (i * BITS_PER_UNIT);
7275
7276
7277           info->value = GEN_INT (imm);
7278         }
7279       else
7280         {
7281           for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7282             imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7283
7284           /* Construct 'abcdefgh' because the assembler cannot handle
7285              generic constants.  */
7286           if (info->mvn)
7287             imm = ~imm;
7288           imm = (imm >> info->shift) & 0xff;
7289           info->value = GEN_INT (imm);
7290         }
7291     }
7292
7293   return true;
7294 #undef CHECK
7295 }
7296
7297 static bool
7298 aarch64_const_vec_all_same_int_p (rtx x,
7299                                   HOST_WIDE_INT minval,
7300                                   HOST_WIDE_INT maxval)
7301 {
7302   HOST_WIDE_INT firstval;
7303   int count, i;
7304
7305   if (GET_CODE (x) != CONST_VECTOR
7306       || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
7307     return false;
7308
7309   firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
7310   if (firstval < minval || firstval > maxval)
7311     return false;
7312
7313   count = CONST_VECTOR_NUNITS (x);
7314   for (i = 1; i < count; i++)
7315     if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
7316       return false;
7317
7318   return true;
7319 }
7320
7321 /* Check of immediate shift constants are within range.  */
7322 bool
7323 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
7324 {
7325   int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
7326   if (left)
7327     return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
7328   else
7329     return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
7330 }
7331
7332 /* Return true if X is a uniform vector where all elements
7333    are either the floating-point constant 0.0 or the
7334    integer constant 0.  */
7335 bool
7336 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
7337 {
7338   return x == CONST0_RTX (mode);
7339 }
7340
7341 bool
7342 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
7343 {
7344   HOST_WIDE_INT imm = INTVAL (x);
7345   int i;
7346
7347   for (i = 0; i < 8; i++)
7348     {
7349       unsigned int byte = imm & 0xff;
7350       if (byte != 0xff && byte != 0)
7351        return false;
7352       imm >>= 8;
7353     }
7354
7355   return true;
7356 }
7357
7358 bool
7359 aarch64_mov_operand_p (rtx x,
7360                        enum aarch64_symbol_context context,
7361                        enum machine_mode mode)
7362 {
7363   if (GET_CODE (x) == HIGH
7364       && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7365     return true;
7366
7367   if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
7368     return true;
7369
7370   if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
7371     return true;
7372
7373   return aarch64_classify_symbolic_expression (x, context)
7374     == SYMBOL_TINY_ABSOLUTE;
7375 }
7376
7377 /* Return a const_int vector of VAL.  */
7378 rtx
7379 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
7380 {
7381   int nunits = GET_MODE_NUNITS (mode);
7382   rtvec v = rtvec_alloc (nunits);
7383   int i;
7384
7385   for (i=0; i < nunits; i++)
7386     RTVEC_ELT (v, i) = GEN_INT (val);
7387
7388   return gen_rtx_CONST_VECTOR (mode, v);
7389 }
7390
7391 /* Check OP is a legal scalar immediate for the MOVI instruction.  */
7392
7393 bool
7394 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
7395 {
7396   enum machine_mode vmode;
7397
7398   gcc_assert (!VECTOR_MODE_P (mode));
7399   vmode = aarch64_preferred_simd_mode (mode);
7400   rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
7401   return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
7402 }
7403
7404 /* Construct and return a PARALLEL RTX vector.  */
7405 rtx
7406 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
7407 {
7408   int nunits = GET_MODE_NUNITS (mode);
7409   rtvec v = rtvec_alloc (nunits / 2);
7410   int base = high ? nunits / 2 : 0;
7411   rtx t1;
7412   int i;
7413
7414   for (i=0; i < nunits / 2; i++)
7415     RTVEC_ELT (v, i) = GEN_INT (base + i);
7416
7417   t1 = gen_rtx_PARALLEL (mode, v);
7418   return t1;
7419 }
7420
7421 /* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
7422    HIGH (exclusive).  */
7423 void
7424 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7425 {
7426   HOST_WIDE_INT lane;
7427   gcc_assert (GET_CODE (operand) == CONST_INT);
7428   lane = INTVAL (operand);
7429
7430   if (lane < low || lane >= high)
7431     error ("lane out of range");
7432 }
7433
7434 void
7435 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7436 {
7437   gcc_assert (GET_CODE (operand) == CONST_INT);
7438   HOST_WIDE_INT lane = INTVAL (operand);
7439
7440   if (lane < low || lane >= high)
7441     error ("constant out of range");
7442 }
7443
7444 /* Emit code to reinterpret one AdvSIMD type as another,
7445    without altering bits.  */
7446 void
7447 aarch64_simd_reinterpret (rtx dest, rtx src)
7448 {
7449   emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
7450 }
7451
7452 /* Emit code to place a AdvSIMD pair result in memory locations (with equal
7453    registers).  */
7454 void
7455 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
7456                             rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
7457                             rtx op1)
7458 {
7459   rtx mem = gen_rtx_MEM (mode, destaddr);
7460   rtx tmp1 = gen_reg_rtx (mode);
7461   rtx tmp2 = gen_reg_rtx (mode);
7462
7463   emit_insn (intfn (tmp1, op1, tmp2));
7464
7465   emit_move_insn (mem, tmp1);
7466   mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
7467   emit_move_insn (mem, tmp2);
7468 }
7469
7470 /* Return TRUE if OP is a valid vector addressing mode.  */
7471 bool
7472 aarch64_simd_mem_operand_p (rtx op)
7473 {
7474   return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
7475                         || GET_CODE (XEXP (op, 0)) == REG);
7476 }
7477
7478 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
7479    not to early-clobber SRC registers in the process.
7480
7481    We assume that the operands described by SRC and DEST represent a
7482    decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
7483    number of components into which the copy has been decomposed.  */
7484 void
7485 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
7486                                 rtx *src, unsigned int count)
7487 {
7488   unsigned int i;
7489
7490   if (!reg_overlap_mentioned_p (operands[0], operands[1])
7491       || REGNO (operands[0]) < REGNO (operands[1]))
7492     {
7493       for (i = 0; i < count; i++)
7494         {
7495           operands[2 * i] = dest[i];
7496           operands[2 * i + 1] = src[i];
7497         }
7498     }
7499   else
7500     {
7501       for (i = 0; i < count; i++)
7502         {
7503           operands[2 * i] = dest[count - i - 1];
7504           operands[2 * i + 1] = src[count - i - 1];
7505         }
7506     }
7507 }
7508
7509 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
7510    one of VSTRUCT modes: OI, CI or XI.  */
7511 int
7512 aarch64_simd_attr_length_move (rtx insn)
7513 {
7514   enum machine_mode mode;
7515
7516   extract_insn_cached (insn);
7517
7518   if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
7519     {
7520       mode = GET_MODE (recog_data.operand[0]);
7521       switch (mode)
7522         {
7523         case OImode:
7524           return 8;
7525         case CImode:
7526           return 12;
7527         case XImode:
7528           return 16;
7529         default:
7530           gcc_unreachable ();
7531         }
7532     }
7533   return 4;
7534 }
7535
7536 /* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
7537    alignment of a vector to 128 bits.  */
7538 static HOST_WIDE_INT
7539 aarch64_simd_vector_alignment (const_tree type)
7540 {
7541   HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
7542   return MIN (align, 128);
7543 }
7544
7545 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
7546 static bool
7547 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
7548 {
7549   if (is_packed)
7550     return false;
7551
7552   /* We guarantee alignment for vectors up to 128-bits.  */
7553   if (tree_int_cst_compare (TYPE_SIZE (type),
7554                             bitsize_int (BIGGEST_ALIGNMENT)) > 0)
7555     return false;
7556
7557   /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
7558   return true;
7559 }
7560
7561 /* If VALS is a vector constant that can be loaded into a register
7562    using DUP, generate instructions to do so and return an RTX to
7563    assign to the register.  Otherwise return NULL_RTX.  */
7564 static rtx
7565 aarch64_simd_dup_constant (rtx vals)
7566 {
7567   enum machine_mode mode = GET_MODE (vals);
7568   enum machine_mode inner_mode = GET_MODE_INNER (mode);
7569   int n_elts = GET_MODE_NUNITS (mode);
7570   bool all_same = true;
7571   rtx x;
7572   int i;
7573
7574   if (GET_CODE (vals) != CONST_VECTOR)
7575     return NULL_RTX;
7576
7577   for (i = 1; i < n_elts; ++i)
7578     {
7579       x = CONST_VECTOR_ELT (vals, i);
7580       if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
7581         all_same = false;
7582     }
7583
7584   if (!all_same)
7585     return NULL_RTX;
7586
7587   /* We can load this constant by using DUP and a constant in a
7588      single ARM register.  This will be cheaper than a vector
7589      load.  */
7590   x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
7591   return gen_rtx_VEC_DUPLICATE (mode, x);
7592 }
7593
7594
7595 /* Generate code to load VALS, which is a PARALLEL containing only
7596    constants (for vec_init) or CONST_VECTOR, efficiently into a
7597    register.  Returns an RTX to copy into the register, or NULL_RTX
7598    for a PARALLEL that can not be converted into a CONST_VECTOR.  */
7599 static rtx
7600 aarch64_simd_make_constant (rtx vals)
7601 {
7602   enum machine_mode mode = GET_MODE (vals);
7603   rtx const_dup;
7604   rtx const_vec = NULL_RTX;
7605   int n_elts = GET_MODE_NUNITS (mode);
7606   int n_const = 0;
7607   int i;
7608
7609   if (GET_CODE (vals) == CONST_VECTOR)
7610     const_vec = vals;
7611   else if (GET_CODE (vals) == PARALLEL)
7612     {
7613       /* A CONST_VECTOR must contain only CONST_INTs and
7614          CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
7615          Only store valid constants in a CONST_VECTOR.  */
7616       for (i = 0; i < n_elts; ++i)
7617         {
7618           rtx x = XVECEXP (vals, 0, i);
7619           if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
7620             n_const++;
7621         }
7622       if (n_const == n_elts)
7623         const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7624     }
7625   else
7626     gcc_unreachable ();
7627
7628   if (const_vec != NULL_RTX
7629       && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
7630     /* Load using MOVI/MVNI.  */
7631     return const_vec;
7632   else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
7633     /* Loaded using DUP.  */
7634     return const_dup;
7635   else if (const_vec != NULL_RTX)
7636     /* Load from constant pool. We can not take advantage of single-cycle
7637        LD1 because we need a PC-relative addressing mode.  */
7638     return const_vec;
7639   else
7640     /* A PARALLEL containing something not valid inside CONST_VECTOR.
7641        We can not construct an initializer.  */
7642     return NULL_RTX;
7643 }
7644
7645 void
7646 aarch64_expand_vector_init (rtx target, rtx vals)
7647 {
7648   enum machine_mode mode = GET_MODE (target);
7649   enum machine_mode inner_mode = GET_MODE_INNER (mode);
7650   int n_elts = GET_MODE_NUNITS (mode);
7651   int n_var = 0, one_var = -1;
7652   bool all_same = true;
7653   rtx x, mem;
7654   int i;
7655
7656   x = XVECEXP (vals, 0, 0);
7657   if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7658     n_var = 1, one_var = 0;
7659
7660   for (i = 1; i < n_elts; ++i)
7661     {
7662       x = XVECEXP (vals, 0, i);
7663       if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7664         ++n_var, one_var = i;
7665
7666       if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7667         all_same = false;
7668     }
7669
7670   if (n_var == 0)
7671     {
7672       rtx constant = aarch64_simd_make_constant (vals);
7673       if (constant != NULL_RTX)
7674         {
7675           emit_move_insn (target, constant);
7676           return;
7677         }
7678     }
7679
7680   /* Splat a single non-constant element if we can.  */
7681   if (all_same)
7682     {
7683       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7684       aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7685       return;
7686     }
7687
7688   /* One field is non-constant.  Load constant then overwrite varying
7689      field.  This is more efficient than using the stack.  */
7690   if (n_var == 1)
7691     {
7692       rtx copy = copy_rtx (vals);
7693       rtx index = GEN_INT (one_var);
7694       enum insn_code icode;
7695
7696       /* Load constant part of vector, substitute neighboring value for
7697          varying element.  */
7698       XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7699       aarch64_expand_vector_init (target, copy);
7700
7701       /* Insert variable.  */
7702       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7703       icode = optab_handler (vec_set_optab, mode);
7704       gcc_assert (icode != CODE_FOR_nothing);
7705       emit_insn (GEN_FCN (icode) (target, x, index));
7706       return;
7707     }
7708
7709   /* Construct the vector in memory one field at a time
7710      and load the whole vector.  */
7711   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7712   for (i = 0; i < n_elts; i++)
7713     emit_move_insn (adjust_address_nv (mem, inner_mode,
7714                                     i * GET_MODE_SIZE (inner_mode)),
7715                     XVECEXP (vals, 0, i));
7716   emit_move_insn (target, mem);
7717
7718 }
7719
7720 static unsigned HOST_WIDE_INT
7721 aarch64_shift_truncation_mask (enum machine_mode mode)
7722 {
7723   return
7724     (aarch64_vector_mode_supported_p (mode)
7725      || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7726 }
7727
7728 #ifndef TLS_SECTION_ASM_FLAG
7729 #define TLS_SECTION_ASM_FLAG 'T'
7730 #endif
7731
7732 void
7733 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7734                                tree decl ATTRIBUTE_UNUSED)
7735 {
7736   char flagchars[10], *f = flagchars;
7737
7738   /* If we have already declared this section, we can use an
7739      abbreviated form to switch back to it -- unless this section is
7740      part of a COMDAT groups, in which case GAS requires the full
7741      declaration every time.  */
7742   if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7743       && (flags & SECTION_DECLARED))
7744     {
7745       fprintf (asm_out_file, "\t.section\t%s\n", name);
7746       return;
7747     }
7748
7749   if (!(flags & SECTION_DEBUG))
7750     *f++ = 'a';
7751   if (flags & SECTION_WRITE)
7752     *f++ = 'w';
7753   if (flags & SECTION_CODE)
7754     *f++ = 'x';
7755   if (flags & SECTION_SMALL)
7756     *f++ = 's';
7757   if (flags & SECTION_MERGE)
7758     *f++ = 'M';
7759   if (flags & SECTION_STRINGS)
7760     *f++ = 'S';
7761   if (flags & SECTION_TLS)
7762     *f++ = TLS_SECTION_ASM_FLAG;
7763   if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7764     *f++ = 'G';
7765   *f = '\0';
7766
7767   fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7768
7769   if (!(flags & SECTION_NOTYPE))
7770     {
7771       const char *type;
7772       const char *format;
7773
7774       if (flags & SECTION_BSS)
7775         type = "nobits";
7776       else
7777         type = "progbits";
7778
7779 #ifdef TYPE_OPERAND_FMT
7780       format = "," TYPE_OPERAND_FMT;
7781 #else
7782       format = ",@%s";
7783 #endif
7784
7785       fprintf (asm_out_file, format, type);
7786
7787       if (flags & SECTION_ENTSIZE)
7788         fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7789       if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7790         {
7791           if (TREE_CODE (decl) == IDENTIFIER_NODE)
7792             fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7793           else
7794             fprintf (asm_out_file, ",%s,comdat",
7795                      IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7796         }
7797     }
7798
7799   putc ('\n', asm_out_file);
7800 }
7801
7802 /* Select a format to encode pointers in exception handling data.  */
7803 int
7804 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7805 {
7806    int type;
7807    switch (aarch64_cmodel)
7808      {
7809      case AARCH64_CMODEL_TINY:
7810      case AARCH64_CMODEL_TINY_PIC:
7811      case AARCH64_CMODEL_SMALL:
7812      case AARCH64_CMODEL_SMALL_PIC:
7813        /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
7814           for everything.  */
7815        type = DW_EH_PE_sdata4;
7816        break;
7817      default:
7818        /* No assumptions here.  8-byte relocs required.  */
7819        type = DW_EH_PE_sdata8;
7820        break;
7821      }
7822    return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7823 }
7824
7825 /* Emit load exclusive.  */
7826
7827 static void
7828 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7829                              rtx mem, rtx model_rtx)
7830 {
7831   rtx (*gen) (rtx, rtx, rtx);
7832
7833   switch (mode)
7834     {
7835     case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7836     case HImode: gen = gen_aarch64_load_exclusivehi; break;
7837     case SImode: gen = gen_aarch64_load_exclusivesi; break;
7838     case DImode: gen = gen_aarch64_load_exclusivedi; break;
7839     default:
7840       gcc_unreachable ();
7841     }
7842
7843   emit_insn (gen (rval, mem, model_rtx));
7844 }
7845
7846 /* Emit store exclusive.  */
7847
7848 static void
7849 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7850                               rtx rval, rtx mem, rtx model_rtx)
7851 {
7852   rtx (*gen) (rtx, rtx, rtx, rtx);
7853
7854   switch (mode)
7855     {
7856     case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7857     case HImode: gen = gen_aarch64_store_exclusivehi; break;
7858     case SImode: gen = gen_aarch64_store_exclusivesi; break;
7859     case DImode: gen = gen_aarch64_store_exclusivedi; break;
7860     default:
7861       gcc_unreachable ();
7862     }
7863
7864   emit_insn (gen (bval, rval, mem, model_rtx));
7865 }
7866
7867 /* Mark the previous jump instruction as unlikely.  */
7868
7869 static void
7870 aarch64_emit_unlikely_jump (rtx insn)
7871 {
7872   int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
7873
7874   insn = emit_jump_insn (insn);
7875   add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
7876 }
7877
7878 /* Expand a compare and swap pattern.  */
7879
7880 void
7881 aarch64_expand_compare_and_swap (rtx operands[])
7882 {
7883   rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7884   enum machine_mode mode, cmp_mode;
7885   rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7886
7887   bval = operands[0];
7888   rval = operands[1];
7889   mem = operands[2];
7890   oldval = operands[3];
7891   newval = operands[4];
7892   is_weak = operands[5];
7893   mod_s = operands[6];
7894   mod_f = operands[7];
7895   mode = GET_MODE (mem);
7896   cmp_mode = mode;
7897
7898   /* Normally the succ memory model must be stronger than fail, but in the
7899      unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7900      promote succ to ACQ_REL so that we don't lose the acquire semantics.  */
7901
7902   if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7903       && INTVAL (mod_s) == MEMMODEL_RELEASE)
7904     mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7905
7906   switch (mode)
7907     {
7908     case QImode:
7909     case HImode:
7910       /* For short modes, we're going to perform the comparison in SImode,
7911          so do the zero-extension now.  */
7912       cmp_mode = SImode;
7913       rval = gen_reg_rtx (SImode);
7914       oldval = convert_modes (SImode, mode, oldval, true);
7915       /* Fall through.  */
7916
7917     case SImode:
7918     case DImode:
7919       /* Force the value into a register if needed.  */
7920       if (!aarch64_plus_operand (oldval, mode))
7921         oldval = force_reg (cmp_mode, oldval);
7922       break;
7923
7924     default:
7925       gcc_unreachable ();
7926     }
7927
7928   switch (mode)
7929     {
7930     case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7931     case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7932     case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7933     case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7934     default:
7935       gcc_unreachable ();
7936     }
7937
7938   emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7939
7940   if (mode == QImode || mode == HImode)
7941     emit_move_insn (operands[1], gen_lowpart (mode, rval));
7942
7943   x = gen_rtx_REG (CCmode, CC_REGNUM);
7944   x = gen_rtx_EQ (SImode, x, const0_rtx);
7945   emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7946 }
7947
7948 /* Split a compare and swap pattern.  */
7949
7950 void
7951 aarch64_split_compare_and_swap (rtx operands[])
7952 {
7953   rtx rval, mem, oldval, newval, scratch;
7954   enum machine_mode mode;
7955   bool is_weak;
7956   rtx label1, label2, x, cond;
7957
7958   rval = operands[0];
7959   mem = operands[1];
7960   oldval = operands[2];
7961   newval = operands[3];
7962   is_weak = (operands[4] != const0_rtx);
7963   scratch = operands[7];
7964   mode = GET_MODE (mem);
7965
7966   label1 = NULL_RTX;
7967   if (!is_weak)
7968     {
7969       label1 = gen_label_rtx ();
7970       emit_label (label1);
7971     }
7972   label2 = gen_label_rtx ();
7973
7974   aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7975
7976   cond = aarch64_gen_compare_reg (NE, rval, oldval);
7977   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7978   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7979                             gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7980   aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7981
7982   aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7983
7984   if (!is_weak)
7985     {
7986       x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7987       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7988                                 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7989       aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7990     }
7991   else
7992     {
7993       cond = gen_rtx_REG (CCmode, CC_REGNUM);
7994       x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7995       emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7996     }
7997
7998   emit_label (label2);
7999 }
8000
8001 /* Split an atomic operation.  */
8002
8003 void
8004 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
8005                      rtx value, rtx model_rtx, rtx cond)
8006 {
8007   enum machine_mode mode = GET_MODE (mem);
8008   enum machine_mode wmode = (mode == DImode ? DImode : SImode);
8009   rtx label, x;
8010
8011   label = gen_label_rtx ();
8012   emit_label (label);
8013
8014   if (new_out)
8015     new_out = gen_lowpart (wmode, new_out);
8016   if (old_out)
8017     old_out = gen_lowpart (wmode, old_out);
8018   else
8019     old_out = new_out;
8020   value = simplify_gen_subreg (wmode, value, mode, 0);
8021
8022   aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
8023
8024   switch (code)
8025     {
8026     case SET:
8027       new_out = value;
8028       break;
8029
8030     case NOT:
8031       x = gen_rtx_AND (wmode, old_out, value);
8032       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8033       x = gen_rtx_NOT (wmode, new_out);
8034       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8035       break;
8036
8037     case MINUS:
8038       if (CONST_INT_P (value))
8039         {
8040           value = GEN_INT (-INTVAL (value));
8041           code = PLUS;
8042         }
8043       /* Fall through.  */
8044
8045     default:
8046       x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8047       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8048       break;
8049     }
8050
8051   aarch64_emit_store_exclusive (mode, cond, mem,
8052                                 gen_lowpart (mode, new_out), model_rtx);
8053
8054   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8055   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8056                             gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8057   aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8058 }
8059
8060 static void
8061 aarch64_print_extension (void)
8062 {
8063   const struct aarch64_option_extension *opt = NULL;
8064
8065   for (opt = all_extensions; opt->name != NULL; opt++)
8066     if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8067       asm_fprintf (asm_out_file, "+%s", opt->name);
8068
8069   asm_fprintf (asm_out_file, "\n");
8070 }
8071
8072 static void
8073 aarch64_start_file (void)
8074 {
8075   if (selected_arch)
8076     {
8077       asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8078       aarch64_print_extension ();
8079     }
8080   else if (selected_cpu)
8081     {
8082       const char *truncated_name
8083             = aarch64_rewrite_selected_cpu (selected_cpu->name);
8084       asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
8085       aarch64_print_extension ();
8086     }
8087   default_file_start();
8088 }
8089
8090 /* Target hook for c_mode_for_suffix.  */
8091 static enum machine_mode
8092 aarch64_c_mode_for_suffix (char suffix)
8093 {
8094   if (suffix == 'q')
8095     return TFmode;
8096
8097   return VOIDmode;
8098 }
8099
8100 /* We can only represent floating point constants which will fit in
8101    "quarter-precision" values.  These values are characterised by
8102    a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
8103    by:
8104
8105    (-1)^s * (n/16) * 2^r
8106
8107    Where:
8108      's' is the sign bit.
8109      'n' is an integer in the range 16 <= n <= 31.
8110      'r' is an integer in the range -3 <= r <= 4.  */
8111
8112 /* Return true iff X can be represented by a quarter-precision
8113    floating point immediate operand X.  Note, we cannot represent 0.0.  */
8114 bool
8115 aarch64_float_const_representable_p (rtx x)
8116 {
8117   /* This represents our current view of how many bits
8118      make up the mantissa.  */
8119   int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8120   int exponent;
8121   unsigned HOST_WIDE_INT mantissa, mask;
8122   HOST_WIDE_INT m1, m2;
8123   REAL_VALUE_TYPE r, m;
8124
8125   if (!CONST_DOUBLE_P (x))
8126     return false;
8127
8128   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8129
8130   /* We cannot represent infinities, NaNs or +/-zero.  We won't
8131      know if we have +zero until we analyse the mantissa, but we
8132      can reject the other invalid values.  */
8133   if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8134       || REAL_VALUE_MINUS_ZERO (r))
8135     return false;
8136
8137   /* Extract exponent.  */
8138   r = real_value_abs (&r);
8139   exponent = REAL_EXP (&r);
8140
8141   /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8142      highest (sign) bit, with a fixed binary point at bit point_pos.
8143      m1 holds the low part of the mantissa, m2 the high part.
8144      WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8145      bits for the mantissa, this can fail (low bits will be lost).  */
8146   real_ldexp (&m, &r, point_pos - exponent);
8147   REAL_VALUE_TO_INT (&m1, &m2, m);
8148
8149   /* If the low part of the mantissa has bits set we cannot represent
8150      the value.  */
8151   if (m1 != 0)
8152     return false;
8153   /* We have rejected the lower HOST_WIDE_INT, so update our
8154      understanding of how many bits lie in the mantissa and
8155      look only at the high HOST_WIDE_INT.  */
8156   mantissa = m2;
8157   point_pos -= HOST_BITS_PER_WIDE_INT;
8158
8159   /* We can only represent values with a mantissa of the form 1.xxxx.  */
8160   mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8161   if ((mantissa & mask) != 0)
8162     return false;
8163
8164   /* Having filtered unrepresentable values, we may now remove all
8165      but the highest 5 bits.  */
8166   mantissa >>= point_pos - 5;
8167
8168   /* We cannot represent the value 0.0, so reject it.  This is handled
8169      elsewhere.  */
8170   if (mantissa == 0)
8171     return false;
8172
8173   /* Then, as bit 4 is always set, we can mask it off, leaving
8174      the mantissa in the range [0, 15].  */
8175   mantissa &= ~(1 << 4);
8176   gcc_assert (mantissa <= 15);
8177
8178   /* GCC internally does not use IEEE754-like encoding (where normalized
8179      significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
8180      Our mantissa values are shifted 4 places to the left relative to
8181      normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8182      by 5 places to correct for GCC's representation.  */
8183   exponent = 5 - exponent;
8184
8185   return (exponent >= 0 && exponent <= 7);
8186 }
8187
8188 char*
8189 aarch64_output_simd_mov_immediate (rtx const_vector,
8190                                    enum machine_mode mode,
8191                                    unsigned width)
8192 {
8193   bool is_valid;
8194   static char templ[40];
8195   const char *mnemonic;
8196   const char *shift_op;
8197   unsigned int lane_count = 0;
8198   char element_char;
8199
8200   struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
8201
8202   /* This will return true to show const_vector is legal for use as either
8203      a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
8204      also update INFO to show how the immediate should be generated.  */
8205   is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
8206   gcc_assert (is_valid);
8207
8208   element_char = sizetochar (info.element_width);
8209   lane_count = width / info.element_width;
8210
8211   mode = GET_MODE_INNER (mode);
8212   if (mode == SFmode || mode == DFmode)
8213     {
8214       gcc_assert (info.shift == 0 && ! info.mvn);
8215       if (aarch64_float_const_zero_rtx_p (info.value))
8216         info.value = GEN_INT (0);
8217       else
8218         {
8219 #define buf_size 20
8220           REAL_VALUE_TYPE r;
8221           REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8222           char float_buf[buf_size] = {'\0'};
8223           real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8224 #undef buf_size
8225
8226           if (lane_count == 1)
8227             snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8228           else
8229             snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
8230                       lane_count, element_char, float_buf);
8231           return templ;
8232         }
8233     }
8234
8235   mnemonic = info.mvn ? "mvni" : "movi";
8236   shift_op = info.msl ? "msl" : "lsl";
8237
8238   if (lane_count == 1)
8239     snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8240               mnemonic, UINTVAL (info.value));
8241   else if (info.shift)
8242     snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8243               ", %s %d", mnemonic, lane_count, element_char,
8244               UINTVAL (info.value), shift_op, info.shift);
8245   else
8246     snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
8247               mnemonic, lane_count, element_char, UINTVAL (info.value));
8248   return templ;
8249 }
8250
8251 char*
8252 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
8253                                           enum machine_mode mode)
8254 {
8255   enum machine_mode vmode;
8256
8257   gcc_assert (!VECTOR_MODE_P (mode));
8258   vmode = aarch64_simd_container_mode (mode, 64);
8259   rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
8260   return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
8261 }
8262
8263 /* Split operands into moves from op[1] + op[2] into op[0].  */
8264
8265 void
8266 aarch64_split_combinev16qi (rtx operands[3])
8267 {
8268   unsigned int dest = REGNO (operands[0]);
8269   unsigned int src1 = REGNO (operands[1]);
8270   unsigned int src2 = REGNO (operands[2]);
8271   enum machine_mode halfmode = GET_MODE (operands[1]);
8272   unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
8273   rtx destlo, desthi;
8274
8275   gcc_assert (halfmode == V16QImode);
8276
8277   if (src1 == dest && src2 == dest + halfregs)
8278     {
8279       /* No-op move.  Can't split to nothing; emit something.  */
8280       emit_note (NOTE_INSN_DELETED);
8281       return;
8282     }
8283
8284   /* Preserve register attributes for variable tracking.  */
8285   destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
8286   desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
8287                                GET_MODE_SIZE (halfmode));
8288
8289   /* Special case of reversed high/low parts.  */
8290   if (reg_overlap_mentioned_p (operands[2], destlo)
8291       && reg_overlap_mentioned_p (operands[1], desthi))
8292     {
8293       emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8294       emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
8295       emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8296     }
8297   else if (!reg_overlap_mentioned_p (operands[2], destlo))
8298     {
8299       /* Try to avoid unnecessary moves if part of the result
8300          is in the right place already.  */
8301       if (src1 != dest)
8302         emit_move_insn (destlo, operands[1]);
8303       if (src2 != dest + halfregs)
8304         emit_move_insn (desthi, operands[2]);
8305     }
8306   else
8307     {
8308       if (src2 != dest + halfregs)
8309         emit_move_insn (desthi, operands[2]);
8310       if (src1 != dest)
8311         emit_move_insn (destlo, operands[1]);
8312     }
8313 }
8314
8315 /* vec_perm support.  */
8316
8317 #define MAX_VECT_LEN 16
8318
8319 struct expand_vec_perm_d
8320 {
8321   rtx target, op0, op1;
8322   unsigned char perm[MAX_VECT_LEN];
8323   enum machine_mode vmode;
8324   unsigned char nelt;
8325   bool one_vector_p;
8326   bool testing_p;
8327 };
8328
8329 /* Generate a variable permutation.  */
8330
8331 static void
8332 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
8333 {
8334   enum machine_mode vmode = GET_MODE (target);
8335   bool one_vector_p = rtx_equal_p (op0, op1);
8336
8337   gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
8338   gcc_checking_assert (GET_MODE (op0) == vmode);
8339   gcc_checking_assert (GET_MODE (op1) == vmode);
8340   gcc_checking_assert (GET_MODE (sel) == vmode);
8341   gcc_checking_assert (TARGET_SIMD);
8342
8343   if (one_vector_p)
8344     {
8345       if (vmode == V8QImode)
8346         {
8347           /* Expand the argument to a V16QI mode by duplicating it.  */
8348           rtx pair = gen_reg_rtx (V16QImode);
8349           emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
8350           emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8351         }
8352       else
8353         {
8354           emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
8355         }
8356     }
8357   else
8358     {
8359       rtx pair;
8360
8361       if (vmode == V8QImode)
8362         {
8363           pair = gen_reg_rtx (V16QImode);
8364           emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
8365           emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8366         }
8367       else
8368         {
8369           pair = gen_reg_rtx (OImode);
8370           emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
8371           emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
8372         }
8373     }
8374 }
8375
8376 void
8377 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
8378 {
8379   enum machine_mode vmode = GET_MODE (target);
8380   unsigned int i, nelt = GET_MODE_NUNITS (vmode);
8381   bool one_vector_p = rtx_equal_p (op0, op1);
8382   rtx rmask[MAX_VECT_LEN], mask;
8383
8384   gcc_checking_assert (!BYTES_BIG_ENDIAN);
8385
8386   /* The TBL instruction does not use a modulo index, so we must take care
8387      of that ourselves.  */
8388   mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
8389   for (i = 0; i < nelt; ++i)
8390     rmask[i] = mask;
8391   mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
8392   sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
8393
8394   aarch64_expand_vec_perm_1 (target, op0, op1, sel);
8395 }
8396
8397 /* Recognize patterns suitable for the TRN instructions.  */
8398 static bool
8399 aarch64_evpc_trn (struct expand_vec_perm_d *d)
8400 {
8401   unsigned int i, odd, mask, nelt = d->nelt;
8402   rtx out, in0, in1, x;
8403   rtx (*gen) (rtx, rtx, rtx);
8404   enum machine_mode vmode = d->vmode;
8405
8406   if (GET_MODE_UNIT_SIZE (vmode) > 8)
8407     return false;
8408
8409   /* Note that these are little-endian tests.
8410      We correct for big-endian later.  */
8411   if (d->perm[0] == 0)
8412     odd = 0;
8413   else if (d->perm[0] == 1)
8414     odd = 1;
8415   else
8416     return false;
8417   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8418
8419   for (i = 0; i < nelt; i += 2)
8420     {
8421       if (d->perm[i] != i + odd)
8422         return false;
8423       if (d->perm[i + 1] != ((i + nelt + odd) & mask))
8424         return false;
8425     }
8426
8427   /* Success!  */
8428   if (d->testing_p)
8429     return true;
8430
8431   in0 = d->op0;
8432   in1 = d->op1;
8433   if (BYTES_BIG_ENDIAN)
8434     {
8435       x = in0, in0 = in1, in1 = x;
8436       odd = !odd;
8437     }
8438   out = d->target;
8439
8440   if (odd)
8441     {
8442       switch (vmode)
8443         {
8444         case V16QImode: gen = gen_aarch64_trn2v16qi; break;
8445         case V8QImode: gen = gen_aarch64_trn2v8qi; break;
8446         case V8HImode: gen = gen_aarch64_trn2v8hi; break;
8447         case V4HImode: gen = gen_aarch64_trn2v4hi; break;
8448         case V4SImode: gen = gen_aarch64_trn2v4si; break;
8449         case V2SImode: gen = gen_aarch64_trn2v2si; break;
8450         case V2DImode: gen = gen_aarch64_trn2v2di; break;
8451         case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
8452         case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
8453         case V2DFmode: gen = gen_aarch64_trn2v2df; break;
8454         default:
8455           return false;
8456         }
8457     }
8458   else
8459     {
8460       switch (vmode)
8461         {
8462         case V16QImode: gen = gen_aarch64_trn1v16qi; break;
8463         case V8QImode: gen = gen_aarch64_trn1v8qi; break;
8464         case V8HImode: gen = gen_aarch64_trn1v8hi; break;
8465         case V4HImode: gen = gen_aarch64_trn1v4hi; break;
8466         case V4SImode: gen = gen_aarch64_trn1v4si; break;
8467         case V2SImode: gen = gen_aarch64_trn1v2si; break;
8468         case V2DImode: gen = gen_aarch64_trn1v2di; break;
8469         case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
8470         case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
8471         case V2DFmode: gen = gen_aarch64_trn1v2df; break;
8472         default:
8473           return false;
8474         }
8475     }
8476
8477   emit_insn (gen (out, in0, in1));
8478   return true;
8479 }
8480
8481 /* Recognize patterns suitable for the UZP instructions.  */
8482 static bool
8483 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
8484 {
8485   unsigned int i, odd, mask, nelt = d->nelt;
8486   rtx out, in0, in1, x;
8487   rtx (*gen) (rtx, rtx, rtx);
8488   enum machine_mode vmode = d->vmode;
8489
8490   if (GET_MODE_UNIT_SIZE (vmode) > 8)
8491     return false;
8492
8493   /* Note that these are little-endian tests.
8494      We correct for big-endian later.  */
8495   if (d->perm[0] == 0)
8496     odd = 0;
8497   else if (d->perm[0] == 1)
8498     odd = 1;
8499   else
8500     return false;
8501   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8502
8503   for (i = 0; i < nelt; i++)
8504     {
8505       unsigned elt = (i * 2 + odd) & mask;
8506       if (d->perm[i] != elt)
8507         return false;
8508     }
8509
8510   /* Success!  */
8511   if (d->testing_p)
8512     return true;
8513
8514   in0 = d->op0;
8515   in1 = d->op1;
8516   if (BYTES_BIG_ENDIAN)
8517     {
8518       x = in0, in0 = in1, in1 = x;
8519       odd = !odd;
8520     }
8521   out = d->target;
8522
8523   if (odd)
8524     {
8525       switch (vmode)
8526         {
8527         case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
8528         case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
8529         case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
8530         case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
8531         case V4SImode: gen = gen_aarch64_uzp2v4si; break;
8532         case V2SImode: gen = gen_aarch64_uzp2v2si; break;
8533         case V2DImode: gen = gen_aarch64_uzp2v2di; break;
8534         case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
8535         case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
8536         case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
8537         default:
8538           return false;
8539         }
8540     }
8541   else
8542     {
8543       switch (vmode)
8544         {
8545         case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
8546         case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
8547         case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
8548         case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
8549         case V4SImode: gen = gen_aarch64_uzp1v4si; break;
8550         case V2SImode: gen = gen_aarch64_uzp1v2si; break;
8551         case V2DImode: gen = gen_aarch64_uzp1v2di; break;
8552         case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
8553         case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
8554         case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
8555         default:
8556           return false;
8557         }
8558     }
8559
8560   emit_insn (gen (out, in0, in1));
8561   return true;
8562 }
8563
8564 /* Recognize patterns suitable for the ZIP instructions.  */
8565 static bool
8566 aarch64_evpc_zip (struct expand_vec_perm_d *d)
8567 {
8568   unsigned int i, high, mask, nelt = d->nelt;
8569   rtx out, in0, in1, x;
8570   rtx (*gen) (rtx, rtx, rtx);
8571   enum machine_mode vmode = d->vmode;
8572
8573   if (GET_MODE_UNIT_SIZE (vmode) > 8)
8574     return false;
8575
8576   /* Note that these are little-endian tests.
8577      We correct for big-endian later.  */
8578   high = nelt / 2;
8579   if (d->perm[0] == high)
8580     /* Do Nothing.  */
8581     ;
8582   else if (d->perm[0] == 0)
8583     high = 0;
8584   else
8585     return false;
8586   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8587
8588   for (i = 0; i < nelt / 2; i++)
8589     {
8590       unsigned elt = (i + high) & mask;
8591       if (d->perm[i * 2] != elt)
8592         return false;
8593       elt = (elt + nelt) & mask;
8594       if (d->perm[i * 2 + 1] != elt)
8595         return false;
8596     }
8597
8598   /* Success!  */
8599   if (d->testing_p)
8600     return true;
8601
8602   in0 = d->op0;
8603   in1 = d->op1;
8604   if (BYTES_BIG_ENDIAN)
8605     {
8606       x = in0, in0 = in1, in1 = x;
8607       high = !high;
8608     }
8609   out = d->target;
8610
8611   if (high)
8612     {
8613       switch (vmode)
8614         {
8615         case V16QImode: gen = gen_aarch64_zip2v16qi; break;
8616         case V8QImode: gen = gen_aarch64_zip2v8qi; break;
8617         case V8HImode: gen = gen_aarch64_zip2v8hi; break;
8618         case V4HImode: gen = gen_aarch64_zip2v4hi; break;
8619         case V4SImode: gen = gen_aarch64_zip2v4si; break;
8620         case V2SImode: gen = gen_aarch64_zip2v2si; break;
8621         case V2DImode: gen = gen_aarch64_zip2v2di; break;
8622         case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
8623         case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
8624         case V2DFmode: gen = gen_aarch64_zip2v2df; break;
8625         default:
8626           return false;
8627         }
8628     }
8629   else
8630     {
8631       switch (vmode)
8632         {
8633         case V16QImode: gen = gen_aarch64_zip1v16qi; break;
8634         case V8QImode: gen = gen_aarch64_zip1v8qi; break;
8635         case V8HImode: gen = gen_aarch64_zip1v8hi; break;
8636         case V4HImode: gen = gen_aarch64_zip1v4hi; break;
8637         case V4SImode: gen = gen_aarch64_zip1v4si; break;
8638         case V2SImode: gen = gen_aarch64_zip1v2si; break;
8639         case V2DImode: gen = gen_aarch64_zip1v2di; break;
8640         case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
8641         case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
8642         case V2DFmode: gen = gen_aarch64_zip1v2df; break;
8643         default:
8644           return false;
8645         }
8646     }
8647
8648   emit_insn (gen (out, in0, in1));
8649   return true;
8650 }
8651
8652 static bool
8653 aarch64_evpc_dup (struct expand_vec_perm_d *d)
8654 {
8655   rtx (*gen) (rtx, rtx, rtx);
8656   rtx out = d->target;
8657   rtx in0;
8658   enum machine_mode vmode = d->vmode;
8659   unsigned int i, elt, nelt = d->nelt;
8660   rtx lane;
8661
8662   /* TODO: This may not be big-endian safe.  */
8663   if (BYTES_BIG_ENDIAN)
8664     return false;
8665
8666   elt = d->perm[0];
8667   for (i = 1; i < nelt; i++)
8668     {
8669       if (elt != d->perm[i])
8670         return false;
8671     }
8672
8673   /* The generic preparation in aarch64_expand_vec_perm_const_1
8674      swaps the operand order and the permute indices if it finds
8675      d->perm[0] to be in the second operand.  Thus, we can always
8676      use d->op0 and need not do any extra arithmetic to get the
8677      correct lane number.  */
8678   in0 = d->op0;
8679   lane = GEN_INT (elt);
8680
8681   switch (vmode)
8682     {
8683     case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8684     case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8685     case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8686     case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8687     case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8688     case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8689     case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8690     case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8691     case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8692     case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8693     default:
8694       return false;
8695     }
8696
8697   emit_insn (gen (out, in0, lane));
8698   return true;
8699 }
8700
8701 static bool
8702 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8703 {
8704   rtx rperm[MAX_VECT_LEN], sel;
8705   enum machine_mode vmode = d->vmode;
8706   unsigned int i, nelt = d->nelt;
8707
8708   /* TODO: ARM's TBL indexing is little-endian.  In order to handle GCC's
8709      numbering of elements for big-endian, we must reverse the order.  */
8710   if (BYTES_BIG_ENDIAN)
8711     return false;
8712
8713   if (d->testing_p)
8714     return true;
8715
8716   /* Generic code will try constant permutation twice.  Once with the
8717      original mode and again with the elements lowered to QImode.
8718      So wait and don't do the selector expansion ourselves.  */
8719   if (vmode != V8QImode && vmode != V16QImode)
8720     return false;
8721
8722   for (i = 0; i < nelt; ++i)
8723     rperm[i] = GEN_INT (d->perm[i]);
8724   sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8725   sel = force_reg (vmode, sel);
8726
8727   aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8728   return true;
8729 }
8730
8731 static bool
8732 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8733 {
8734   /* The pattern matching functions above are written to look for a small
8735      number to begin the sequence (0, 1, N/2).  If we begin with an index
8736      from the second operand, we can swap the operands.  */
8737   if (d->perm[0] >= d->nelt)
8738     {
8739       unsigned i, nelt = d->nelt;
8740       rtx x;
8741
8742       for (i = 0; i < nelt; ++i)
8743         d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
8744
8745       x = d->op0;
8746       d->op0 = d->op1;
8747       d->op1 = x;
8748     }
8749
8750   if (TARGET_SIMD)
8751     {
8752       if (aarch64_evpc_zip (d))
8753         return true;
8754       else if (aarch64_evpc_uzp (d))
8755         return true;
8756       else if (aarch64_evpc_trn (d))
8757         return true;
8758       else if (aarch64_evpc_dup (d))
8759         return true;
8760       return aarch64_evpc_tbl (d);
8761     }
8762   return false;
8763 }
8764
8765 /* Expand a vec_perm_const pattern.  */
8766
8767 bool
8768 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8769 {
8770   struct expand_vec_perm_d d;
8771   int i, nelt, which;
8772
8773   d.target = target;
8774   d.op0 = op0;
8775   d.op1 = op1;
8776
8777   d.vmode = GET_MODE (target);
8778   gcc_assert (VECTOR_MODE_P (d.vmode));
8779   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8780   d.testing_p = false;
8781
8782   for (i = which = 0; i < nelt; ++i)
8783     {
8784       rtx e = XVECEXP (sel, 0, i);
8785       int ei = INTVAL (e) & (2 * nelt - 1);
8786       which |= (ei < nelt ? 1 : 2);
8787       d.perm[i] = ei;
8788     }
8789
8790   switch (which)
8791     {
8792     default:
8793       gcc_unreachable ();
8794
8795     case 3:
8796       d.one_vector_p = false;
8797       if (!rtx_equal_p (op0, op1))
8798         break;
8799
8800       /* The elements of PERM do not suggest that only the first operand
8801          is used, but both operands are identical.  Allow easier matching
8802          of the permutation by folding the permutation into the single
8803          input vector.  */
8804       /* Fall Through.  */
8805     case 2:
8806       for (i = 0; i < nelt; ++i)
8807         d.perm[i] &= nelt - 1;
8808       d.op0 = op1;
8809       d.one_vector_p = true;
8810       break;
8811
8812     case 1:
8813       d.op1 = op0;
8814       d.one_vector_p = true;
8815       break;
8816     }
8817
8818   return aarch64_expand_vec_perm_const_1 (&d);
8819 }
8820
8821 static bool
8822 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8823                                      const unsigned char *sel)
8824 {
8825   struct expand_vec_perm_d d;
8826   unsigned int i, nelt, which;
8827   bool ret;
8828
8829   d.vmode = vmode;
8830   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8831   d.testing_p = true;
8832   memcpy (d.perm, sel, nelt);
8833
8834   /* Calculate whether all elements are in one vector.  */
8835   for (i = which = 0; i < nelt; ++i)
8836     {
8837       unsigned char e = d.perm[i];
8838       gcc_assert (e < 2 * nelt);
8839       which |= (e < nelt ? 1 : 2);
8840     }
8841
8842   /* If all elements are from the second vector, reindex as if from the
8843      first vector.  */
8844   if (which == 2)
8845     for (i = 0; i < nelt; ++i)
8846       d.perm[i] -= nelt;
8847
8848   /* Check whether the mask can be applied to a single vector.  */
8849   d.one_vector_p = (which != 3);
8850
8851   d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8852   d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8853   if (!d.one_vector_p)
8854     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8855
8856   start_sequence ();
8857   ret = aarch64_expand_vec_perm_const_1 (&d);
8858   end_sequence ();
8859
8860   return ret;
8861 }
8862
8863 /* Implement target hook CANNOT_CHANGE_MODE_CLASS.  */
8864 bool
8865 aarch64_cannot_change_mode_class (enum machine_mode from,
8866                                   enum machine_mode to,
8867                                   enum reg_class rclass)
8868 {
8869   /* Full-reg subregs are allowed on general regs or any class if they are
8870      the same size.  */
8871   if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
8872       || !reg_classes_intersect_p (FP_REGS, rclass))
8873     return false;
8874
8875   /* Limited combinations of subregs are safe on FPREGs.  Particularly,
8876      1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
8877      2. Scalar to Scalar for integer modes or same size float modes.
8878      3. Vector to Vector modes.  */
8879   if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
8880     {
8881       if (aarch64_vector_mode_supported_p (from)
8882           && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
8883         return false;
8884
8885       if (GET_MODE_NUNITS (from) == 1
8886           && GET_MODE_NUNITS (to) == 1
8887           && (GET_MODE_CLASS (from) == MODE_INT
8888               || from == to))
8889         return false;
8890
8891       if (aarch64_vector_mode_supported_p (from)
8892           && aarch64_vector_mode_supported_p (to))
8893         return false;
8894     }
8895
8896   return true;
8897 }
8898 /* Target hook overrides, picked up by TARGET_INITIALIZER at end of file.  */
8899 #undef TARGET_ADDRESS_COST
8900 #define TARGET_ADDRESS_COST aarch64_address_cost
8901
8902 /* This hook determines whether unnamed bitfields affect the alignment
8903    of the containing structure.  The hook returns true if the structure
8904    should inherit the alignment requirements of an unnamed bitfield's
8905    type.  */
8906 #undef TARGET_ALIGN_ANON_BITFIELD
8907 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8908 /* Assembler directives for aligned DI, HI and SI data.  */
8909 #undef TARGET_ASM_ALIGNED_DI_OP
8910 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8911
8912 #undef TARGET_ASM_ALIGNED_HI_OP
8913 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8914
8915 #undef TARGET_ASM_ALIGNED_SI_OP
8916 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8917
8918 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8919 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8920   hook_bool_const_tree_hwi_hwi_const_tree_true
8921
8922 #undef TARGET_ASM_FILE_START
8923 #define TARGET_ASM_FILE_START aarch64_start_file
8924
8925 #undef TARGET_ASM_OUTPUT_MI_THUNK
8926 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8927
8928 #undef TARGET_ASM_SELECT_RTX_SECTION
8929 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8930
8931 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8932 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8933
8934 #undef TARGET_BUILD_BUILTIN_VA_LIST
8935 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8936
8937 #undef TARGET_CALLEE_COPIES
8938 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8939
8940 #undef TARGET_CAN_ELIMINATE
8941 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8942
8943 #undef TARGET_CANNOT_FORCE_CONST_MEM
8944 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8945
8946 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8947 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8948
8949 /* Only the least significant bit is used for initialization guard
8950    variables.  */
8951 #undef TARGET_CXX_GUARD_MASK_BIT
8952 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
8953
8954 #undef TARGET_C_MODE_FOR_SUFFIX
8955 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8956 /* With TARGET_BIG_ENDIAN_DEFAULT defined, MASK_BIG_END is on by default.  */
8957 #ifdef TARGET_BIG_ENDIAN_DEFAULT
8958 #undef  TARGET_DEFAULT_TARGET_FLAGS
8959 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8960 #endif
8961
8962 #undef TARGET_CLASS_MAX_NREGS
8963 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8964
8965 #undef TARGET_BUILTIN_DECL
8966 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
8967
8968 #undef  TARGET_EXPAND_BUILTIN
8969 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8970
8971 #undef TARGET_EXPAND_BUILTIN_VA_START
8972 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8973
8974 #undef TARGET_FOLD_BUILTIN
8975 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8976
8977 #undef TARGET_FUNCTION_ARG
8978 #define TARGET_FUNCTION_ARG aarch64_function_arg
8979
8980 #undef TARGET_FUNCTION_ARG_ADVANCE
8981 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8982
8983 #undef TARGET_FUNCTION_ARG_BOUNDARY
8984 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8985
8986 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8987 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8988
8989 #undef TARGET_FUNCTION_VALUE
8990 #define TARGET_FUNCTION_VALUE aarch64_function_value
8991
8992 #undef TARGET_FUNCTION_VALUE_REGNO_P
8993 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8994
8995 #undef TARGET_FRAME_POINTER_REQUIRED
8996 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8997
8998 #undef TARGET_GIMPLE_FOLD_BUILTIN
8999 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
9000
9001 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
9002 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
9003
9004 #undef  TARGET_INIT_BUILTINS
9005 #define TARGET_INIT_BUILTINS  aarch64_init_builtins
9006
9007 #undef TARGET_LEGITIMATE_ADDRESS_P
9008 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
9009
9010 #undef TARGET_LEGITIMATE_CONSTANT_P
9011 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
9012
9013 #undef TARGET_LIBGCC_CMP_RETURN_MODE
9014 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
9015
9016 #undef TARGET_LRA_P
9017 #define TARGET_LRA_P aarch64_lra_p
9018
9019 #undef TARGET_MANGLE_TYPE
9020 #define TARGET_MANGLE_TYPE aarch64_mangle_type
9021
9022 #undef TARGET_MEMORY_MOVE_COST
9023 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
9024
9025 #undef TARGET_MUST_PASS_IN_STACK
9026 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
9027
9028 /* This target hook should return true if accesses to volatile bitfields
9029    should use the narrowest mode possible.  It should return false if these
9030    accesses should use the bitfield container type.  */
9031 #undef TARGET_NARROW_VOLATILE_BITFIELD
9032 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
9033
9034 #undef  TARGET_OPTION_OVERRIDE
9035 #define TARGET_OPTION_OVERRIDE aarch64_override_options
9036
9037 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
9038 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
9039   aarch64_override_options_after_change
9040
9041 #undef TARGET_PASS_BY_REFERENCE
9042 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
9043
9044 #undef TARGET_PREFERRED_RELOAD_CLASS
9045 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
9046
9047 #undef TARGET_SECONDARY_RELOAD
9048 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
9049
9050 #undef TARGET_SHIFT_TRUNCATION_MASK
9051 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
9052
9053 #undef TARGET_SETUP_INCOMING_VARARGS
9054 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
9055
9056 #undef TARGET_STRUCT_VALUE_RTX
9057 #define TARGET_STRUCT_VALUE_RTX   aarch64_struct_value_rtx
9058
9059 #undef TARGET_REGISTER_MOVE_COST
9060 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
9061
9062 #undef TARGET_RETURN_IN_MEMORY
9063 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
9064
9065 #undef TARGET_RETURN_IN_MSB
9066 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
9067
9068 #undef TARGET_RTX_COSTS
9069 #define TARGET_RTX_COSTS aarch64_rtx_costs
9070
9071 #undef TARGET_SCHED_ISSUE_RATE
9072 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
9073
9074 #undef TARGET_TRAMPOLINE_INIT
9075 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
9076
9077 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9078 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
9079
9080 #undef TARGET_VECTOR_MODE_SUPPORTED_P
9081 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
9082
9083 #undef TARGET_ARRAY_MODE_SUPPORTED_P
9084 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
9085 /* Vectorizer hooks.  */
9086 #undef TARGET_VECTORIZE_ADD_STMT_COST
9087 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
9088
9089 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
9090 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
9091   aarch64_builtin_vectorization_cost
9092
9093 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
9094 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
9095 /* Defined with an empty expansion: only its being defined can matter.  */
9096 #undef TARGET_VECTORIZE_BUILTINS
9097 #define TARGET_VECTORIZE_BUILTINS
9098
9099 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
9100 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
9101   aarch64_builtin_vectorized_function
9102
9103 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
9104 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
9105   aarch64_autovectorize_vector_sizes
9106
9107 /* Section anchor support.  */
9108 /* Lowest offset from a section anchor that may be used.  */
9109 #undef TARGET_MIN_ANCHOR_OFFSET
9110 #define TARGET_MIN_ANCHOR_OFFSET -256
9111
9112 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
9113    byte offset; we can do much more for larger data types, but have no way
9114    to determine the size of the access.  We assume accesses are aligned.  */
9115 #undef TARGET_MAX_ANCHOR_OFFSET
9116 #define TARGET_MAX_ANCHOR_OFFSET 4095
9117 /* Vector alignment hooks.  */
9118 #undef TARGET_VECTOR_ALIGNMENT
9119 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
9120
9121 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
9122 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
9123   aarch64_simd_vector_alignment_reachable
9124
9125 /* vec_perm support: the check for expandable constant permutations.  */
9126
9127 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
9128 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
9129   aarch64_vectorize_vec_perm_const_ok
9130
9131 /* Condition-code register hooks.  */
9132 #undef TARGET_FIXED_CONDITION_CODE_REGS
9133 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
9134
9135 #undef TARGET_FLAGS_REGNUM
9136 #define TARGET_FLAGS_REGNUM CC_REGNUM
9137 /* Build the target vector from the TARGET_* macros defined above.  */
9138 struct gcc_target targetm = TARGET_INITIALIZER;
9139 /* NOTE(review): presumably gengtype-generated GC tables for this file.  */
9140 #include "gt-aarch64.h"