gcc/config/aarch64/aarch64.c

   1 /* Machine description for AArch64 architecture.
   2    Copyright (C) 2009-2014 Free Software Foundation, Inc.
   3    Contributed by ARM Ltd.
   4
   5    This file is part of GCC.
   6
   7    GCC is free software; you can redistribute it and/or modify it
   8    under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3, or (at your option)
  10    any later version.
  11
  12    GCC is distributed in the hope that it will be useful, but
  13    WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15    General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with GCC; see the file COPYING3.  If not see
  19    <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "insn-codes.h"
  26 #include "rtl.h"
  27 #include "insn-attr.h"
  28 #include "tree.h"
  29 #include "stringpool.h"
  30 #include "stor-layout.h"
  31 #include "calls.h"
  32 #include "varasm.h"
  33 #include "regs.h"
  34 #include "df.h"
  35 #include "hard-reg-set.h"
  36 #include "output.h"
  37 #include "expr.h"
  38 #include "reload.h"
  39 #include "toplev.h"
  40 #include "target.h"
  41 #include "target-def.h"
  42 #include "targhooks.h"
  43 #include "ggc.h"
  44 #include "function.h"
  45 #include "tm_p.h"
  46 #include "recog.h"
  47 #include "langhooks.h"
  48 #include "diagnostic-core.h"
  49 #include "pointer-set.h"
  50 #include "hash-table.h"
  51 #include "vec.h"
  52 #include "basic-block.h"
  53 #include "tree-ssa-alias.h"
  54 #include "internal-fn.h"
  55 #include "gimple-fold.h"
  56 #include "tree-eh.h"
  57 #include "gimple-expr.h"
  58 #include "is-a.h"
  59 #include "gimple.h"
  60 #include "gimplify.h"
  61 #include "optabs.h"
  62 #include "dwarf2.h"
  63 #include "cfgloop.h"
  64 #include "tree-vectorizer.h"
  65 #include "config/arm/aarch-cost-tables.h"
  66 #include "dumpfile.h"
  67
  68 /* Defined for convenience.  */
  69 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
  70
  71 /* Classifies an address.
  72
  73    ADDRESS_REG_IMM
  74        A simple base register plus immediate offset.
  75
  76    ADDRESS_REG_WB
  77        A base register indexed by immediate offset with writeback.
  78
  79    ADDRESS_REG_REG
  80        A base register indexed by (optionally scaled) register.
  81
  82    ADDRESS_REG_UXTW
  83        A base register indexed by (optionally scaled) zero-extended register.
  84
  85    ADDRESS_REG_SXTW
  86        A base register indexed by (optionally scaled) sign-extended register.
  87
  88    ADDRESS_LO_SUM
  89        A LO_SUM rtx with a base register and "LO12" symbol relocation.
  90
   ADDRESS_SYMBOLIC
  92        A constant symbolic address, in pc-relative literal pool.  */
  93
  94 enum aarch64_address_type {
  95   ADDRESS_REG_IMM,
  96   ADDRESS_REG_WB,
  97   ADDRESS_REG_REG,
  98   ADDRESS_REG_UXTW,
  99   ADDRESS_REG_SXTW,
 100   ADDRESS_LO_SUM,
 101   ADDRESS_SYMBOLIC
 102 };
 103
/* The result of classifying an address.  TYPE is one of the
   aarch64_address_type forms and SYMBOL_TYPE is an aarch64_symbol_type.
   NOTE(review): BASE, OFFSET and SHIFT presumably hold the base register,
   the immediate/index part and the index scaling for that form --
   confirm against the code that fills this structure in.  */
struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};
 111
/* Description of a SIMD immediate.
   NOTE(review): the field meanings below are inferred from their names
   only (VALUE: the element value; SHIFT: shift applied to it;
   ELEMENT_WIDTH: element size; MVN/MSL: instruction-form flags) --
   confirm against the code that produces and consumes this structure.  */
struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};
 120
 121 /* The current code model.  */
 122 enum aarch64_code_model aarch64_cmodel;
 123
 124 #ifdef HAVE_AS_TLS
 125 #undef TARGET_HAVE_TLS
 126 #define TARGET_HAVE_TLS 1
 127 #endif
 128
 129 static bool aarch64_lra_p (void);
 130 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
 131 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
 132                                                      const_tree,
 133                                                      enum machine_mode *, int *,
 134                                                      bool *);
 135 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
 136 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
 137 static void aarch64_override_options_after_change (void);
 138 static bool aarch64_vector_mode_supported_p (enum machine_mode);
 139 static unsigned bit_count (unsigned HOST_WIDE_INT);
 140 static bool aarch64_const_vec_all_same_int_p (rtx,
 141                                               HOST_WIDE_INT, HOST_WIDE_INT);
 142
 143 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
 144                                                  const unsigned char *sel);
 145 static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
 146
 147 /* The processor for which instructions should be scheduled.  */
 148 enum aarch64_processor aarch64_tune = cortexa53;
 149
 150 /* The current tuning set.  */
 151 const struct tune_params *aarch64_tune_params;
 152
 153 /* Mask to specify which instructions we are allowed to generate.  */
 154 unsigned long aarch64_isa_flags = 0;
 155
 156 /* Mask to specify which instruction scheduling options should be used.  */
 157 unsigned long aarch64_tune_flags = 0;
 158
 159 /* Tuning parameters.  */
 160
/* NAMED_PARAM (NAME, VAL) initializes one field of a tuning structure.
   When designated initializers are available it expands to
   ".NAME = (VAL)"; otherwise it expands to the bare "(VAL)", so the
   entries of every table must be listed in field declaration order to
   mean the same thing in both configurations.  */
#if HAVE_DESIGNATED_INITIALIZERS
#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
#else
#define NAMED_PARAM(NAME, VAL) (VAL)
#endif
 166
 167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 168 __extension__
 169 #endif
 170
 171 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 172 __extension__
 173 #endif
 174 static const struct cpu_addrcost_table generic_addrcost_table =
 175 {
 176 #if HAVE_DESIGNATED_INITIALIZERS
 177   .addr_scale_costs =
 178 #endif
 179     {
 180       NAMED_PARAM (qi, 0),
 181       NAMED_PARAM (hi, 0),
 182       NAMED_PARAM (si, 0),
 183       NAMED_PARAM (ti, 0),
 184     },
 185   NAMED_PARAM (pre_modify, 0),
 186   NAMED_PARAM (post_modify, 0),
 187   NAMED_PARAM (register_offset, 0),
 188   NAMED_PARAM (register_extend, 0),
 189   NAMED_PARAM (imm_offset, 0)
 190 };
 191
 192 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 193 __extension__
 194 #endif
 195 static const struct cpu_addrcost_table cortexa57_addrcost_table =
 196 {
 197 #if HAVE_DESIGNATED_INITIALIZERS
 198   .addr_scale_costs =
 199 #endif
 200     {
 201       NAMED_PARAM (qi, 0),
 202       NAMED_PARAM (hi, 1),
 203       NAMED_PARAM (si, 0),
 204       NAMED_PARAM (ti, 1),
 205     },
 206   NAMED_PARAM (pre_modify, 0),
 207   NAMED_PARAM (post_modify, 0),
 208   NAMED_PARAM (register_offset, 0),
 209   NAMED_PARAM (register_extend, 0),
 210   NAMED_PARAM (imm_offset, 0),
 211 };
 212
 213 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 214 __extension__
 215 #endif
 216 static const struct cpu_regmove_cost generic_regmove_cost =
 217 {
 218   NAMED_PARAM (GP2GP, 1),
 219   NAMED_PARAM (GP2FP, 2),
 220   NAMED_PARAM (FP2GP, 2),
 221   /* We currently do not provide direct support for TFmode Q->Q move.
 222      Therefore we need to raise the cost above 2 in order to have
 223      reload handle the situation.  */
 224   NAMED_PARAM (FP2FP, 4)
 225 };
 226
 227 /* Generic costs for vector insn classes.  */
 228 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 229 __extension__
 230 #endif
 231 static const struct cpu_vector_cost generic_vector_cost =
 232 {
 233   NAMED_PARAM (scalar_stmt_cost, 1),
 234   NAMED_PARAM (scalar_load_cost, 1),
 235   NAMED_PARAM (scalar_store_cost, 1),
 236   NAMED_PARAM (vec_stmt_cost, 1),
 237   NAMED_PARAM (vec_to_scalar_cost, 1),
 238   NAMED_PARAM (scalar_to_vec_cost, 1),
 239   NAMED_PARAM (vec_align_load_cost, 1),
 240   NAMED_PARAM (vec_unalign_load_cost, 1),
 241   NAMED_PARAM (vec_unalign_store_cost, 1),
 242   NAMED_PARAM (vec_store_cost, 1),
 243   NAMED_PARAM (cond_taken_branch_cost, 3),
 244   NAMED_PARAM (cond_not_taken_branch_cost, 1)
 245 };
 246
/* Cortex-A57 costs for vector insn classes.  */
 248 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 249 __extension__
 250 #endif
 251 static const struct cpu_vector_cost cortexa57_vector_cost =
 252 {
 253   NAMED_PARAM (scalar_stmt_cost, 1),
 254   NAMED_PARAM (scalar_load_cost, 4),
 255   NAMED_PARAM (scalar_store_cost, 1),
 256   NAMED_PARAM (vec_stmt_cost, 3),
 257   NAMED_PARAM (vec_to_scalar_cost, 8),
 258   NAMED_PARAM (scalar_to_vec_cost, 8),
 259   NAMED_PARAM (vec_align_load_cost, 5),
 260   NAMED_PARAM (vec_unalign_load_cost, 5),
 261   NAMED_PARAM (vec_unalign_store_cost, 1),
 262   NAMED_PARAM (vec_store_cost, 1),
 263   NAMED_PARAM (cond_taken_branch_cost, 1),
 264   NAMED_PARAM (cond_not_taken_branch_cost, 1)
 265 };
 266
 267 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 268 __extension__
 269 #endif
 270 static const struct tune_params generic_tunings =
 271 {
 272   &cortexa57_extra_costs,
 273   &generic_addrcost_table,
 274   &generic_regmove_cost,
 275   &generic_vector_cost,
 276   NAMED_PARAM (memmov_cost, 4),
 277   NAMED_PARAM (issue_rate, 2)
 278 };
 279
 280 static const struct tune_params cortexa53_tunings =
 281 {
 282   &cortexa53_extra_costs,
 283   &generic_addrcost_table,
 284   &generic_regmove_cost,
 285   &generic_vector_cost,
 286   NAMED_PARAM (memmov_cost, 4),
 287   NAMED_PARAM (issue_rate, 2)
 288 };
 289
 290 static const struct tune_params cortexa57_tunings =
 291 {
 292   &cortexa57_extra_costs,
 293   &cortexa57_addrcost_table,
 294   &generic_regmove_cost,
 295   &cortexa57_vector_cost,
 296   NAMED_PARAM (memmov_cost, 4),
 297   NAMED_PARAM (issue_rate, 3)
 298 };
 299
/* A processor implementing AArch64.  The same structure describes both
   the entries of all_cores and the entries of all_architectures.  */
struct processor
{
  /* Name of the core or architecture; NULL terminates a table.  */
  const char *const name;
  /* Core identifier associated with this entry.  */
  enum aarch64_processor core;
  /* Architecture number as a string (built with #ARCH in the tables).  */
  const char *arch;
  /* Mask of feature flags (AARCH64_FL_*) for this entry.  */
  const unsigned long flags;
  /* Tuning parameters; NULL for the entries of all_architectures.  */
  const struct tune_params *const tune;
};
 309
 310 /* Processor cores implementing AArch64.  */
 311 static const struct processor all_cores[] =
 312 {
 313 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
 314   {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
 315 #include "aarch64-cores.def"
 316 #undef AARCH64_CORE
 317   {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
 318   {NULL, aarch64_none, NULL, 0, NULL}
 319 };
 320
 321 /* Architectures implementing AArch64.  */
 322 static const struct processor all_architectures[] =
 323 {
 324 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
 325   {NAME, CORE, #ARCH, FLAGS, NULL},
 326 #include "aarch64-arches.def"
 327 #undef AARCH64_ARCH
 328   {NULL, aarch64_none, NULL, 0, NULL}
 329 };
 330
 331 /* Target specification.  These are populated as commandline arguments
 332    are processed, or NULL if not specified.  */
 333 static const struct processor *selected_arch;
 334 static const struct processor *selected_cpu;
 335 static const struct processor *selected_tune;
 336
 337 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
 338
 339 /* An ISA extension in the co-processor and main instruction set space.  */
 340 struct aarch64_option_extension
 341 {
 342   const char *const name;
 343   const unsigned long flags_on;
 344   const unsigned long flags_off;
 345 };
 346
 347 /* ISA extensions in AArch64.  */
 348 static const struct aarch64_option_extension all_extensions[] =
 349 {
 350 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
 351   {NAME, FLAGS_ON, FLAGS_OFF},
 352 #include "aarch64-option-extensions.def"
 353 #undef AARCH64_OPT_EXTENSION
 354   {NULL, 0, 0}
 355 };
 356
 357 /* Used to track the size of an address when generating a pre/post
 358    increment address.  */
 359 static enum machine_mode aarch64_memory_reference_mode;
 360
 361 /* Used to force GTY into this file.  */
 362 static GTY(()) int gty_dummy;
 363
 364 /* A table of valid AArch64 "bitmask immediate" values for
 365    logical instructions.  */
 366
 367 #define AARCH64_NUM_BITMASKS  5334
 368 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
 369
 370 typedef enum aarch64_cond_code
 371 {
 372   AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
 373   AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
 374   AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
 375 }
 376 aarch64_cc;
 377
 378 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
 379
/* Textual names of the condition codes, listed in the same order as
   the aarch64_cc enumerators.  */
 381 static const char * const aarch64_condition_codes[] =
 382 {
 383   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
 384   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
 385 };
 386
 387 /* Provide a mapping from gcc register numbers to dwarf register numbers.  */
 388 unsigned
 389 aarch64_dbx_register_number (unsigned regno)
 390 {
 391    if (GP_REGNUM_P (regno))
 392      return AARCH64_DWARF_R0 + regno - R0_REGNUM;
 393    else if (regno == SP_REGNUM)
 394      return AARCH64_DWARF_SP;
 395    else if (FP_REGNUM_P (regno))
 396      return AARCH64_DWARF_V0 + regno - V0_REGNUM;
 397
 398    /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
 399       equivalent DWARF register.  */
 400    return DWARF_FRAME_REGISTERS;
 401 }
 402
 403 /* Return TRUE if MODE is any of the large INT modes.  */
 404 static bool
 405 aarch64_vect_struct_mode_p (enum machine_mode mode)
 406 {
 407   return mode == OImode || mode == CImode || mode == XImode;
 408 }
 409
 410 /* Return TRUE if MODE is any of the vector modes.  */
 411 static bool
 412 aarch64_vector_mode_p (enum machine_mode mode)
 413 {
 414   return aarch64_vector_mode_supported_p (mode)
 415          || aarch64_vect_struct_mode_p (mode);
 416 }
 417
 418 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
 419 static bool
 420 aarch64_array_mode_supported_p (enum machine_mode mode,
 421                                 unsigned HOST_WIDE_INT nelems)
 422 {
 423   if (TARGET_SIMD
 424       && AARCH64_VALID_SIMD_QREG_MODE (mode)
 425       && (nelems >= 2 && nelems <= 4))
 426     return true;
 427
 428   return false;
 429 }
 430
 431 /* Implement HARD_REGNO_NREGS.  */
 432
 433 int
 434 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
 435 {
 436   switch (aarch64_regno_regclass (regno))
 437     {
 438     case FP_REGS:
 439     case FP_LO_REGS:
 440       return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
 441     default:
 442       return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
 443     }
 444   gcc_unreachable ();
 445 }
 446
 447 /* Implement HARD_REGNO_MODE_OK.  */
 448
 449 int
 450 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
 451 {
 452   if (GET_MODE_CLASS (mode) == MODE_CC)
 453     return regno == CC_REGNUM;
 454
 455   if (regno == SP_REGNUM)
 456     /* The purpose of comparing with ptr_mode is to support the
 457        global register variable associated with the stack pointer
 458        register via the syntax of asm ("wsp") in ILP32.  */
 459     return mode == Pmode || mode == ptr_mode;
 460
 461   if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
 462     return mode == Pmode;
 463
 464   if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
 465     return 1;
 466
 467   if (FP_REGNUM_P (regno))
 468     {
 469       if (aarch64_vect_struct_mode_p (mode))
 470         return
 471           (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
 472       else
 473         return 1;
 474     }
 475
 476   return 0;
 477 }
 478
/* Return true if calls to DECL should be treated as
   long-calls (ie called via a register).  DECL is currently ignored:
   this implementation always returns false.  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}
 486
/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (ie called via a register).  The decision is delegated to
   aarch64_decl_is_long_call_p on the declaration attached to SYM.  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}
 494
 495 /* Return true if the offsets to a zero/sign-extract operation
 496    represent an expression that matches an extend operation.  The
 497    operands represent the paramters from
 498
 499    (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
 500 bool
 501 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
 502                                 rtx extract_imm)
 503 {
 504   HOST_WIDE_INT mult_val, extract_val;
 505
 506   if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
 507     return false;
 508
 509   mult_val = INTVAL (mult_imm);
 510   extract_val = INTVAL (extract_imm);
 511
 512   if (extract_val > 8
 513       && extract_val < GET_MODE_BITSIZE (mode)
 514       && exact_log2 (extract_val & ~7) > 0
 515       && (extract_val & 7) <= 4
 516       && mult_val == (1 << (extract_val & 7)))
 517     return true;
 518
 519   return false;
 520 }
 521
 522 /* Emit an insn that's a simple single-set.  Both the operands must be
 523    known to be valid.  */
 524 inline static rtx
 525 emit_set_insn (rtx x, rtx y)
 526 {
 527   return emit_insn (gen_rtx_SET (VOIDmode, x, y));
 528 }
 529
 530 /* X and Y are two things to compare using CODE.  Emit the compare insn and
 531    return the rtx for register 0 in the proper mode.  */
 532 rtx
 533 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
 534 {
 535   enum machine_mode mode = SELECT_CC_MODE (code, x, y);
 536   rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
 537
 538   emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
 539   return cc_reg;
 540 }
 541
 542 /* Build the SYMBOL_REF for __tls_get_addr.  */
 543
 544 static GTY(()) rtx tls_get_addr_libfunc;
 545
 546 rtx
 547 aarch64_tls_get_addr (void)
 548 {
 549   if (!tls_get_addr_libfunc)
 550     tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
 551   return tls_get_addr_libfunc;
 552 }
 553
 554 /* Return the TLS model to use for ADDR.  */
 555
 556 static enum tls_model
 557 tls_symbolic_operand_type (rtx addr)
 558 {
 559   enum tls_model tls_kind = TLS_MODEL_NONE;
 560   rtx sym, addend;
 561
 562   if (GET_CODE (addr) == CONST)
 563     {
 564       split_const (addr, &sym, &addend);
 565       if (GET_CODE (sym) == SYMBOL_REF)
 566         tls_kind = SYMBOL_REF_TLS_MODEL (sym);
 567     }
 568   else if (GET_CODE (addr) == SYMBOL_REF)
 569     tls_kind = SYMBOL_REF_TLS_MODEL (addr);
 570
 571   return tls_kind;
 572 }
 573
 574 /* We'll allow lo_sum's in addresses in our legitimate addresses
 575    so that combine would take care of combining addresses where
 576    necessary, but for generation purposes, we'll generate the address
 577    as :
 578    RTL                               Absolute
 579    tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo12:foo
 581                                      nop
 582
 583    PIC                               TLS
 584    adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
 585    ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
 586                                      bl   __tls_get_addr
 587                                      nop
 588
 589    Load TLS symbol, depending on TLS mechanism and TLS access model.
 590
 591    Global Dynamic - Traditional TLS:
 592    adrp tmp, :tlsgd:imm
 593    add  dest, tmp, #:tlsgd_lo12:imm
 594    bl   __tls_get_addr
 595
 596    Global Dynamic - TLS Descriptors:
 597    adrp dest, :tlsdesc:imm
 598    ldr  tmp, [dest, #:tlsdesc_lo12:imm]
 599    add  dest, dest, #:tlsdesc_lo12:imm
 600    blr  tmp
 601    mrs  tp, tpidr_el0
 602    add  dest, dest, tp
 603
 604    Initial Exec:
 605    mrs  tp, tpidr_el0
 606    adrp tmp, :gottprel:imm
 607    ldr  dest, [tmp, #:gottprel_lo12:imm]
 608    add  dest, dest, tp
 609
 610    Local Exec:
 611    mrs  tp, tpidr_el0
 612    add  t0, tp, #:tprel_hi12:imm
 613    add  t0, #:tprel_lo12_nc:imm
 614 */
 615
 616 static void
 617 aarch64_load_symref_appropriately (rtx dest, rtx imm,
 618                                    enum aarch64_symbol_type type)
 619 {
 620   switch (type)
 621     {
 622     case SYMBOL_SMALL_ABSOLUTE:
 623       {
 624         /* In ILP32, the mode of dest can be either SImode or DImode.  */
 625         rtx tmp_reg = dest;
 626         enum machine_mode mode = GET_MODE (dest);
 627
 628         gcc_assert (mode == Pmode || mode == ptr_mode);
 629
 630         if (can_create_pseudo_p ())
 631           tmp_reg = gen_reg_rtx (mode);
 632
 633         emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
 634         emit_insn (gen_add_losym (dest, tmp_reg, imm));
 635         return;
 636       }
 637
 638     case SYMBOL_TINY_ABSOLUTE:
 639       emit_insn (gen_rtx_SET (Pmode, dest, imm));
 640       return;
 641
 642     case SYMBOL_SMALL_GOT:
 643       {
 644         /* In ILP32, the mode of dest can be either SImode or DImode,
 645            while the got entry is always of SImode size.  The mode of
 646            dest depends on how dest is used: if dest is assigned to a
 647            pointer (e.g. in the memory), it has SImode; it may have
 648            DImode if dest is dereferenced to access the memeory.
 649            This is why we have to handle three different ldr_got_small
 650            patterns here (two patterns for ILP32).  */
 651         rtx tmp_reg = dest;
 652         enum machine_mode mode = GET_MODE (dest);
 653
 654         if (can_create_pseudo_p ())
 655           tmp_reg = gen_reg_rtx (mode);
 656
 657         emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
 658         if (mode == ptr_mode)
 659           {
 660             if (mode == DImode)
 661               emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
 662             else
 663               emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
 664           }
 665         else
 666           {
 667             gcc_assert (mode == Pmode);
 668             emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
 669           }
 670
 671         return;
 672       }
 673
 674     case SYMBOL_SMALL_TLSGD:
 675       {
 676         rtx insns;
 677         rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
 678
 679         start_sequence ();
 680         emit_call_insn (gen_tlsgd_small (result, imm));
 681         insns = get_insns ();
 682         end_sequence ();
 683
 684         RTL_CONST_CALL_P (insns) = 1;
 685         emit_libcall_block (insns, dest, result, imm);
 686         return;
 687       }
 688
 689     case SYMBOL_SMALL_TLSDESC:
 690       {
 691         rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
 692         rtx tp;
 693
 694         emit_insn (gen_tlsdesc_small (imm));
 695         tp = aarch64_load_tp (NULL);
 696         emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
 697         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 698         return;
 699       }
 700
 701     case SYMBOL_SMALL_GOTTPREL:
 702       {
 703         rtx tmp_reg = gen_reg_rtx (Pmode);
 704         rtx tp = aarch64_load_tp (NULL);
 705         emit_insn (gen_tlsie_small (tmp_reg, imm));
 706         emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
 707         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 708         return;
 709       }
 710
 711     case SYMBOL_SMALL_TPREL:
 712       {
 713         rtx tp = aarch64_load_tp (NULL);
 714         emit_insn (gen_tlsle_small (dest, tp, imm));
 715         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 716         return;
 717       }
 718
 719     case SYMBOL_TINY_GOT:
 720       emit_insn (gen_ldr_got_tiny (dest, imm));
 721       return;
 722
 723     default:
 724       gcc_unreachable ();
 725     }
 726 }
 727
 728 /* Emit a move from SRC to DEST.  Assume that the move expanders can
 729    handle all moves if !can_create_pseudo_p ().  The distinction is
 730    important because, unlike emit_move_insn, the move expanders know
 731    how to force Pmode objects into the constant pool even when the
 732    constant pool address is not itself legitimate.  */
 733 static rtx
 734 aarch64_emit_move (rtx dest, rtx src)
 735 {
 736   return (can_create_pseudo_p ()
 737           ? emit_move_insn (dest, src)
 738           : emit_move_insn_1 (dest, src));
 739 }
 740
 741 /* Split a 128-bit move operation into two 64-bit move operations,
 742    taking care to handle partial overlap of register to register
 743    copies.  Special cases are needed when moving between GP regs and
 744    FP regs.  SRC can be a register, constant or memory; DST a register
 745    or memory.  If either operand is memory it must not have any side
 746    effects.  */
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx dst_lo, dst_hi;
  rtx src_lo, src_hi;

  enum machine_mode mode = GET_MODE (dst);

  /* Only TImode/TFmode moves without side effects are handled; SRC must
     have the same mode as DST or be modeless (VOIDmode).  */
  gcc_assert (mode == TImode || mode == TFmode);
  gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
  gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);

  if (REG_P (dst) && REG_P (src))
    {
      int src_regno = REGNO (src);
      int dst_regno = REGNO (dst);

      /* Handle FP <-> GP regs.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
        {
          /* GP -> FP: write the low and the high word of DST from the
             two word_mode halves of SRC.  */
          src_lo = gen_lowpart (word_mode, src);
          src_hi = gen_highpart (word_mode, src);

          if (mode == TImode)
            {
              emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
              emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
            }
          else
            {
              emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
              emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
            }
          return;
        }
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
        {
          /* FP -> GP: fill the two word_mode halves of DST from the low
             and the high word of SRC.  */
          dst_lo = gen_lowpart (word_mode, dst);
          dst_hi = gen_highpart (word_mode, dst);

          if (mode == TImode)
            {
              emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
              emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
            }
          else
            {
              emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
              emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
            }
          return;
        }
    }

  /* Every remaining case is done as two independent word_mode moves.
     gen_highpart_mode is given MODE explicitly for SRC, whose own mode
     may be VOIDmode (see the assertion above).  */
  dst_lo = gen_lowpart (word_mode, dst);
  dst_hi = gen_highpart (word_mode, dst);
  src_lo = gen_lowpart (word_mode, src);
  src_hi = gen_highpart_mode (word_mode, mode, src);

  /* At most one pairing may overlap.  */
  if (reg_overlap_mentioned_p (dst_lo, src_hi))
    {
      /* Writing DST_LO first would clobber SRC_HI, so move the high
         halves before the low halves.  */
      aarch64_emit_move (dst_hi, src_hi);
      aarch64_emit_move (dst_lo, src_lo);
    }
  else
    {
      aarch64_emit_move (dst_lo, src_lo);
      aarch64_emit_move (dst_hi, src_hi);
    }
}
 818
 819 bool
 820 aarch64_split_128bit_move_p (rtx dst, rtx src)
 821 {
 822   return (! REG_P (src)
 823           || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
 824 }
 825
 826 /* Split a complex SIMD combine.  */
 827
 828 void
 829 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
 830 {
 831   enum machine_mode src_mode = GET_MODE (src1);
 832   enum machine_mode dst_mode = GET_MODE (dst);
 833
 834   gcc_assert (VECTOR_MODE_P (dst_mode));
 835
 836   if (REG_P (dst) && REG_P (src1) && REG_P (src2))
 837     {
 838       rtx (*gen) (rtx, rtx, rtx);
 839
 840       switch (src_mode)
 841         {
 842         case V8QImode:
 843           gen = gen_aarch64_simd_combinev8qi;
 844           break;
 845         case V4HImode:
 846           gen = gen_aarch64_simd_combinev4hi;
 847           break;
 848         case V2SImode:
 849           gen = gen_aarch64_simd_combinev2si;
 850           break;
 851         case V2SFmode:
 852           gen = gen_aarch64_simd_combinev2sf;
 853           break;
 854         case DImode:
 855           gen = gen_aarch64_simd_combinedi;
 856           break;
 857         case DFmode:
 858           gen = gen_aarch64_simd_combinedf;
 859           break;
 860         default:
 861           gcc_unreachable ();
 862         }
 863
 864       emit_insn (gen (dst, src1, src2));
 865       return;
 866     }
 867 }
 868
 869 /* Split a complex SIMD move.  */
 870
 871 void
 872 aarch64_split_simd_move (rtx dst, rtx src)
 873 {
 874   enum machine_mode src_mode = GET_MODE (src);
 875   enum machine_mode dst_mode = GET_MODE (dst);
 876
 877   gcc_assert (VECTOR_MODE_P (dst_mode));
 878
 879   if (REG_P (dst) && REG_P (src))
 880     {
 881       rtx (*gen) (rtx, rtx);
 882
 883       gcc_assert (VECTOR_MODE_P (src_mode));
 884
 885       switch (src_mode)
 886         {
 887         case V16QImode:
 888           gen = gen_aarch64_split_simd_movv16qi;
 889           break;
 890         case V8HImode:
 891           gen = gen_aarch64_split_simd_movv8hi;
 892           break;
 893         case V4SImode:
 894           gen = gen_aarch64_split_simd_movv4si;
 895           break;
 896         case V2DImode:
 897           gen = gen_aarch64_split_simd_movv2di;
 898           break;
 899         case V4SFmode:
 900           gen = gen_aarch64_split_simd_movv4sf;
 901           break;
 902         case V2DFmode:
 903           gen = gen_aarch64_split_simd_movv2df;
 904           break;
 905         default:
 906           gcc_unreachable ();
 907         }
 908
 909       emit_insn (gen (dst, src));
 910       return;
 911     }
 912 }
 913
 914 static rtx
 915 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
 916 {
 917   if (can_create_pseudo_p ())
 918     return force_reg (mode, value);
 919   else
 920     {
 921       x = aarch64_emit_move (x, value);
 922       return x;
 923     }
 924 }
 925
 926
 927 static rtx
 928 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
 929 {
 930   if (!aarch64_plus_immediate (GEN_INT (offset), mode))
 931     {
 932       rtx high;
 933       /* Load the full offset into a register.  This
 934          might be improvable in the future.  */
 935       high = GEN_INT (offset);
 936       offset = 0;
 937       high = aarch64_force_temporary (mode, temp, high);
 938       reg = aarch64_force_temporary (mode, temp,
 939                                      gen_rtx_PLUS (mode, high, reg));
 940     }
 941   return plus_constant (mode, reg, offset);
 942 }
 943
     /* Expand a move of the immediate IMM into DEST, whose mode must be
        SImode or DImode.

        Symbolic operands (SYMBOL_REF, LABEL_REF, CONST) are dispatched on
        the classification returned by aarch64_classify_symbol.  A CONST_INT
        accepted by aarch64_move_imm is emitted as a single SET.  Any other
        non-CONST_INT operand is either emitted directly (HIGH) or loaded
        from the constant pool.  The remaining CONST_INT values are built
        from a sequence of instructions: a fixed two-instruction sequence
        for SImode, and for DImode the first matching strategy below,
        falling back to a chunk-by-chunk sequence.  */
 944 void
 945 aarch64_expand_mov_immediate (rtx dest, rtx imm)
 946 {
 947   enum machine_mode mode = GET_MODE (dest);
 948   unsigned HOST_WIDE_INT mask;
 949   int i;
 950   bool first;
 951   unsigned HOST_WIDE_INT val;
 952   bool subtargets;
 953   rtx subtarget;
 954   int one_match, zero_match;
 955
 956   gcc_assert (mode == SImode || mode == DImode);
 957
 958   /* Check on what type of symbol it is.  */
 959   if (GET_CODE (imm) == SYMBOL_REF
 960       || GET_CODE (imm) == LABEL_REF
 961       || GET_CODE (imm) == CONST)
 962     {
 963       rtx mem, base, offset;
 964       enum aarch64_symbol_type sty;
 965
 966       /* If we have (const (plus symbol offset)), separate out the offset
 967          before we start classifying the symbol.  */
 968       split_const (imm, &base, &offset);
 969
 970       sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
 971       switch (sty)
 972         {
 973         case SYMBOL_FORCE_TO_MEM:
               /* A symbol+offset that may not go in the constant pool is
                  computed as the bare symbol plus the offset instead.  */
 974           if (offset != const0_rtx
 975               && targetm.cannot_force_const_mem (mode, imm))
 976             {
 977               gcc_assert (can_create_pseudo_p ());
 978               base = aarch64_force_temporary (mode, dest, base);
 979               base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
 980               aarch64_emit_move (dest, base);
 981               return;
 982             }
               /* Otherwise load IMM from the constant pool in ptr_mode,
                  zero-extending when DEST's mode differs from ptr_mode.  */
 983           mem = force_const_mem (ptr_mode, imm);
 984           gcc_assert (mem);
 985           if (mode != ptr_mode)
 986             mem = gen_rtx_ZERO_EXTEND (mode, mem);
 987           emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
 988           return;
 989
 990         case SYMBOL_SMALL_TLSGD:
 991         case SYMBOL_SMALL_TLSDESC:
 992         case SYMBOL_SMALL_GOTTPREL:
 993         case SYMBOL_SMALL_GOT:
 994         case SYMBOL_TINY_GOT:
               /* For these classes a non-zero offset is added separately
                  after the bare symbol has been loaded.  */
 995           if (offset != const0_rtx)
 996             {
 997               gcc_assert(can_create_pseudo_p ());
 998               base = aarch64_force_temporary (mode, dest, base);
 999               base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1000               aarch64_emit_move (dest, base);
1001               return;
1002             }
1003           /* FALLTHRU */
1004
1005         case SYMBOL_SMALL_TPREL:
1006         case SYMBOL_SMALL_ABSOLUTE:
1007         case SYMBOL_TINY_ABSOLUTE:
1008           aarch64_load_symref_appropriately (dest, imm, sty);
1009           return;
1010
1011         default:
1012           gcc_unreachable ();
1013         }
1014     }
1015
       /* A constant accepted by aarch64_move_imm needs only a single SET.  */
1016   if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1017     {
1018       emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1019       return;
1020     }
1021
       /* Non-integer operands: emit HIGH directly, load anything else from
          the constant pool.  */
1022   if (!CONST_INT_P (imm))
1023     {
1024       if (GET_CODE (imm) == HIGH)
1025         emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1026       else
1027         {
1028           rtx mem = force_const_mem (mode, imm);
1029           gcc_assert (mem);
1030           emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1031         }
1032
1033       return;
1034     }
1035
1036   if (mode == SImode)
1037     {
1038       /* We know we can't do this in 1 insn, and we must be able to do it
1039          in two; so don't mess around looking for sequences that don't buy
1040          us anything.  */
1041       emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1042       emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1043                                  GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1044       return;
1045     }
1046
1047   /* Remaining cases are all for DImode.  */
1048
1049   val = INTVAL (imm);
       /* Fresh intermediate registers are only used when optimizing and
          while pseudos can still be created; otherwise DEST is reused.  */
1050   subtargets = optimize && can_create_pseudo_p ();
1051
1052   one_match = 0;
1053   zero_match = 0;
1054   mask = 0xffff;
1055
       /* Count the 16-bit chunks of VAL that are all zeros or all ones.  */
1056   for (i = 0; i < 64; i += 16, mask <<= 16)
1057     {
1058       if ((val & mask) == 0)
1059         zero_match++;
1060       else if ((val & mask) == mask)
1061         one_match++;
1062     }
1063
       /* Two all-ones chunks: set DEST to VAL with the first chunk that is
          not all ones forced to ones, then insert that chunk's real value.  */
1064   if (one_match == 2)
1065     {
1066       mask = 0xffff;
1067       for (i = 0; i < 64; i += 16, mask <<= 16)
1068         {
1069           if ((val & mask) != mask)
1070             {
1071               emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1072               emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1073                                          GEN_INT ((val >> i) & 0xffff)));
1074               return;
1075             }
1076         }
1077       gcc_unreachable ();
1078     }
1079
       /* Two all-zero chunks: go straight to the chunk-by-chunk sequence.  */
1080   if (zero_match == 2)
1081     goto simple_sequence;
1082
       /* Try to express VAL as a base constant tied to one of the upper
          three 16-bit chunks plus a difference that aarch64_uimm12_shift
          accepts (directly or negated); emit a SET of the base followed by
          an add of the difference.  */
1083   mask = 0x0ffff0000UL;
1084   for (i = 16; i < 64; i += 16, mask <<= 16)
1085     {
           /* COMP is the lowest set bit of MASK.  */
1086       HOST_WIDE_INT comp = mask & ~(mask - 1);
1087
1088       if (aarch64_uimm12_shift (val - (val & mask)))
1089         {
1090           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1091
1092           emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1093           emit_insn (gen_adddi3 (dest, subtarget,
1094                                  GEN_INT (val - (val & mask))));
1095           return;
1096         }
1097       else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1098         {
1099           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1100
1101           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1102                                   GEN_INT ((val + comp) & mask)));
1103           emit_insn (gen_adddi3 (dest, subtarget,
1104                                  GEN_INT (val - ((val + comp) & mask))));
1105           return;
1106         }
1107       else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1108         {
1109           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1110
1111           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1112                                   GEN_INT ((val - comp) | ~mask)));
1113           emit_insn (gen_adddi3 (dest, subtarget,
1114                                  GEN_INT (val - ((val - comp) | ~mask))));
1115           return;
1116         }
1117       else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1118         {
1119           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1120
1121           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1122                                   GEN_INT (val | ~mask)));
1123           emit_insn (gen_adddi3 (dest, subtarget,
1124                                  GEN_INT (val - (val | ~mask))));
1125           return;
1126         }
1127     }
1128
1129   /* See if we can do it by arithmetically combining two
1130      immediates.  */
1131   for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1132     {
1133       int j;
1134       mask = 0xffff;
1135
           /* A bitmask entry plus (or minus) a difference accepted by
              aarch64_uimm12_shift: SET the bitmask, then add the rest.  */
1136       if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1137           || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1138         {
1139           subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1140           emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1141                                   GEN_INT (aarch64_bitmasks[i])));
1142           emit_insn (gen_adddi3 (dest, subtarget,
1143                                  GEN_INT (val - aarch64_bitmasks[i])));
1144           return;
1145         }
1146
           /* A bitmask entry that equals VAL outside one 16-bit chunk: SET
              the bitmask, then insert the differing chunk.  */
1147       for (j = 0; j < 64; j += 16, mask <<= 16)
1148         {
1149           if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1150             {
1151               emit_insn (gen_rtx_SET (VOIDmode, dest,
1152                                       GEN_INT (aarch64_bitmasks[i])));
1153               emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1154                                          GEN_INT ((val >> j) & 0xffff)));
1155               return;
1156             }
1157         }
1158     }
1159
1160   /* See if we can do it by logically combining two immediates.  */
1161   for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1162     {
           /* Entry I is a subset of VAL's bits: look for a later entry J
              whose OR with it gives VAL.  */
1163       if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1164         {
1165           int j;
1166
1167           for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1168             if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1169               {
1170                 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1171                 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1172                                         GEN_INT (aarch64_bitmasks[i])));
1173                 emit_insn (gen_iordi3 (dest, subtarget,
1174                                        GEN_INT (aarch64_bitmasks[j])));
1175                 return;
1176               }
1177         }
           /* Entry I is a superset of VAL's bits: look for a later entry J
              whose AND with it gives VAL.  */
1178       else if ((val & aarch64_bitmasks[i]) == val)
1179         {
1180           int j;
1181
1182           for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1183             if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1184               {
1185
1186                 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1187                 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1188                                         GEN_INT (aarch64_bitmasks[j])));
1189                 emit_insn (gen_anddi3 (dest, subtarget,
1190                                        GEN_INT (aarch64_bitmasks[i])));
1191                 return;
1192               }
1193         }
1194     }
1195
       /* Fallback: SET DEST from the lowest non-zero 16-bit chunk of VAL,
          then insert each further non-zero chunk at its bit position.  */
1196  simple_sequence:
1197   first = true;
1198   mask = 0xffff;
1199   for (i = 0; i < 64; i += 16, mask <<= 16)
1200     {
1201       if ((val & mask) != 0)
1202         {
1203           if (first)
1204             {
1205               emit_insn (gen_rtx_SET (VOIDmode, dest,
1206                                       GEN_INT (val & mask)));
1207               first = false;
1208             }
1209           else
1210             emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1211                                        GEN_INT ((val >> i) & 0xffff)));
1212         }
1213     }
1214 }
1215
1216 static bool
1217 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1218 {
1219   /* Indirect calls are not currently supported.  */
1220   if (decl == NULL)
1221     return false;
1222
1223   /* Cannot tail-call to long-calls, since these are outside of the
1224      range of a branch instruction (we could handle this if we added
1225      support for indirect tail-calls.  */
1226   if (aarch64_decl_is_long_call_p (decl))
1227     return false;
1228
1229   return true;
1230 }
1231
1232 /* Implement TARGET_PASS_BY_REFERENCE.  */
1233
1234 static bool
1235 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1236                            enum machine_mode mode,
1237                            const_tree type,
1238                            bool named ATTRIBUTE_UNUSED)
1239 {
1240   HOST_WIDE_INT size;
1241   enum machine_mode dummymode;
1242   int nregs;
1243
1244   /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
1245   size = (mode == BLKmode && type)
1246     ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1247
1248   /* Aggregates are passed by reference based on their size.  */
1249   if (type && AGGREGATE_TYPE_P (type))
1250     {
1251       size = int_size_in_bytes (type);
1252     }
1253
1254   /* Variable sized arguments are always returned by reference.  */
1255   if (size < 0)
1256     return true;
1257
1258   /* Can this be a candidate to be passed in fp/simd register(s)?  */
1259   if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1260                                                &dummymode, &nregs,
1261                                                NULL))
1262     return false;
1263
1264   /* Arguments which are variable sized or larger than 2 registers are
1265      passed by reference unless they are a homogenous floating point
1266      aggregate.  */
1267   return size > 2 * UNITS_PER_WORD;
1268 }
1269
1270 /* Return TRUE if VALTYPE is padded to its least significant bits.  */
1271 static bool
1272 aarch64_return_in_msb (const_tree valtype)
1273 {
1274   enum machine_mode dummy_mode;
1275   int dummy_int;
1276
1277   /* Never happens in little-endian mode.  */
1278   if (!BYTES_BIG_ENDIAN)
1279     return false;
1280
1281   /* Only composite types smaller than or equal to 16 bytes can
1282      be potentially returned in registers.  */
1283   if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1284       || int_size_in_bytes (valtype) <= 0
1285       || int_size_in_bytes (valtype) > 16)
1286     return false;
1287
1288   /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1289      or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1290      is always passed/returned in the least significant bits of fp/simd
1291      register(s).  */
1292   if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1293                                                &dummy_mode, &dummy_int, NULL))
1294     return false;
1295
1296   return true;
1297 }
1298
1299 /* Implement TARGET_FUNCTION_VALUE.
1300    Define how to find the value returned by a function.  */
1301
1302 static rtx
1303 aarch64_function_value (const_tree type, const_tree func,
1304                         bool outgoing ATTRIBUTE_UNUSED)
1305 {
1306   enum machine_mode mode;
1307   int unsignedp;
1308   int count;
1309   enum machine_mode ag_mode;
1310
1311   mode = TYPE_MODE (type);
1312   if (INTEGRAL_TYPE_P (type))
1313     mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1314
1315   if (aarch64_return_in_msb (type))
1316     {
1317       HOST_WIDE_INT size = int_size_in_bytes (type);
1318
1319       if (size % UNITS_PER_WORD != 0)
1320         {
1321           size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1322           mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1323         }
1324     }
1325
1326   if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1327                                                &ag_mode, &count, NULL))
1328     {
1329       if (!aarch64_composite_type_p (type, mode))
1330         {
1331           gcc_assert (count == 1 && mode == ag_mode);
1332           return gen_rtx_REG (mode, V0_REGNUM);
1333         }
1334       else
1335         {
1336           int i;
1337           rtx par;
1338
1339           par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1340           for (i = 0; i < count; i++)
1341             {
1342               rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1343               tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1344                                        GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1345               XVECEXP (par, 0, i) = tmp;
1346             }
1347           return par;
1348         }
1349     }
1350   else
1351     return gen_rtx_REG (mode, R0_REGNUM);
1352 }
1353
1354 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1355    Return true if REGNO is the number of a hard register in which the values
1356    of called function may come back.  */
1357
1358 static bool
1359 aarch64_function_value_regno_p (const unsigned int regno)
1360 {
1361   /* Maximum of 16 bytes can be returned in the general registers.  Examples
1362      of 16-byte return values are: 128-bit integers and 16-byte small
1363      structures (excluding homogeneous floating-point aggregates).  */
1364   if (regno == R0_REGNUM || regno == R1_REGNUM)
1365     return true;
1366
1367   /* Up to four fp/simd registers can return a function value, e.g. a
1368      homogeneous floating-point aggregate having four members.  */
1369   if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1370     return !TARGET_GENERAL_REGS_ONLY;
1371
1372   return false;
1373 }
1374
1375 /* Implement TARGET_RETURN_IN_MEMORY.
1376
1377    If the type T of the result of a function is such that
1378      void func (T arg)
1379    would require that arg be passed as a value in a register (or set of
1380    registers) according to the parameter passing rules, then the result
1381    is returned in the same registers as would be used for such an
1382    argument.  */
1383
1384 static bool
1385 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1386 {
1387   HOST_WIDE_INT size;
1388   enum machine_mode ag_mode;
1389   int count;
1390
1391   if (!AGGREGATE_TYPE_P (type)
1392       && TREE_CODE (type) != COMPLEX_TYPE
1393       && TREE_CODE (type) != VECTOR_TYPE)
1394     /* Simple scalar types always returned in registers.  */
1395     return false;
1396
1397   if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1398                                                type,
1399                                                &ag_mode,
1400                                                &count,
1401                                                NULL))
1402     return false;
1403
1404   /* Types larger than 2 registers returned in memory.  */
1405   size = int_size_in_bytes (type);
1406   return (size < 0 || size > 2 * UNITS_PER_WORD);
1407 }
1408
1409 static bool
1410 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1411                                const_tree type, int *nregs)
1412 {
1413   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1414   return aarch64_vfp_is_call_or_return_candidate (mode,
1415                                                   type,
1416                                                   &pcum->aapcs_vfp_rmode,
1417                                                   nregs,
1418                                                   NULL);
1419 }
1420
1421 /* Given MODE and TYPE of a function argument, return the alignment in
1422    bits.  The idea is to suppress any stronger alignment requested by
1423    the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1424    This is a helper function for local use only.  */
1425
1426 static unsigned int
1427 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1428 {
1429   unsigned int alignment;
1430
1431   if (type)
1432     {
1433       if (!integer_zerop (TYPE_SIZE (type)))
1434         {
1435           if (TYPE_MODE (type) == mode)
1436             alignment = TYPE_ALIGN (type);
1437           else
1438             alignment = GET_MODE_ALIGNMENT (mode);
1439         }
1440       else
1441         alignment = 0;
1442     }
1443   else
1444     alignment = GET_MODE_ALIGNMENT (mode);
1445
1446   return alignment;
1447 }
1448
1449 /* Layout a function argument according to the AAPCS64 rules.  The rule
1450    numbers refer to the rule numbers in the AAPCS64.
     
        The result is recorded in the cumulative-args state behind PCUM_V:
        aapcs_reg receives the register rtx (or PARALLEL) when the argument
        is passed in registers, aapcs_nextncrn/aapcs_nextnvrn the register
        numbers to continue from, and aapcs_stack_words the number of stack
        words when it is passed on the stack.  Repeated calls for the same
        argument are no-ops until aapcs_arg_processed is cleared again.
        NAMED is not examined.  */
1451
1452 static void
1453 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1454                     const_tree type,
1455                     bool named ATTRIBUTE_UNUSED)
1456 {
1457   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1458   int ncrn, nvrn, nregs;
1459   bool allocate_ncrn, allocate_nvrn;
1460   HOST_WIDE_INT size;
1461
1462   /* We need to do this once per argument.  */
1463   if (pcum->aapcs_arg_processed)
1464     return;
1465
1466   pcum->aapcs_arg_processed = true;
1467
1468   /* Size in bytes, rounded up to a multiple of UNITS_PER_WORD bytes.  */
1469   size
1470     = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1471                         UNITS_PER_WORD);
1472
       /* General registers are considered for anything that is not a
          floating-point type (or, without a type, a floating-point mode).  */
1473   allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1474   allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1475                                                  mode,
1476                                                  type,
1477                                                  &nregs);
1478
1479   /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1480      The following code thus handles passing by SIMD/FP registers first.  */
1481
1482   nvrn = pcum->aapcs_nvrn;
1483
1484   /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
1485      and homogenous short-vector aggregates (HVA).  */
1486   if (allocate_nvrn)
1487     {
1488       if (nvrn + nregs <= NUM_FP_ARG_REGS)
1489         {
1490           pcum->aapcs_nextnvrn = nvrn + nregs;
1491           if (!aarch64_composite_type_p (type, mode))
1492             {
1493               gcc_assert (nregs == 1);
1494               pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1495             }
1496           else
1497             {
                   /* One register per element, each tagged with the
                      element's byte offset within the aggregate.  */
1498               rtx par;
1499               int i;
1500               par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1501               for (i = 0; i < nregs; i++)
1502                 {
1503                   rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1504                                          V0_REGNUM + nvrn + i);
1505                   tmp = gen_rtx_EXPR_LIST
1506                     (VOIDmode, tmp,
1507                      GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1508                   XVECEXP (par, 0, i) = tmp;
1509                 }
1510               pcum->aapcs_reg = par;
1511             }
1512           return;
1513         }
1514       else
1515         {
1516           /* C.3 NSRN is set to 8.  */
1517           pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1518           goto on_stack;
1519         }
1520     }
1521
1522   ncrn = pcum->aapcs_ncrn;
       /* From here on NREGS counts general registers (words).  */
1523   nregs = size / UNITS_PER_WORD;
1524
1525   /* C6 - C9.  though the sign and zero extension semantics are
1526      handled elsewhere.  This is the case where the argument fits
1527      entirely general registers.  */
1528   if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1529     {
1530       unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1531
1532       gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1533
1534       /* C.8 if the argument has an alignment of 16 then the NGRN is
1535          rounded up to the next even number.  */
1536       if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1537         {
1538           ++ncrn;
1539           gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1540         }
1541       /* NREGS can be 0 when e.g. an empty structure is to be passed.
1542          A reg is still generated for it, but the caller should be smart
1543          enough not to use it.  */
1544       if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1545         {
1546           pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1547         }
1548       else
1549         {
               /* Two words in a non-integer mode: describe each word-sized
                  register separately with its byte offset.  */
1550           rtx par;
1551           int i;
1552
1553           par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1554           for (i = 0; i < nregs; i++)
1555             {
1556               rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1557               tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1558                                        GEN_INT (i * UNITS_PER_WORD));
1559               XVECEXP (par, 0, i) = tmp;
1560             }
1561           pcum->aapcs_reg = par;
1562         }
1563
1564       pcum->aapcs_nextncrn = ncrn + nregs;
1565       return;
1566     }
1567
1568   /* C.11  */
1569   pcum->aapcs_nextncrn = NUM_ARG_REGS;
1570
1571   /* The argument is passed on stack; record the needed number of words for
1572      this argument and align the total size if necessary.  */
1573 on_stack:
1574   pcum->aapcs_stack_words = size / UNITS_PER_WORD;
       /* aapcs_stack_size is rounded with a granule of 16 / UNITS_PER_WORD,
          i.e. it is counted in words here, not bytes.  */
1575   if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1576     pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1577                                                16 / UNITS_PER_WORD);
1578   return;
1579 }
1580
1581 /* Implement TARGET_FUNCTION_ARG.  */
1582
1583 static rtx
1584 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1585                       const_tree type, bool named)
1586 {
1587   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1588   gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1589
1590   if (mode == VOIDmode)
1591     return NULL_RTX;
1592
1593   aarch64_layout_arg (pcum_v, mode, type, named);
1594   return pcum->aapcs_reg;
1595 }
1596
1597 void
1598 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1599                            const_tree fntype ATTRIBUTE_UNUSED,
1600                            rtx libname ATTRIBUTE_UNUSED,
1601                            const_tree fndecl ATTRIBUTE_UNUSED,
1602                            unsigned n_named ATTRIBUTE_UNUSED)
1603 {
1604   pcum->aapcs_ncrn = 0;
1605   pcum->aapcs_nvrn = 0;
1606   pcum->aapcs_nextncrn = 0;
1607   pcum->aapcs_nextnvrn = 0;
1608   pcum->pcs_variant = ARM_PCS_AAPCS64;
1609   pcum->aapcs_reg = NULL_RTX;
1610   pcum->aapcs_arg_processed = false;
1611   pcum->aapcs_stack_words = 0;
1612   pcum->aapcs_stack_size = 0;
1613
1614   return;
1615 }
1616
1617 static void
1618 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1619                               enum machine_mode mode,
1620                               const_tree type,
1621                               bool named)
1622 {
1623   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1624   if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1625     {
1626       aarch64_layout_arg (pcum_v, mode, type, named);
1627       gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1628                   != (pcum->aapcs_stack_words != 0));
1629       pcum->aapcs_arg_processed = false;
1630       pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1631       pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1632       pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1633       pcum->aapcs_stack_words = 0;
1634       pcum->aapcs_reg = NULL_RTX;
1635     }
1636 }
1637
1638 bool
1639 aarch64_function_arg_regno_p (unsigned regno)
1640 {
1641   return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1642           || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1643 }
1644
1645 /* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
1646    PARM_BOUNDARY bits of alignment, but will be given anything up
1647    to STACK_BOUNDARY bits if the type requires it.  This makes sure
1648    that both before and after the layout of each argument, the Next
1649    Stacked Argument Address (NSAA) will have a minimum alignment of
1650    8 bytes.  */
1651
1652 static unsigned int
1653 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1654 {
1655   unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1656
1657   if (alignment < PARM_BOUNDARY)
1658     alignment = PARM_BOUNDARY;
1659   if (alignment > STACK_BOUNDARY)
1660     alignment = STACK_BOUNDARY;
1661   return alignment;
1662 }
1663
1664 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1665
1666    Return true if an argument passed on the stack should be padded upwards,
1667    i.e. if the least-significant byte of the stack slot has useful data.
1668
1669    Small aggregate types are placed in the lowest memory address.
1670
1671    The related parameter passing rules are B.4, C.3, C.5 and C.14.  */
1672
1673 bool
1674 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1675 {
1676   /* On little-endian targets, the least significant byte of every stack
1677      argument is passed at the lowest byte address of the stack slot.  */
1678   if (!BYTES_BIG_ENDIAN)
1679     return true;
1680
1681   /* Otherwise, integral, floating-point and pointer types are padded downward:
1682      the least significant byte of a stack argument is passed at the highest
1683      byte address of the stack slot.  */
1684   if (type
1685       ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1686          || POINTER_TYPE_P (type))
1687       : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1688     return false;
1689
1690   /* Everything else padded upward, i.e. data in first byte of stack slot.  */
1691   return true;
1692 }
1693
1694 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1695
1696    It specifies padding for the last (may also be the only)
1697    element of a block move between registers and memory.  If
1698    assuming the block is in the memory, padding upward means that
1699    the last element is padded after its highest significant byte,
1700    while in downward padding, the last element is padded at the
1701    its least significant byte side.
1702
1703    Small aggregates and small complex types are always padded
1704    upwards.
1705
1706    We don't need to worry about homogeneous floating-point or
1707    short-vector aggregates; their move is not affected by the
1708    padding direction determined here.  Regardless of endianness,
1709    each element of such an aggregate is put in the least
1710    significant bits of a fp/simd register.
1711
1712    Return !BYTES_BIG_ENDIAN if the least significant byte of the
1713    register has useful data, and return the opposite if the most
1714    significant byte does.  */
1715
1716 bool
1717 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1718                      bool first ATTRIBUTE_UNUSED)
1719 {
1720
1721   /* Small composite types are always padded upward.  */
1722   if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1723     {
1724       HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1725                             : GET_MODE_SIZE (mode));
1726       if (size < 2 * UNITS_PER_WORD)
1727         return true;
1728     }
1729
1730   /* Otherwise, use the default padding.  */
1731   return !BYTES_BIG_ENDIAN;
1732 }
1733
1734 static enum machine_mode
1735 aarch64_libgcc_cmp_return_mode (void)
1736 {
1737   return SImode;
1738 }
1739
1740 static bool
1741 aarch64_frame_pointer_required (void)
1742 {
1743   /* If the function contains dynamic stack allocations, we need to
1744      use the frame pointer to access the static parts of the frame.  */
1745   if (cfun->calls_alloca)
1746     return true;
1747
1748   /* In aarch64_override_options_after_change
1749      flag_omit_leaf_frame_pointer turns off the frame pointer by
1750      default.  Turn it back on now if we've not got a leaf
1751      function.  */
1752   if (flag_omit_leaf_frame_pointer
1753       && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1754     return true;
1755
1756   return false;
1757 }
1758
1759 /* Mark the registers that need to be saved by the callee and calculate
1760    the size of the callee-saved registers area and frame record (both FP
1761    and LR may be omitted).
     
        Works in two passes over cfun->machine->frame.reg_offset: first every
        entry is set to -1 (not saved) and the registers to save are marked
        with 0; then each marked register is given its real byte offset.
        Also sets fp_lr_offset, padding0, saved_regs_size and laid_out, and
        hardfp_offset when a frame pointer is needed.  */
1762 static void
1763 aarch64_layout_frame (void)
1764 {
       /* Byte offset of the next save slot to hand out.  */
1765   HOST_WIDE_INT offset = 0;
1766   int regno;
1767
       /* Once reload has completed, an existing layout is kept as is.  */
1768   if (reload_completed && cfun->machine->frame.laid_out)
1769     return;
1770
1771   cfun->machine->frame.fp_lr_offset = 0;
1772
1773   /* First mark all the registers that really need to be saved...  */
1774   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1775     cfun->machine->frame.reg_offset[regno] = -1;
1776
1777   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1778     cfun->machine->frame.reg_offset[regno] = -1;
1779
1780   /* ... that includes the eh data registers (if needed)...  */
1781   if (crtl->calls_eh_return)
1782     for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1783       cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1784
1785   /* ... and any callee saved register that dataflow says is live.  */
1786   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1787     if (df_regs_ever_live_p (regno)
1788         && !call_used_regs[regno])
1789       cfun->machine->frame.reg_offset[regno] = 0;
1790
1791   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1792     if (df_regs_ever_live_p (regno)
1793         && !call_used_regs[regno])
1794       cfun->machine->frame.reg_offset[regno] = 0;
1795
       /* With a frame pointer, R29 and R30 are always saved.  */
1796   if (frame_pointer_needed)
1797     {
1798       cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1799       cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1800       cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1801     }
1802
1803   /* Now assign stack slots for them.  */
       /* R0..R28 first; R29 and R30 are handled separately below so that
          they receive the last two slots.  */
1804   for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1805     if (cfun->machine->frame.reg_offset[regno] != -1)
1806       {
1807         cfun->machine->frame.reg_offset[regno] = offset;
1808         offset += UNITS_PER_WORD;
1809       }
1810
1811   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1812     if (cfun->machine->frame.reg_offset[regno] != -1)
1813       {
1814         cfun->machine->frame.reg_offset[regno] = offset;
1815         offset += UNITS_PER_WORD;
1816       }
1817
       /* fp_lr_offset ends up as the number of bytes taken by whichever
          of the R29 and R30 slots were allocated.  */
1818   if (frame_pointer_needed)
1819     {
1820       cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1821       offset += UNITS_PER_WORD;
1822       cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1823     }
1824
1825   if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1826     {
1827       cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1828       offset += UNITS_PER_WORD;
1829       cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1830     }
1831
       /* Round the save area up to STACK_BOUNDARY and remember how many
          padding bytes that added.  */
1832   cfun->machine->frame.padding0 =
1833     (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1834   offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1835
1836   cfun->machine->frame.saved_regs_size = offset;
1837   cfun->machine->frame.laid_out = true;
1838 }
1839
1840 /* Make the last instruction frame-related and note that it performs
1841    the operation described by FRAME_PATTERN.  */
1842
1843 static void
1844 aarch64_set_frame_expr (rtx frame_pattern)
1845 {
1846   rtx insn;
1847
1848   insn = get_last_insn ();
1849   RTX_FRAME_RELATED_P (insn) = 1;
1850   RTX_FRAME_RELATED_P (frame_pattern) = 1;
1851   REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1852                                       frame_pattern,
1853                                       REG_NOTES (insn));
1854 }
1855
1856 static bool
1857 aarch64_register_saved_on_entry (int regno)
1858 {
1859   return cfun->machine->frame.reg_offset[regno] != -1;
1860 }
1861
1862
1863 static void
1864 aarch64_save_or_restore_fprs (int start_offset, int increment,
1865                               bool restore, rtx base_rtx)
1866
1867 {
1868   unsigned regno;
1869   unsigned regno2;
1870   rtx insn;
1871   rtx (*gen_mem_ref)(enum machine_mode, rtx)
1872     = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1873
1874
1875   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1876     {
1877       if (aarch64_register_saved_on_entry (regno))
1878         {
1879           rtx mem;
1880           mem = gen_mem_ref (DFmode,
1881                              plus_constant (Pmode,
1882                                             base_rtx,
1883                                             start_offset));
1884
1885           for (regno2 = regno + 1;
1886                regno2 <= V31_REGNUM
1887                  && !aarch64_register_saved_on_entry (regno2);
1888                regno2++)
1889             {
1890               /* Empty loop.  */
1891             }
1892           if (regno2 <= V31_REGNUM &&
1893               aarch64_register_saved_on_entry (regno2))
1894             {
1895               rtx mem2;
1896               /* Next highest register to be saved.  */
1897               mem2 = gen_mem_ref (DFmode,
1898                                   plus_constant
1899                                   (Pmode,
1900                                    base_rtx,
1901                                    start_offset + increment));
1902               if (restore == false)
1903                 {
1904                   insn = emit_insn
1905                     ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1906                                         mem2, gen_rtx_REG (DFmode, regno2)));
1907
1908                 }
1909               else
1910                 {
1911                   insn = emit_insn
1912                     ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1913                                        gen_rtx_REG (DFmode, regno2), mem2));
1914
1915                   add_reg_note (insn, REG_CFA_RESTORE,
1916                                 gen_rtx_REG (DFmode, regno));
1917                   add_reg_note (insn, REG_CFA_RESTORE,
1918                                 gen_rtx_REG (DFmode, regno2));
1919                 }
1920
1921                   /* The first part of a frame-related parallel insn
1922                      is always assumed to be relevant to the frame
1923                      calculations; subsequent parts, are only
1924                      frame-related if explicitly marked.  */
1925               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1926               regno = regno2;
1927               start_offset += increment * 2;
1928             }
1929           else
1930             {
1931               if (restore == false)
1932                 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1933               else
1934                 {
1935                   insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1936                   add_reg_note (insn, REG_CFA_RESTORE,
1937                                 gen_rtx_REG (DImode, regno));
1938                 }
1939               start_offset += increment;
1940             }
1941           RTX_FRAME_RELATED_P (insn) = 1;
1942         }
1943     }
1944
1945 }
1946
1947
1948 /* offset from the stack pointer of where the saves and
1949    restore's have to happen.  */
1950 static void
1951 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1952                                             bool restore)
1953 {
1954   rtx insn;
1955   rtx base_rtx = stack_pointer_rtx;
1956   HOST_WIDE_INT start_offset = offset;
1957   HOST_WIDE_INT increment = UNITS_PER_WORD;
1958   rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1959   unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1960   unsigned regno;
1961   unsigned regno2;
1962
1963   for (regno = R0_REGNUM; regno <= limit; regno++)
1964     {
1965       if (aarch64_register_saved_on_entry (regno))
1966         {
1967           rtx mem;
1968           mem = gen_mem_ref (Pmode,
1969                              plus_constant (Pmode,
1970                                             base_rtx,
1971                                             start_offset));
1972
1973           for (regno2 = regno + 1;
1974                regno2 <= limit
1975                  && !aarch64_register_saved_on_entry (regno2);
1976                regno2++)
1977             {
1978               /* Empty loop.  */
1979             }
1980           if (regno2 <= limit &&
1981               aarch64_register_saved_on_entry (regno2))
1982             {
1983               rtx mem2;
1984               /* Next highest register to be saved.  */
1985               mem2 = gen_mem_ref (Pmode,
1986                                   plus_constant
1987                                   (Pmode,
1988                                    base_rtx,
1989                                    start_offset + increment));
1990               if (restore == false)
1991                 {
1992                   insn = emit_insn
1993                     ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1994                                         mem2, gen_rtx_REG (DImode, regno2)));
1995
1996                 }
1997               else
1998                 {
1999                   insn = emit_insn
2000                     ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
2001                                      gen_rtx_REG (DImode, regno2), mem2));
2002
2003                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2004                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
2005                 }
2006
2007                   /* The first part of a frame-related parallel insn
2008                      is always assumed to be relevant to the frame
2009                      calculations; subsequent parts, are only
2010                      frame-related if explicitly marked.  */
2011               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
2012                                             1)) = 1;
2013               regno = regno2;
2014               start_offset += increment * 2;
2015             }
2016           else
2017             {
2018               if (restore == false)
2019                 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
2020               else
2021                 {
2022                   insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
2023                   add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2024                 }
2025               start_offset += increment;
2026             }
2027           RTX_FRAME_RELATED_P (insn) = 1;
2028         }
2029     }
2030
2031   aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
2032
2033 }
2034
2035 /* AArch64 stack frames generated by this compiler look like:
2036
2037         +-------------------------------+
2038         |                               |
2039         |  incoming stack arguments     |
2040         |                               |
2041         +-------------------------------+ <-- arg_pointer_rtx
2042         |                               |
2043         |  callee-allocated save area   |
2044         |  for register varargs         |
2045         |                               |
2046         +-------------------------------+ <-- frame_pointer_rtx
2047         |                               |
2048         |  local variables              |
2049         |                               |
2050         +-------------------------------+
2051         |  padding0                     | \
2052         +-------------------------------+  |
2053         |                               |  |
2054         |                               |  |
2055         |  callee-saved registers       |  | frame.saved_regs_size
2056         |                               |  |
2057         +-------------------------------+  |
2058         |  LR'                          |  |
2059         +-------------------------------+  |
2060         |  FP'                          | /
2061       P +-------------------------------+ <-- hard_frame_pointer_rtx
2062         |  dynamic allocation           |
2063         +-------------------------------+
2064         |                               |
2065         |  outgoing stack arguments     |
2066         |                               |
2067         +-------------------------------+ <-- stack_pointer_rtx
2068
2069    Dynamic stack allocations such as alloca insert data at point P.
2070    They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2071    hard_frame_pointer_rtx unchanged.  */
2072
2073 /* Generate the prologue instructions for entry into a function.
2074    Establish the stack frame by decreasing the stack pointer with a
2075    properly calculated size and, if necessary, create a frame record
2076    filled with the values of LR and previous frame pointer.  The
2077    current FP is also set up if it is in use.  */
2078
2079 void
2080 aarch64_expand_prologue (void)
2081 {
2082   /* sub sp, sp, #<frame_size>
2083      stp {fp, lr}, [sp, #<frame_size> - 16]
2084      add fp, sp, #<frame_size> - hardfp_offset
2085      stp {cs_reg}, [fp, #-16] etc.
2086
2087      sub sp, sp, <final_adjustment_if_any>
2088   */
2089   HOST_WIDE_INT original_frame_size;    /* local variables + vararg save */
2090   HOST_WIDE_INT frame_size, offset;
2091   HOST_WIDE_INT fp_offset;              /* FP offset from SP */
2092   rtx insn;
2093
2094   aarch64_layout_frame ();
2095   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2096   gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2097               && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2098   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2099                 + crtl->outgoing_args_size);
2100   offset = frame_size = AARCH64_ROUND_UP (frame_size,
2101                                           STACK_BOUNDARY / BITS_PER_UNIT);
2102
2103   if (flag_stack_usage_info)
2104     current_function_static_stack_size = frame_size;
2105
2106   fp_offset = (offset
2107                - original_frame_size
2108                - cfun->machine->frame.saved_regs_size);
2109
2110   /* Store pairs and load pairs have a range only -512 to 504.  */
2111   if (offset >= 512)
2112     {
2113       /* When the frame has a large size, an initial decrease is done on
2114          the stack pointer to jump over the callee-allocated save area for
2115          register varargs, the local variable area and/or the callee-saved
2116          register area.  This will allow the pre-index write-back
2117          store pair instructions to be used for setting up the stack frame
2118          efficiently.  */
2119       offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2120       if (offset >= 512)
2121         offset = cfun->machine->frame.saved_regs_size;
2122
2123       frame_size -= (offset + crtl->outgoing_args_size);
2124       fp_offset = 0;
2125
2126       if (frame_size >= 0x1000000)
2127         {
2128           rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2129           emit_move_insn (op0, GEN_INT (-frame_size));
2130           emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2131           aarch64_set_frame_expr (gen_rtx_SET
2132                                   (Pmode, stack_pointer_rtx,
2133                                    plus_constant (Pmode,
2134                                                   stack_pointer_rtx,
2135                                                   -frame_size)));
2136         }
2137       else if (frame_size > 0)
2138         {
2139           if ((frame_size & 0xfff) != frame_size)
2140             {
2141               insn = emit_insn (gen_add2_insn
2142                                 (stack_pointer_rtx,
2143                                  GEN_INT (-(frame_size
2144                                             & ~(HOST_WIDE_INT)0xfff))));
2145               RTX_FRAME_RELATED_P (insn) = 1;
2146             }
2147           if ((frame_size & 0xfff) != 0)
2148             {
2149               insn = emit_insn (gen_add2_insn
2150                                 (stack_pointer_rtx,
2151                                  GEN_INT (-(frame_size
2152                                             & (HOST_WIDE_INT)0xfff))));
2153               RTX_FRAME_RELATED_P (insn) = 1;
2154             }
2155         }
2156     }
2157   else
2158     frame_size = -1;
2159
2160   if (offset > 0)
2161     {
2162       /* Save the frame pointer and lr if the frame pointer is needed
2163          first.  Make the frame pointer point to the location of the
2164          old frame pointer on the stack.  */
2165       if (frame_pointer_needed)
2166         {
2167           rtx mem_fp, mem_lr;
2168
2169           if (fp_offset)
2170             {
2171               insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2172                                                GEN_INT (-offset)));
2173               RTX_FRAME_RELATED_P (insn) = 1;
2174               aarch64_set_frame_expr (gen_rtx_SET
2175                                       (Pmode, stack_pointer_rtx,
2176                                        gen_rtx_MINUS (Pmode,
2177                                                       stack_pointer_rtx,
2178                                                       GEN_INT (offset))));
2179               mem_fp = gen_frame_mem (DImode,
2180                                       plus_constant (Pmode,
2181                                                      stack_pointer_rtx,
2182                                                      fp_offset));
2183               mem_lr = gen_frame_mem (DImode,
2184                                       plus_constant (Pmode,
2185                                                      stack_pointer_rtx,
2186                                                      fp_offset
2187                                                      + UNITS_PER_WORD));
2188               insn = emit_insn (gen_store_pairdi (mem_fp,
2189                                                   hard_frame_pointer_rtx,
2190                                                   mem_lr,
2191                                                   gen_rtx_REG (DImode,
2192                                                                LR_REGNUM)));
2193             }
2194           else
2195             {
2196               insn = emit_insn (gen_storewb_pairdi_di
2197                                 (stack_pointer_rtx, stack_pointer_rtx,
2198                                  hard_frame_pointer_rtx,
2199                                  gen_rtx_REG (DImode, LR_REGNUM),
2200                                  GEN_INT (-offset),
2201                                  GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2202               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2203             }
2204
2205           /* The first part of a frame-related parallel insn is always
2206              assumed to be relevant to the frame calculations;
2207              subsequent parts, are only frame-related if explicitly
2208              marked.  */
2209           RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2210           RTX_FRAME_RELATED_P (insn) = 1;
2211
2212           /* Set up frame pointer to point to the location of the
2213              previous frame pointer on the stack.  */
2214           insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2215                                            stack_pointer_rtx,
2216                                            GEN_INT (fp_offset)));
2217           aarch64_set_frame_expr (gen_rtx_SET
2218                                   (Pmode, hard_frame_pointer_rtx,
2219                                    plus_constant (Pmode,
2220                                                   stack_pointer_rtx,
2221                                                   fp_offset)));
2222           RTX_FRAME_RELATED_P (insn) = 1;
2223           insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2224                                            hard_frame_pointer_rtx));
2225         }
2226       else
2227         {
2228           insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2229                                            GEN_INT (-offset)));
2230           RTX_FRAME_RELATED_P (insn) = 1;
2231         }
2232
2233       aarch64_save_or_restore_callee_save_registers
2234         (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2235     }
2236
2237   /* when offset >= 512,
2238      sub sp, sp, #<outgoing_args_size> */
2239   if (frame_size > -1)
2240     {
2241       if (crtl->outgoing_args_size > 0)
2242         {
2243           insn = emit_insn (gen_add2_insn
2244                             (stack_pointer_rtx,
2245                              GEN_INT (- crtl->outgoing_args_size)));
2246           RTX_FRAME_RELATED_P (insn) = 1;
2247         }
2248     }
2249 }
2250
2251 /* Generate the epilogue instructions for returning from a function.  */
2252 void
2253 aarch64_expand_epilogue (bool for_sibcall)
2254 {
2255   HOST_WIDE_INT original_frame_size, frame_size, offset;
2256   HOST_WIDE_INT fp_offset;
2257   rtx insn;
2258   rtx cfa_reg;
2259
2260   aarch64_layout_frame ();
2261   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2262   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2263                 + crtl->outgoing_args_size);
2264   offset = frame_size = AARCH64_ROUND_UP (frame_size,
2265                                           STACK_BOUNDARY / BITS_PER_UNIT);
2266
2267   fp_offset = (offset
2268                - original_frame_size
2269                - cfun->machine->frame.saved_regs_size);
2270
2271   cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2272
2273   /* Store pairs and load pairs have a range only -512 to 504.  */
2274   if (offset >= 512)
2275     {
2276       offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2277       if (offset >= 512)
2278         offset = cfun->machine->frame.saved_regs_size;
2279
2280       frame_size -= (offset + crtl->outgoing_args_size);
2281       fp_offset = 0;
2282       if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2283         {
2284           insn = emit_insn (gen_add2_insn
2285                             (stack_pointer_rtx,
2286                              GEN_INT (crtl->outgoing_args_size)));
2287           RTX_FRAME_RELATED_P (insn) = 1;
2288         }
2289     }
2290   else
2291     frame_size = -1;
2292
2293   /* If there were outgoing arguments or we've done dynamic stack
2294      allocation, then restore the stack pointer from the frame
2295      pointer.  This is at most one insn and more efficient than using
2296      GCC's internal mechanism.  */
2297   if (frame_pointer_needed
2298       && (crtl->outgoing_args_size || cfun->calls_alloca))
2299     {
2300       insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2301                                        hard_frame_pointer_rtx,
2302                                        GEN_INT (- fp_offset)));
2303       RTX_FRAME_RELATED_P (insn) = 1;
2304       /* As SP is set to (FP - fp_offset), according to the rules in
2305          dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2306          from the value of SP from now on.  */
2307       cfa_reg = stack_pointer_rtx;
2308     }
2309
2310   aarch64_save_or_restore_callee_save_registers
2311     (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2312
2313   /* Restore the frame pointer and lr if the frame pointer is needed.  */
2314   if (offset > 0)
2315     {
2316       if (frame_pointer_needed)
2317         {
2318           rtx mem_fp, mem_lr;
2319
2320           if (fp_offset)
2321             {
2322               mem_fp = gen_frame_mem (DImode,
2323                                       plus_constant (Pmode,
2324                                                      stack_pointer_rtx,
2325                                                      fp_offset));
2326               mem_lr = gen_frame_mem (DImode,
2327                                       plus_constant (Pmode,
2328                                                      stack_pointer_rtx,
2329                                                      fp_offset
2330                                                      + UNITS_PER_WORD));
2331               insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2332                                                  mem_fp,
2333                                                  gen_rtx_REG (DImode,
2334                                                               LR_REGNUM),
2335                                                  mem_lr));
2336             }
2337           else
2338             {
2339               insn = emit_insn (gen_loadwb_pairdi_di
2340                                 (stack_pointer_rtx,
2341                                  stack_pointer_rtx,
2342                                  hard_frame_pointer_rtx,
2343                                  gen_rtx_REG (DImode, LR_REGNUM),
2344                                  GEN_INT (offset),
2345                                  GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2346               RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2347               add_reg_note (insn, REG_CFA_ADJUST_CFA,
2348                             (gen_rtx_SET (Pmode, stack_pointer_rtx,
2349                                           plus_constant (Pmode, cfa_reg,
2350                                                          offset))));
2351             }
2352
2353           /* The first part of a frame-related parallel insn
2354              is always assumed to be relevant to the frame
2355              calculations; subsequent parts, are only
2356              frame-related if explicitly marked.  */
2357           RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2358           RTX_FRAME_RELATED_P (insn) = 1;
2359           add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2360           add_reg_note (insn, REG_CFA_RESTORE,
2361                         gen_rtx_REG (DImode, LR_REGNUM));
2362
2363           if (fp_offset)
2364             {
2365               insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2366                                                GEN_INT (offset)));
2367               RTX_FRAME_RELATED_P (insn) = 1;
2368             }
2369         }
2370       else
2371         {
2372           insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2373                                            GEN_INT (offset)));
2374           RTX_FRAME_RELATED_P (insn) = 1;
2375         }
2376     }
2377
2378   /* Stack adjustment for exception handler.  */
2379   if (crtl->calls_eh_return)
2380     {
2381       /* We need to unwind the stack by the offset computed by
2382          EH_RETURN_STACKADJ_RTX.  However, at this point the CFA is
2383          based on SP.  Ideally we would update the SP and define the
2384          CFA along the lines of:
2385
2386          SP = SP + EH_RETURN_STACKADJ_RTX
2387          (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2388
2389          However the dwarf emitter only understands a constant
2390          register offset.
2391
2392          The solution chosen here is to use the otherwise unused IP0
2393          as a temporary register to hold the current SP value.  The
2394          CFA is described using IP0 then SP is modified.  */
2395
2396       rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2397
2398       insn = emit_move_insn (ip0, stack_pointer_rtx);
2399       add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2400       RTX_FRAME_RELATED_P (insn) = 1;
2401
2402       emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2403
2404       /* Ensure the assignment to IP0 does not get optimized away.  */
2405       emit_use (ip0);
2406     }
2407
2408   if (frame_size > -1)
2409     {
2410       if (frame_size >= 0x1000000)
2411         {
2412           rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2413           emit_move_insn (op0, GEN_INT (frame_size));
2414           emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2415           aarch64_set_frame_expr (gen_rtx_SET
2416                                   (Pmode, stack_pointer_rtx,
2417                                    plus_constant (Pmode,
2418                                                   stack_pointer_rtx,
2419                                                   frame_size)));
2420         }
2421       else if (frame_size > 0)
2422         {
2423           if ((frame_size & 0xfff) != 0)
2424             {
2425               insn = emit_insn (gen_add2_insn
2426                                 (stack_pointer_rtx,
2427                                  GEN_INT ((frame_size
2428                                            & (HOST_WIDE_INT) 0xfff))));
2429               RTX_FRAME_RELATED_P (insn) = 1;
2430             }
2431           if ((frame_size & 0xfff) != frame_size)
2432             {
2433               insn = emit_insn (gen_add2_insn
2434                                 (stack_pointer_rtx,
2435                                  GEN_INT ((frame_size
2436                                            & ~ (HOST_WIDE_INT) 0xfff))));
2437               RTX_FRAME_RELATED_P (insn) = 1;
2438             }
2439         }
2440
2441         aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2442                                              plus_constant (Pmode,
2443                                                             stack_pointer_rtx,
2444                                                             offset)));
2445     }
2446
2447   emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2448   if (!for_sibcall)
2449     emit_jump_insn (ret_rtx);
2450 }
2451
2452 /* Return the place to copy the exception unwinding return address to.
2453    This will probably be a stack slot, but could (in theory be the
2454    return register).  */
2455 rtx
2456 aarch64_final_eh_return_addr (void)
2457 {
2458   HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2459   aarch64_layout_frame ();
2460   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2461   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2462                 + crtl->outgoing_args_size);
2463   offset = frame_size = AARCH64_ROUND_UP (frame_size,
2464                                           STACK_BOUNDARY / BITS_PER_UNIT);
2465   fp_offset = offset
2466     - original_frame_size
2467     - cfun->machine->frame.saved_regs_size;
2468
2469   if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2470     return gen_rtx_REG (DImode, LR_REGNUM);
2471
2472   /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
2473      result in a store to save LR introduced by builtin_eh_return () being
2474      incorrectly deleted because the alias is not detected.
2475      So in the calculation of the address to copy the exception unwinding
2476      return address to, we note 2 cases.
2477      If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2478      we return a SP-relative location since all the addresses are SP-relative
2479      in this case.  This prevents the store from being optimized away.
2480      If the fp_offset is not 0, then the addresses will be FP-relative and
2481      therefore we return a FP-relative location.  */
2482
2483   if (frame_pointer_needed)
2484     {
2485       if (fp_offset)
2486         return gen_frame_mem (DImode,
2487                               plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2488       else
2489         return gen_frame_mem (DImode,
2490                               plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2491     }
2492
2493   /* If FP is not needed, we calculate the location of LR, which would be
2494      at the top of the saved registers block.  */
2495
2496   return gen_frame_mem (DImode,
2497                         plus_constant (Pmode,
2498                                        stack_pointer_rtx,
2499                                        fp_offset
2500                                        + cfun->machine->frame.saved_regs_size
2501                                        - 2 * UNITS_PER_WORD));
2502 }
2503
2504 /* Possibly output code to build up a constant in a register.  For
2505    the benefit of the costs infrastructure, returns the number of
2506    instructions which would be emitted.  GENERATE inhibits or
2507    enables code generation.  */
2508
2509 static int
2510 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2511 {
2512   int insns = 0;
2513
2514   if (aarch64_bitmask_imm (val, DImode))
2515     {
2516       if (generate)
2517         emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2518       insns = 1;
2519     }
2520   else
2521     {
2522       int i;
2523       int ncount = 0;
2524       int zcount = 0;
2525       HOST_WIDE_INT valp = val >> 16;
2526       HOST_WIDE_INT valm;
2527       HOST_WIDE_INT tval;
2528
2529       for (i = 16; i < 64; i += 16)
2530         {
2531           valm = (valp & 0xffff);
2532
2533           if (valm != 0)
2534             ++ zcount;
2535
2536           if (valm != 0xffff)
2537             ++ ncount;
2538
2539           valp >>= 16;
2540         }
2541
2542       /* zcount contains the number of additional MOVK instructions
2543          required if the constant is built up with an initial MOVZ instruction,
2544          while ncount is the number of MOVK instructions required if starting
2545          with a MOVN instruction.  Choose the sequence that yields the fewest
2546          number of instructions, preferring MOVZ instructions when they are both
2547          the same.  */
2548       if (ncount < zcount)
2549         {
2550           if (generate)
2551             emit_move_insn (gen_rtx_REG (Pmode, regnum),
2552                             GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2553           tval = 0xffff;
2554           insns++;
2555         }
2556       else
2557         {
2558           if (generate)
2559             emit_move_insn (gen_rtx_REG (Pmode, regnum),
2560                             GEN_INT (val & 0xffff));
2561           tval = 0;
2562           insns++;
2563         }
2564
2565       val >>= 16;
2566
2567       for (i = 16; i < 64; i += 16)
2568         {
2569           if ((val & 0xffff) != tval)
2570             {
2571               if (generate)
2572                 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2573                                            GEN_INT (i),
2574                                            GEN_INT (val & 0xffff)));
2575               insns++;
2576             }
2577           val >>= 16;
2578         }
2579     }
2580   return insns;
2581 }
2582
2583 static void
2584 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2585 {
2586   HOST_WIDE_INT mdelta = delta;
2587   rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2588   rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2589
2590   if (mdelta < 0)
2591     mdelta = -mdelta;
2592
2593   if (mdelta >= 4096 * 4096)
2594     {
2595       (void) aarch64_build_constant (scratchreg, delta, true);
2596       emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2597     }
2598   else if (mdelta > 0)
2599     {
2600       if (mdelta >= 4096)
2601         {
2602           emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2603           rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2604           if (delta < 0)
2605             emit_insn (gen_rtx_SET (Pmode, this_rtx,
2606                                     gen_rtx_MINUS (Pmode, this_rtx, shift)));
2607           else
2608             emit_insn (gen_rtx_SET (Pmode, this_rtx,
2609                                     gen_rtx_PLUS (Pmode, this_rtx, shift)));
2610         }
2611       if (mdelta % 4096 != 0)
2612         {
2613           scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2614           emit_insn (gen_rtx_SET (Pmode, this_rtx,
2615                                   gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2616         }
2617     }
2618 }
2619
2620 /* Output code to add DELTA to the first argument, and then jump
2621    to FUNCTION.  Used for C++ multiple inheritance.  */
2622 static void
2623 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2624                          HOST_WIDE_INT delta,
2625                          HOST_WIDE_INT vcall_offset,
2626                          tree function)
2627 {
2628   /* The this pointer is always in x0.  Note that this differs from
2629      Arm where the this pointer maybe bumped to r1 if r0 is required
2630      to return a pointer to an aggregate.  On AArch64 a result value
2631      pointer will be in x8.  */
2632   int this_regno = R0_REGNUM;
2633   rtx this_rtx, temp0, temp1, addr, insn, funexp;
2634
2635   reload_completed = 1;
2636   emit_note (NOTE_INSN_PROLOGUE_END);
2637
2638   if (vcall_offset == 0)
2639     aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2640   else
2641     {
2642       gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2643
2644       this_rtx = gen_rtx_REG (Pmode, this_regno);
2645       temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2646       temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2647
2648       addr = this_rtx;
2649       if (delta != 0)
2650         {
2651           if (delta >= -256 && delta < 256)
2652             addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2653                                        plus_constant (Pmode, this_rtx, delta));
2654           else
2655             aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2656         }
2657
2658       if (Pmode == ptr_mode)
2659         aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2660       else
2661         aarch64_emit_move (temp0,
2662                            gen_rtx_ZERO_EXTEND (Pmode,
2663                                                 gen_rtx_MEM (ptr_mode, addr)));
2664
2665       if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2666           addr = plus_constant (Pmode, temp0, vcall_offset);
2667       else
2668         {
2669           (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2670           addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2671         }
2672
2673       if (Pmode == ptr_mode)
2674         aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2675       else
2676         aarch64_emit_move (temp1,
2677                            gen_rtx_SIGN_EXTEND (Pmode,
2678                                                 gen_rtx_MEM (ptr_mode, addr)));
2679
2680       emit_insn (gen_add2_insn (this_rtx, temp1));
2681     }
2682
2683   /* Generate a tail call to the target function.  */
2684   if (!TREE_USED (function))
2685     {
2686       assemble_external (function);
2687       TREE_USED (function) = 1;
2688     }
2689   funexp = XEXP (DECL_RTL (function), 0);
2690   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2691   insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2692   SIBLING_CALL_P (insn) = 1;
2693
2694   insn = get_insns ();
2695   shorten_branches (insn);
2696   final_start_function (insn, file, 1);
2697   final (insn, file, 1);
2698   final_end_function ();
2699
2700   /* Stop pretending to be a post-reload pass.  */
2701   reload_completed = 0;
2702 }
2703
2704 static int
2705 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2706 {
2707   if (GET_CODE (*x) == SYMBOL_REF)
2708     return SYMBOL_REF_TLS_MODEL (*x) != 0;
2709
2710   /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2711      TLS offsets, not real symbol references.  */
2712   if (GET_CODE (*x) == UNSPEC
2713       && XINT (*x, 1) == UNSPEC_TLS)
2714     return -1;
2715
2716   return 0;
2717 }
2718
2719 static bool
2720 aarch64_tls_referenced_p (rtx x)
2721 {
2722   if (!TARGET_HAVE_TLS)
2723     return false;
2724
2725   return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2726 }
2727
2728
2729 static int
2730 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2731 {
2732   const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2733   const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2734
2735   if (*imm1 < *imm2)
2736     return -1;
2737   if (*imm1 > *imm2)
2738     return +1;
2739   return 0;
2740 }
2741
2742
2743 static void
2744 aarch64_build_bitmask_table (void)
2745 {
2746   unsigned HOST_WIDE_INT mask, imm;
2747   unsigned int log_e, e, s, r;
2748   unsigned int nimms = 0;
2749
2750   for (log_e = 1; log_e <= 6; log_e++)
2751     {
2752       e = 1 << log_e;
2753       if (e == 64)
2754         mask = ~(HOST_WIDE_INT) 0;
2755       else
2756         mask = ((HOST_WIDE_INT) 1 << e) - 1;
2757       for (s = 1; s < e; s++)
2758         {
2759           for (r = 0; r < e; r++)
2760             {
2761               /* set s consecutive bits to 1 (s < 64) */
2762               imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2763               /* rotate right by r */
2764               if (r != 0)
2765                 imm = ((imm >> r) | (imm << (e - r))) & mask;
2766               /* replicate the constant depending on SIMD size */
2767               switch (log_e) {
2768               case 1: imm |= (imm <<  2);
2769               case 2: imm |= (imm <<  4);
2770               case 3: imm |= (imm <<  8);
2771               case 4: imm |= (imm << 16);
2772               case 5: imm |= (imm << 32);
2773               case 6:
2774                 break;
2775               default:
2776                 gcc_unreachable ();
2777               }
2778               gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2779               aarch64_bitmasks[nimms++] = imm;
2780             }
2781         }
2782     }
2783
2784   gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2785   qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2786          aarch64_bitmasks_cmp);
2787 }
2788
2789
2790 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2791    a left shift of 0 or 12 bits.  */
2792 bool
2793 aarch64_uimm12_shift (HOST_WIDE_INT val)
2794 {
2795   return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2796           || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2797           );
2798 }
2799
2800
2801 /* Return true if val is an immediate that can be loaded into a
2802    register by a MOVZ instruction.  */
2803 static bool
2804 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2805 {
2806   if (GET_MODE_SIZE (mode) > 4)
2807     {
2808       if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2809           || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2810         return 1;
2811     }
2812   else
2813     {
2814       /* Ignore sign extension.  */
2815       val &= (HOST_WIDE_INT) 0xffffffff;
2816     }
2817   return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2818           || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2819 }
2820
2821
2822 /* Return true if val is a valid bitmask immediate.  */
2823 bool
2824 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2825 {
2826   if (GET_MODE_SIZE (mode) < 8)
2827     {
2828       /* Replicate bit pattern.  */
2829       val &= (HOST_WIDE_INT) 0xffffffff;
2830       val |= val << 32;
2831     }
2832   return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2833                   sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2834 }
2835
2836
2837 /* Return true if val is an immediate that can be loaded into a
2838    register in a single instruction.  */
2839 bool
2840 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2841 {
2842   if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2843     return 1;
2844   return aarch64_bitmask_imm (val, mode);
2845 }
2846
2847 static bool
2848 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2849 {
2850   rtx base, offset;
2851
2852   if (GET_CODE (x) == HIGH)
2853     return true;
2854
2855   split_const (x, &base, &offset);
2856   if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2857     {
2858       if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2859           != SYMBOL_FORCE_TO_MEM)
2860         return true;
2861       else
2862         /* Avoid generating a 64-bit relocation in ILP32; leave
2863            to aarch64_expand_mov_immediate to handle it properly.  */
2864         return mode != ptr_mode;
2865     }
2866
2867   return aarch64_tls_referenced_p (x);
2868 }
2869
2870 /* Return true if register REGNO is a valid index register.
2871    STRICT_P is true if REG_OK_STRICT is in effect.  */
2872
2873 bool
2874 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2875 {
2876   if (!HARD_REGISTER_NUM_P (regno))
2877     {
2878       if (!strict_p)
2879         return true;
2880
2881       if (!reg_renumber)
2882         return false;
2883
2884       regno = reg_renumber[regno];
2885     }
2886   return GP_REGNUM_P (regno);
2887 }
2888
2889 /* Return true if register REGNO is a valid base register for mode MODE.
2890    STRICT_P is true if REG_OK_STRICT is in effect.  */
2891
2892 bool
2893 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2894 {
2895   if (!HARD_REGISTER_NUM_P (regno))
2896     {
2897       if (!strict_p)
2898         return true;
2899
2900       if (!reg_renumber)
2901         return false;
2902
2903       regno = reg_renumber[regno];
2904     }
2905
2906   /* The fake registers will be eliminated to either the stack or
2907      hard frame pointer, both of which are usually valid base registers.
2908      Reload deals with the cases where the eliminated form isn't valid.  */
2909   return (GP_REGNUM_P (regno)
2910           || regno == SP_REGNUM
2911           || regno == FRAME_POINTER_REGNUM
2912           || regno == ARG_POINTER_REGNUM);
2913 }
2914
2915 /* Return true if X is a valid base register for mode MODE.
2916    STRICT_P is true if REG_OK_STRICT is in effect.  */
2917
2918 static bool
2919 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2920 {
2921   if (!strict_p && GET_CODE (x) == SUBREG)
2922     x = SUBREG_REG (x);
2923
2924   return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2925 }
2926
2927 /* Return true if address offset is a valid index.  If it is, fill in INFO
2928    appropriately.  STRICT_P is true if REG_OK_STRICT is in effect.  */
2929
2930 static bool
2931 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2932                         enum machine_mode mode, bool strict_p)
2933 {
2934   enum aarch64_address_type type;
2935   rtx index;
2936   int shift;
2937
2938   /* (reg:P) */
2939   if ((REG_P (x) || GET_CODE (x) == SUBREG)
2940       && GET_MODE (x) == Pmode)
2941     {
2942       type = ADDRESS_REG_REG;
2943       index = x;
2944       shift = 0;
2945     }
2946   /* (sign_extend:DI (reg:SI)) */
2947   else if ((GET_CODE (x) == SIGN_EXTEND
2948             || GET_CODE (x) == ZERO_EXTEND)
2949            && GET_MODE (x) == DImode
2950            && GET_MODE (XEXP (x, 0)) == SImode)
2951     {
2952       type = (GET_CODE (x) == SIGN_EXTEND)
2953         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2954       index = XEXP (x, 0);
2955       shift = 0;
2956     }
2957   /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2958   else if (GET_CODE (x) == MULT
2959            && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2960                || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2961            && GET_MODE (XEXP (x, 0)) == DImode
2962            && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2963            && CONST_INT_P (XEXP (x, 1)))
2964     {
2965       type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2966         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2967       index = XEXP (XEXP (x, 0), 0);
2968       shift = exact_log2 (INTVAL (XEXP (x, 1)));
2969     }
2970   /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2971   else if (GET_CODE (x) == ASHIFT
2972            && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2973                || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2974            && GET_MODE (XEXP (x, 0)) == DImode
2975            && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2976            && CONST_INT_P (XEXP (x, 1)))
2977     {
2978       type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2979         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2980       index = XEXP (XEXP (x, 0), 0);
2981       shift = INTVAL (XEXP (x, 1));
2982     }
2983   /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2984   else if ((GET_CODE (x) == SIGN_EXTRACT
2985             || GET_CODE (x) == ZERO_EXTRACT)
2986            && GET_MODE (x) == DImode
2987            && GET_CODE (XEXP (x, 0)) == MULT
2988            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2989            && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2990     {
2991       type = (GET_CODE (x) == SIGN_EXTRACT)
2992         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2993       index = XEXP (XEXP (x, 0), 0);
2994       shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2995       if (INTVAL (XEXP (x, 1)) != 32 + shift
2996           || INTVAL (XEXP (x, 2)) != 0)
2997         shift = -1;
2998     }
2999   /* (and:DI (mult:DI (reg:DI) (const_int scale))
3000      (const_int 0xffffffff<<shift)) */
3001   else if (GET_CODE (x) == AND
3002            && GET_MODE (x) == DImode
3003            && GET_CODE (XEXP (x, 0)) == MULT
3004            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3005            && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3006            && CONST_INT_P (XEXP (x, 1)))
3007     {
3008       type = ADDRESS_REG_UXTW;
3009       index = XEXP (XEXP (x, 0), 0);
3010       shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3011       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3012         shift = -1;
3013     }
3014   /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3015   else if ((GET_CODE (x) == SIGN_EXTRACT
3016             || GET_CODE (x) == ZERO_EXTRACT)
3017            && GET_MODE (x) == DImode
3018            && GET_CODE (XEXP (x, 0)) == ASHIFT
3019            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3020            && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3021     {
3022       type = (GET_CODE (x) == SIGN_EXTRACT)
3023         ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3024       index = XEXP (XEXP (x, 0), 0);
3025       shift = INTVAL (XEXP (XEXP (x, 0), 1));
3026       if (INTVAL (XEXP (x, 1)) != 32 + shift
3027           || INTVAL (XEXP (x, 2)) != 0)
3028         shift = -1;
3029     }
3030   /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3031      (const_int 0xffffffff<<shift)) */
3032   else if (GET_CODE (x) == AND
3033            && GET_MODE (x) == DImode
3034            && GET_CODE (XEXP (x, 0)) == ASHIFT
3035            && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3036            && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3037            && CONST_INT_P (XEXP (x, 1)))
3038     {
3039       type = ADDRESS_REG_UXTW;
3040       index = XEXP (XEXP (x, 0), 0);
3041       shift = INTVAL (XEXP (XEXP (x, 0), 1));
3042       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3043         shift = -1;
3044     }
3045   /* (mult:P (reg:P) (const_int scale)) */
3046   else if (GET_CODE (x) == MULT
3047            && GET_MODE (x) == Pmode
3048            && GET_MODE (XEXP (x, 0)) == Pmode
3049            && CONST_INT_P (XEXP (x, 1)))
3050     {
3051       type = ADDRESS_REG_REG;
3052       index = XEXP (x, 0);
3053       shift = exact_log2 (INTVAL (XEXP (x, 1)));
3054     }
3055   /* (ashift:P (reg:P) (const_int shift)) */
3056   else if (GET_CODE (x) == ASHIFT
3057            && GET_MODE (x) == Pmode
3058            && GET_MODE (XEXP (x, 0)) == Pmode
3059            && CONST_INT_P (XEXP (x, 1)))
3060     {
3061       type = ADDRESS_REG_REG;
3062       index = XEXP (x, 0);
3063       shift = INTVAL (XEXP (x, 1));
3064     }
3065   else
3066     return false;
3067
3068   if (GET_CODE (index) == SUBREG)
3069     index = SUBREG_REG (index);
3070
3071   if ((shift == 0 ||
3072        (shift > 0 && shift <= 3
3073         && (1 << shift) == GET_MODE_SIZE (mode)))
3074       && REG_P (index)
3075       && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3076     {
3077       info->type = type;
3078       info->offset = index;
3079       info->shift = shift;
3080       return true;
3081     }
3082
3083   return false;
3084 }
3085
3086 static inline bool
3087 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3088 {
3089   return (offset >= -64 * GET_MODE_SIZE (mode)
3090           && offset < 64 * GET_MODE_SIZE (mode)
3091           && offset % GET_MODE_SIZE (mode) == 0);
3092 }
3093
3094 static inline bool
3095 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3096                                HOST_WIDE_INT offset)
3097 {
3098   return offset >= -256 && offset < 256;
3099 }
3100
3101 static inline bool
3102 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3103 {
3104   return (offset >= 0
3105           && offset < 4096 * GET_MODE_SIZE (mode)
3106           && offset % GET_MODE_SIZE (mode) == 0);
3107 }
3108
3109 /* Return true if X is a valid address for machine mode MODE.  If it is,
3110    fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
3111    effect.  OUTER_CODE is PARALLEL for a load/store pair.  */
3112
3113 static bool
3114 aarch64_classify_address (struct aarch64_address_info *info,
3115                           rtx x, enum machine_mode mode,
3116                           RTX_CODE outer_code, bool strict_p)
3117 {
3118   enum rtx_code code = GET_CODE (x);
3119   rtx op0, op1;
3120   bool allow_reg_index_p =
3121     outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3122
3123   /* Don't support anything other than POST_INC or REG addressing for
3124      AdvSIMD.  */
3125   if (aarch64_vector_mode_p (mode)
3126       && (code != POST_INC && code != REG))
3127     return false;
3128
3129   switch (code)
3130     {
3131     case REG:
3132     case SUBREG:
3133       info->type = ADDRESS_REG_IMM;
3134       info->base = x;
3135       info->offset = const0_rtx;
3136       return aarch64_base_register_rtx_p (x, strict_p);
3137
3138     case PLUS:
3139       op0 = XEXP (x, 0);
3140       op1 = XEXP (x, 1);
3141       if (GET_MODE_SIZE (mode) != 0
3142           && CONST_INT_P (op1)
3143           && aarch64_base_register_rtx_p (op0, strict_p))
3144         {
3145           HOST_WIDE_INT offset = INTVAL (op1);
3146
3147           info->type = ADDRESS_REG_IMM;
3148           info->base = op0;
3149           info->offset = op1;
3150
3151           /* TImode and TFmode values are allowed in both pairs of X
3152              registers and individual Q registers.  The available
3153              address modes are:
3154              X,X: 7-bit signed scaled offset
3155              Q:   9-bit signed offset
3156              We conservatively require an offset representable in either mode.
3157            */
3158           if (mode == TImode || mode == TFmode)
3159             return (offset_7bit_signed_scaled_p (mode, offset)
3160                     && offset_9bit_signed_unscaled_p (mode, offset));
3161
3162           if (outer_code == PARALLEL)
3163             return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3164                     && offset_7bit_signed_scaled_p (mode, offset));
3165           else
3166             return (offset_9bit_signed_unscaled_p (mode, offset)
3167                     || offset_12bit_unsigned_scaled_p (mode, offset));
3168         }
3169
3170       if (allow_reg_index_p)
3171         {
3172           /* Look for base + (scaled/extended) index register.  */
3173           if (aarch64_base_register_rtx_p (op0, strict_p)
3174               && aarch64_classify_index (info, op1, mode, strict_p))
3175             {
3176               info->base = op0;
3177               return true;
3178             }
3179           if (aarch64_base_register_rtx_p (op1, strict_p)
3180               && aarch64_classify_index (info, op0, mode, strict_p))
3181             {
3182               info->base = op1;
3183               return true;
3184             }
3185         }
3186
3187       return false;
3188
3189     case POST_INC:
3190     case POST_DEC:
3191     case PRE_INC:
3192     case PRE_DEC:
3193       info->type = ADDRESS_REG_WB;
3194       info->base = XEXP (x, 0);
3195       info->offset = NULL_RTX;
3196       return aarch64_base_register_rtx_p (info->base, strict_p);
3197
3198     case POST_MODIFY:
3199     case PRE_MODIFY:
3200       info->type = ADDRESS_REG_WB;
3201       info->base = XEXP (x, 0);
3202       if (GET_CODE (XEXP (x, 1)) == PLUS
3203           && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3204           && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3205           && aarch64_base_register_rtx_p (info->base, strict_p))
3206         {
3207           HOST_WIDE_INT offset;
3208           info->offset = XEXP (XEXP (x, 1), 1);
3209           offset = INTVAL (info->offset);
3210
3211           /* TImode and TFmode values are allowed in both pairs of X
3212              registers and individual Q registers.  The available
3213              address modes are:
3214              X,X: 7-bit signed scaled offset
3215              Q:   9-bit signed offset
3216              We conservatively require an offset representable in either mode.
3217            */
3218           if (mode == TImode || mode == TFmode)
3219             return (offset_7bit_signed_scaled_p (mode, offset)
3220                     && offset_9bit_signed_unscaled_p (mode, offset));
3221
3222           if (outer_code == PARALLEL)
3223             return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3224                     && offset_7bit_signed_scaled_p (mode, offset));
3225           else
3226             return offset_9bit_signed_unscaled_p (mode, offset);
3227         }
3228       return false;
3229
3230     case CONST:
3231     case SYMBOL_REF:
3232     case LABEL_REF:
3233       /* load literal: pc-relative constant pool entry.  Only supported
3234          for SI mode or larger.  */
3235       info->type = ADDRESS_SYMBOLIC;
3236       if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3237         {
3238           rtx sym, addend;
3239
3240           split_const (x, &sym, &addend);
3241           return (GET_CODE (sym) == LABEL_REF
3242                   || (GET_CODE (sym) == SYMBOL_REF
3243                       && CONSTANT_POOL_ADDRESS_P (sym)));
3244         }
3245       return false;
3246
3247     case LO_SUM:
3248       info->type = ADDRESS_LO_SUM;
3249       info->base = XEXP (x, 0);
3250       info->offset = XEXP (x, 1);
3251       if (allow_reg_index_p
3252           && aarch64_base_register_rtx_p (info->base, strict_p))
3253         {
3254           rtx sym, offs;
3255           split_const (info->offset, &sym, &offs);
3256           if (GET_CODE (sym) == SYMBOL_REF
3257               && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3258                   == SYMBOL_SMALL_ABSOLUTE))
3259             {
3260               /* The symbol and offset must be aligned to the access size.  */
3261               unsigned int align;
3262               unsigned int ref_size;
3263
3264               if (CONSTANT_POOL_ADDRESS_P (sym))
3265                 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3266               else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3267                 {
3268                   tree exp = SYMBOL_REF_DECL (sym);
3269                   align = TYPE_ALIGN (TREE_TYPE (exp));
3270                   align = CONSTANT_ALIGNMENT (exp, align);
3271                 }
3272               else if (SYMBOL_REF_DECL (sym))
3273                 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3274               else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3275                        && SYMBOL_REF_BLOCK (sym) != NULL)
3276                 align = SYMBOL_REF_BLOCK (sym)->alignment;
3277               else
3278                 align = BITS_PER_UNIT;
3279
3280               ref_size = GET_MODE_SIZE (mode);
3281               if (ref_size == 0)
3282                 ref_size = GET_MODE_SIZE (DImode);
3283
3284               return ((INTVAL (offs) & (ref_size - 1)) == 0
3285                       && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3286             }
3287         }
3288       return false;
3289
3290     default:
3291       return false;
3292     }
3293 }
3294
3295 bool
3296 aarch64_symbolic_address_p (rtx x)
3297 {
3298   rtx offset;
3299
3300   split_const (x, &x, &offset);
3301   return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3302 }
3303
3304 /* Classify the base of symbolic expression X, given that X appears in
3305    context CONTEXT.  */
3306
3307 enum aarch64_symbol_type
3308 aarch64_classify_symbolic_expression (rtx x,
3309                                       enum aarch64_symbol_context context)
3310 {
3311   rtx offset;
3312
3313   split_const (x, &x, &offset);
3314   return aarch64_classify_symbol (x, context);
3315 }
3316
3317
3318 /* Return TRUE if X is a legitimate address for accessing memory in
3319    mode MODE.  */
3320 static bool
3321 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3322 {
3323   struct aarch64_address_info addr;
3324
3325   return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3326 }
3327
3328 /* Return TRUE if X is a legitimate address for accessing memory in
3329    mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
3330    pair operation.  */
3331 bool
3332 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3333                               RTX_CODE outer_code, bool strict_p)
3334 {
3335   struct aarch64_address_info addr;
3336
3337   return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3338 }
3339
3340 /* Return TRUE if rtx X is immediate constant 0.0 */
3341 bool
3342 aarch64_float_const_zero_rtx_p (rtx x)
3343 {
3344   REAL_VALUE_TYPE r;
3345
3346   if (GET_MODE (x) == VOIDmode)
3347     return false;
3348
3349   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3350   if (REAL_VALUE_MINUS_ZERO (r))
3351     return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3352   return REAL_VALUES_EQUAL (r, dconst0);
3353 }
3354
3355 /* Return the fixed registers used for condition codes.  */
3356
3357 static bool
3358 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3359 {
3360   *p1 = CC_REGNUM;
3361   *p2 = INVALID_REGNUM;
3362   return true;
3363 }
3364
3365 enum machine_mode
3366 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3367 {
3368   /* All floating point compares return CCFP if it is an equality
3369      comparison, and CCFPE otherwise.  */
3370   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3371     {
3372       switch (code)
3373         {
3374         case EQ:
3375         case NE:
3376         case UNORDERED:
3377         case ORDERED:
3378         case UNLT:
3379         case UNLE:
3380         case UNGT:
3381         case UNGE:
3382         case UNEQ:
3383         case LTGT:
3384           return CCFPmode;
3385
3386         case LT:
3387         case LE:
3388         case GT:
3389         case GE:
3390           return CCFPEmode;
3391
3392         default:
3393           gcc_unreachable ();
3394         }
3395     }
3396
3397   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3398       && y == const0_rtx
3399       && (code == EQ || code == NE || code == LT || code == GE)
3400       && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3401           || GET_CODE (x) == NEG))
3402     return CC_NZmode;
3403
3404   /* A compare with a shifted operand.  Because of canonicalization,
3405      the comparison will have to be swapped when we emit the assembly
3406      code.  */
3407   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3408       && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3409       && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3410           || GET_CODE (x) == LSHIFTRT
3411           || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3412     return CC_SWPmode;
3413
3414   /* Similarly for a negated operand, but we can only do this for
3415      equalities.  */
3416   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3417       && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3418       && (code == EQ || code == NE)
3419       && GET_CODE (x) == NEG)
3420     return CC_Zmode;
3421
3422   /* A compare of a mode narrower than SI mode against zero can be done
3423      by extending the value in the comparison.  */
3424   if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3425       && y == const0_rtx)
3426     /* Only use sign-extension if we really need it.  */
3427     return ((code == GT || code == GE || code == LE || code == LT)
3428             ? CC_SESWPmode : CC_ZESWPmode);
3429
3430   /* For everything else, return CCmode.  */
3431   return CCmode;
3432 }
3433
3434 static unsigned
3435 aarch64_get_condition_code (rtx x)
3436 {
3437   enum machine_mode mode = GET_MODE (XEXP (x, 0));
3438   enum rtx_code comp_code = GET_CODE (x);
3439
3440   if (GET_MODE_CLASS (mode) != MODE_CC)
3441     mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3442
3443   switch (mode)
3444     {
3445     case CCFPmode:
3446     case CCFPEmode:
3447       switch (comp_code)
3448         {
3449         case GE: return AARCH64_GE;
3450         case GT: return AARCH64_GT;
3451         case LE: return AARCH64_LS;
3452         case LT: return AARCH64_MI;
3453         case NE: return AARCH64_NE;
3454         case EQ: return AARCH64_EQ;
3455         case ORDERED: return AARCH64_VC;
3456         case UNORDERED: return AARCH64_VS;
3457         case UNLT: return AARCH64_LT;
3458         case UNLE: return AARCH64_LE;
3459         case UNGT: return AARCH64_HI;
3460         case UNGE: return AARCH64_PL;
3461         default: gcc_unreachable ();
3462         }
3463       break;
3464
3465     case CCmode:
3466       switch (comp_code)
3467         {
3468         case NE: return AARCH64_NE;
3469         case EQ: return AARCH64_EQ;
3470         case GE: return AARCH64_GE;
3471         case GT: return AARCH64_GT;
3472         case LE: return AARCH64_LE;
3473         case LT: return AARCH64_LT;
3474         case GEU: return AARCH64_CS;
3475         case GTU: return AARCH64_HI;
3476         case LEU: return AARCH64_LS;
3477         case LTU: return AARCH64_CC;
3478         default: gcc_unreachable ();
3479         }
3480       break;
3481
3482     case CC_SWPmode:
3483     case CC_ZESWPmode:
3484     case CC_SESWPmode:
3485       switch (comp_code)
3486         {
3487         case NE: return AARCH64_NE;
3488         case EQ: return AARCH64_EQ;
3489         case GE: return AARCH64_LE;
3490         case GT: return AARCH64_LT;
3491         case LE: return AARCH64_GE;
3492         case LT: return AARCH64_GT;
3493         case GEU: return AARCH64_LS;
3494         case GTU: return AARCH64_CC;
3495         case LEU: return AARCH64_CS;
3496         case LTU: return AARCH64_HI;
3497         default: gcc_unreachable ();
3498         }
3499       break;
3500
3501     case CC_NZmode:
3502       switch (comp_code)
3503         {
3504         case NE: return AARCH64_NE;
3505         case EQ: return AARCH64_EQ;
3506         case GE: return AARCH64_PL;
3507         case LT: return AARCH64_MI;
3508         default: gcc_unreachable ();
3509         }
3510       break;
3511
3512     case CC_Zmode:
3513       switch (comp_code)
3514         {
3515         case NE: return AARCH64_NE;
3516         case EQ: return AARCH64_EQ;
3517         default: gcc_unreachable ();
3518         }
3519       break;
3520
3521     default:
3522       gcc_unreachable ();
3523       break;
3524     }
3525 }
3526
3527 static unsigned
3528 bit_count (unsigned HOST_WIDE_INT value)
3529 {
3530   unsigned count = 0;
3531
3532   while (value)
3533     {
3534       count++;
3535       value &= value - 1;
3536     }
3537
3538   return count;
3539 }
3540
3541 void
3542 aarch64_print_operand (FILE *f, rtx x, char code)
3543 {
3544   switch (code)
3545     {
3546     /* An integer or symbol address without a preceding # sign.  */
3547     case 'c':
3548       switch (GET_CODE (x))
3549         {
3550         case CONST_INT:
3551           fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3552           break;
3553
3554         case SYMBOL_REF:
3555           output_addr_const (f, x);
3556           break;
3557
3558         case CONST:
3559           if (GET_CODE (XEXP (x, 0)) == PLUS
3560               && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3561             {
3562               output_addr_const (f, x);
3563               break;
3564             }
3565           /* Fall through.  */
3566
3567         default:
3568           output_operand_lossage ("Unsupported operand for code '%c'", code);
3569         }
3570       break;
3571
3572     case 'e':
3573       /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w.  */
3574       {
3575         int n;
3576
3577         if (GET_CODE (x) != CONST_INT
3578             || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3579           {
3580             output_operand_lossage ("invalid operand for '%%%c'", code);
3581             return;
3582           }
3583
3584         switch (n)
3585           {
3586           case 3:
3587             fputc ('b', f);
3588             break;
3589           case 4:
3590             fputc ('h', f);
3591             break;
3592           case 5:
3593             fputc ('w', f);
3594             break;
3595           default:
3596             output_operand_lossage ("invalid operand for '%%%c'", code);
3597             return;
3598           }
3599       }
3600       break;
3601
3602     case 'p':
3603       {
3604         int n;
3605
3606         /* Print N such that 2^N == X.  */
3607         if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3608           {
3609             output_operand_lossage ("invalid operand for '%%%c'", code);
3610             return;
3611           }
3612
3613         asm_fprintf (f, "%d", n);
3614       }
3615       break;
3616
3617     case 'P':
3618       /* Print the number of non-zero bits in X (a const_int).  */
3619       if (GET_CODE (x) != CONST_INT)
3620         {
3621           output_operand_lossage ("invalid operand for '%%%c'", code);
3622           return;
3623         }
3624
3625       asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3626       break;
3627
3628     case 'H':
3629       /* Print the higher numbered register of a pair (TImode) of regs.  */
3630       if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3631         {
3632           output_operand_lossage ("invalid operand for '%%%c'", code);
3633           return;
3634         }
3635
3636       asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3637       break;
3638
3639     case 'm':
3640       /* Print a condition (eq, ne, etc).  */
3641
3642       /* CONST_TRUE_RTX means always -- that's the default.  */
3643       if (x == const_true_rtx)
3644         return;
3645
3646       if (!COMPARISON_P (x))
3647         {
3648           output_operand_lossage ("invalid operand for '%%%c'", code);
3649           return;
3650         }
3651
3652       fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3653       break;
3654
3655     case 'M':
3656       /* Print the inverse of a condition (eq <-> ne, etc).  */
3657
3658       /* CONST_TRUE_RTX means never -- that's the default.  */
3659       if (x == const_true_rtx)
3660         {
3661           fputs ("nv", f);
3662           return;
3663         }
3664
3665       if (!COMPARISON_P (x))
3666         {
3667           output_operand_lossage ("invalid operand for '%%%c'", code);
3668           return;
3669         }
3670
3671       fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3672                                   (aarch64_get_condition_code (x))], f);
3673       break;
3674
3675     case 'b':
3676     case 'h':
3677     case 's':
3678     case 'd':
3679     case 'q':
3680       /* Print a scalar FP/SIMD register name.  */
3681       if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3682         {
3683           output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3684           return;
3685         }
3686       asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3687       break;
3688
3689     case 'S':
3690     case 'T':
3691     case 'U':
3692     case 'V':
3693       /* Print the first FP/SIMD register name in a list.  */
3694       if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3695         {
3696           output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3697           return;
3698         }
3699       asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3700       break;
3701
3702     case 'X':
3703       /* Print bottom 16 bits of integer constant in hex.  */
3704       if (GET_CODE (x) != CONST_INT)
3705         {
3706           output_operand_lossage ("invalid operand for '%%%c'", code);
3707           return;
3708         }
3709       asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3710       break;
3711
3712     case 'w':
3713     case 'x':
3714       /* Print a general register name or the zero register (32-bit or
3715          64-bit).  */
3716       if (x == const0_rtx
3717           || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3718         {
3719           asm_fprintf (f, "%czr", code);
3720           break;
3721         }
3722
3723       if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3724         {
3725           asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3726           break;
3727         }
3728
3729       if (REG_P (x) && REGNO (x) == SP_REGNUM)
3730         {
3731           asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3732           break;
3733         }
3734
3735       /* Fall through */
3736
3737     case 0:
3738       /* Print a normal operand, if it's a general register, then we
3739          assume DImode.  */
3740       if (x == NULL)
3741         {
3742           output_operand_lossage ("missing operand");
3743           return;
3744         }
3745
3746       switch (GET_CODE (x))
3747         {
3748         case REG:
3749           asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3750           break;
3751
3752         case MEM:
3753           aarch64_memory_reference_mode = GET_MODE (x);
3754           output_address (XEXP (x, 0));
3755           break;
3756
3757         case LABEL_REF:
3758         case SYMBOL_REF:
3759           output_addr_const (asm_out_file, x);
3760           break;
3761
3762         case CONST_INT:
3763           asm_fprintf (f, "%wd", INTVAL (x));
3764           break;
3765
3766         case CONST_VECTOR:
3767           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3768             {
3769               gcc_assert (aarch64_const_vec_all_same_int_p (x,
3770                                                             HOST_WIDE_INT_MIN,
3771                                                             HOST_WIDE_INT_MAX));
3772               asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3773             }
3774           else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3775             {
3776               fputc ('0', f);
3777             }
3778           else
3779             gcc_unreachable ();
3780           break;
3781
3782         case CONST_DOUBLE:
3783           /* CONST_DOUBLE can represent a double-width integer.
3784              In this case, the mode of x is VOIDmode.  */
3785           if (GET_MODE (x) == VOIDmode)
3786             ; /* Do Nothing.  */
3787           else if (aarch64_float_const_zero_rtx_p (x))
3788             {
3789               fputc ('0', f);
3790               break;
3791             }
3792           else if (aarch64_float_const_representable_p (x))
3793             {
3794 #define buf_size 20
3795               char float_buf[buf_size] = {'\0'};
3796               REAL_VALUE_TYPE r;
3797               REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3798               real_to_decimal_for_mode (float_buf, &r,
3799                                         buf_size, buf_size,
3800                                         1, GET_MODE (x));
3801               asm_fprintf (asm_out_file, "%s", float_buf);
3802               break;
3803 #undef buf_size
3804             }
3805           output_operand_lossage ("invalid constant");
3806           return;
3807         default:
3808           output_operand_lossage ("invalid operand");
3809           return;
3810         }
3811       break;
3812
3813     case 'A':
3814       if (GET_CODE (x) == HIGH)
3815         x = XEXP (x, 0);
3816
3817       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3818         {
3819         case SYMBOL_SMALL_GOT:
3820           asm_fprintf (asm_out_file, ":got:");
3821           break;
3822
3823         case SYMBOL_SMALL_TLSGD:
3824           asm_fprintf (asm_out_file, ":tlsgd:");
3825           break;
3826
3827         case SYMBOL_SMALL_TLSDESC:
3828           asm_fprintf (asm_out_file, ":tlsdesc:");
3829           break;
3830
3831         case SYMBOL_SMALL_GOTTPREL:
3832           asm_fprintf (asm_out_file, ":gottprel:");
3833           break;
3834
3835         case SYMBOL_SMALL_TPREL:
3836           asm_fprintf (asm_out_file, ":tprel:");
3837           break;
3838
3839         case SYMBOL_TINY_GOT:
3840           gcc_unreachable ();
3841           break;
3842
3843         default:
3844           break;
3845         }
3846       output_addr_const (asm_out_file, x);
3847       break;
3848
3849     case 'L':
3850       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3851         {
3852         case SYMBOL_SMALL_GOT:
3853           asm_fprintf (asm_out_file, ":lo12:");
3854           break;
3855
3856         case SYMBOL_SMALL_TLSGD:
3857           asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3858           break;
3859
3860         case SYMBOL_SMALL_TLSDESC:
3861           asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3862           break;
3863
3864         case SYMBOL_SMALL_GOTTPREL:
3865           asm_fprintf (asm_out_file, ":gottprel_lo12:");
3866           break;
3867
3868         case SYMBOL_SMALL_TPREL:
3869           asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3870           break;
3871
3872         case SYMBOL_TINY_GOT:
3873           asm_fprintf (asm_out_file, ":got:");
3874           break;
3875
3876         default:
3877           break;
3878         }
3879       output_addr_const (asm_out_file, x);
3880       break;
3881
3882     case 'G':
3883
3884       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3885         {
3886         case SYMBOL_SMALL_TPREL:
3887           asm_fprintf (asm_out_file, ":tprel_hi12:");
3888           break;
3889         default:
3890           break;
3891         }
3892       output_addr_const (asm_out_file, x);
3893       break;
3894
3895     default:
3896       output_operand_lossage ("invalid operand prefix '%%%c'", code);
3897       return;
3898     }
3899 }
3900
3901 void
3902 aarch64_print_operand_address (FILE *f, rtx x)
3903 {
3904   struct aarch64_address_info addr;
3905
3906   if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3907                              MEM, true))
3908     switch (addr.type)
3909       {
3910       case ADDRESS_REG_IMM:
3911         if (addr.offset == const0_rtx)
3912           asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3913         else
3914           asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3915                        INTVAL (addr.offset));
3916         return;
3917
3918       case ADDRESS_REG_REG:
3919         if (addr.shift == 0)
3920           asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3921                        reg_names [REGNO (addr.offset)]);
3922         else
3923           asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3924                        reg_names [REGNO (addr.offset)], addr.shift);
3925         return;
3926
3927       case ADDRESS_REG_UXTW:
3928         if (addr.shift == 0)
3929           asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3930                        REGNO (addr.offset) - R0_REGNUM);
3931         else
3932           asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3933                        REGNO (addr.offset) - R0_REGNUM, addr.shift);
3934         return;
3935
3936       case ADDRESS_REG_SXTW:
3937         if (addr.shift == 0)
3938           asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3939                        REGNO (addr.offset) - R0_REGNUM);
3940         else
3941           asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3942                        REGNO (addr.offset) - R0_REGNUM, addr.shift);
3943         return;
3944
3945       case ADDRESS_REG_WB:
3946         switch (GET_CODE (x))
3947           {
3948           case PRE_INC:
3949             asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3950                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3951             return;
3952           case POST_INC:
3953             asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3954                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3955             return;
3956           case PRE_DEC:
3957             asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3958                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3959             return;
3960           case POST_DEC:
3961             asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3962                          GET_MODE_SIZE (aarch64_memory_reference_mode));
3963             return;
3964           case PRE_MODIFY:
3965             asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3966                          INTVAL (addr.offset));
3967             return;
3968           case POST_MODIFY:
3969             asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3970                          INTVAL (addr.offset));
3971             return;
3972           default:
3973             break;
3974           }
3975         break;
3976
3977       case ADDRESS_LO_SUM:
3978         asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3979         output_addr_const (f, addr.offset);
3980         asm_fprintf (f, "]");
3981         return;
3982
3983       case ADDRESS_SYMBOLIC:
3984         break;
3985       }
3986
3987   output_addr_const (f, x);
3988 }
3989
3990 bool
3991 aarch64_label_mentioned_p (rtx x)
3992 {
3993   const char *fmt;
3994   int i;
3995
3996   if (GET_CODE (x) == LABEL_REF)
3997     return true;
3998
3999   /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4000      referencing instruction, but they are constant offsets, not
4001      symbols.  */
4002   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4003     return false;
4004
4005   fmt = GET_RTX_FORMAT (GET_CODE (x));
4006   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4007     {
4008       if (fmt[i] == 'E')
4009         {
4010           int j;
4011
4012           for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4013             if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4014               return 1;
4015         }
4016       else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4017         return 1;
4018     }
4019
4020   return 0;
4021 }
4022
4023 /* Implement REGNO_REG_CLASS.  */
4024
4025 enum reg_class
4026 aarch64_regno_regclass (unsigned regno)
4027 {
4028   if (GP_REGNUM_P (regno))
4029     return CORE_REGS;
4030
4031   if (regno == SP_REGNUM)
4032     return STACK_REG;
4033
4034   if (regno == FRAME_POINTER_REGNUM
4035       || regno == ARG_POINTER_REGNUM)
4036     return POINTER_REGS;
4037
4038   if (FP_REGNUM_P (regno))
4039     return FP_LO_REGNUM_P (regno) ?  FP_LO_REGS : FP_REGS;
4040
4041   return NO_REGS;
4042 }
4043
4044 /* Try a machine-dependent way of reloading an illegitimate address
4045    operand.  If we find one, push the reload and return the new rtx.  */
4046
4047 rtx
4048 aarch64_legitimize_reload_address (rtx *x_p,
4049                                    enum machine_mode mode,
4050                                    int opnum, int type,
4051                                    int ind_levels ATTRIBUTE_UNUSED)
4052 {
4053   rtx x = *x_p;
4054
4055   /* Do not allow mem (plus (reg, const)) if vector mode.  */
4056   if (aarch64_vector_mode_p (mode)
4057       && GET_CODE (x) == PLUS
4058       && REG_P (XEXP (x, 0))
4059       && CONST_INT_P (XEXP (x, 1)))
4060     {
4061       rtx orig_rtx = x;
4062       x = copy_rtx (x);
4063       push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4064                    BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4065                    opnum, (enum reload_type) type);
4066       return x;
4067     }
4068
4069   /* We must recognize output that we have already generated ourselves.  */
4070   if (GET_CODE (x) == PLUS
4071       && GET_CODE (XEXP (x, 0)) == PLUS
4072       && REG_P (XEXP (XEXP (x, 0), 0))
4073       && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4074       && CONST_INT_P (XEXP (x, 1)))
4075     {
4076       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4077                    BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4078                    opnum, (enum reload_type) type);
4079       return x;
4080     }
4081
4082   /* We wish to handle large displacements off a base register by splitting
4083      the addend across an add and the mem insn.  This can cut the number of
4084      extra insns needed from 3 to 1.  It is only useful for load/store of a
4085      single register with 12 bit offset field.  */
4086   if (GET_CODE (x) == PLUS
4087       && REG_P (XEXP (x, 0))
4088       && CONST_INT_P (XEXP (x, 1))
4089       && HARD_REGISTER_P (XEXP (x, 0))
4090       && mode != TImode
4091       && mode != TFmode
4092       && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4093     {
4094       HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4095       HOST_WIDE_INT low = val & 0xfff;
4096       HOST_WIDE_INT high = val - low;
4097       HOST_WIDE_INT offs;
4098       rtx cst;
4099       enum machine_mode xmode = GET_MODE (x);
4100
4101       /* In ILP32, xmode can be either DImode or SImode.  */
4102       gcc_assert (xmode == DImode || xmode == SImode);
4103
4104       /* Reload non-zero BLKmode offsets.  This is because we cannot ascertain
4105          BLKmode alignment.  */
4106       if (GET_MODE_SIZE (mode) == 0)
4107         return NULL_RTX;
4108
4109       offs = low % GET_MODE_SIZE (mode);
4110
4111       /* Align misaligned offset by adjusting high part to compensate.  */
4112       if (offs != 0)
4113         {
4114           if (aarch64_uimm12_shift (high + offs))
4115             {
4116               /* Align down.  */
4117               low = low - offs;
4118               high = high + offs;
4119             }
4120           else
4121             {
4122               /* Align up.  */
4123               offs = GET_MODE_SIZE (mode) - offs;
4124               low = low + offs;
4125               high = high + (low & 0x1000) - offs;
4126               low &= 0xfff;
4127             }
4128         }
4129
4130       /* Check for overflow.  */
4131       if (high + low != val)
4132         return NULL_RTX;
4133
4134       cst = GEN_INT (high);
4135       if (!aarch64_uimm12_shift (high))
4136         cst = force_const_mem (xmode, cst);
4137
4138       /* Reload high part into base reg, leaving the low part
4139          in the mem instruction.
4140          Note that replacing this gen_rtx_PLUS with plus_constant is
4141          wrong in this case because we rely on the
4142          (plus (plus reg c1) c2) structure being preserved so that
4143          XEXP (*p, 0) in push_reload below uses the correct term.  */
4144       x = gen_rtx_PLUS (xmode,
4145                         gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4146                         GEN_INT (low));
4147
4148       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4149                    BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4150                    opnum, (enum reload_type) type);
4151       return x;
4152     }
4153
4154   return NULL_RTX;
4155 }
4156
4157
4158 static reg_class_t
4159 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4160                           reg_class_t rclass,
4161                           enum machine_mode mode,
4162                           secondary_reload_info *sri)
4163 {
4164   /* Without the TARGET_SIMD instructions we cannot move a Q register
4165      to a Q register directly.  We need a scratch.  */
4166   if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4167       && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4168       && reg_class_subset_p (rclass, FP_REGS))
4169     {
4170       if (mode == TFmode)
4171         sri->icode = CODE_FOR_aarch64_reload_movtf;
4172       else if (mode == TImode)
4173         sri->icode = CODE_FOR_aarch64_reload_movti;
4174       return NO_REGS;
4175     }
4176
4177   /* A TFmode or TImode memory access should be handled via an FP_REGS
4178      because AArch64 has richer addressing modes for LDR/STR instructions
4179      than LDP/STP instructions.  */
4180   if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4181       && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4182     return FP_REGS;
4183
4184   if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4185       return CORE_REGS;
4186
4187   return NO_REGS;
4188 }
4189
4190 static bool
4191 aarch64_can_eliminate (const int from, const int to)
4192 {
4193   /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4194      HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */
4195
4196   if (frame_pointer_needed)
4197     {
4198       if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4199         return true;
4200       if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4201         return false;
4202       if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4203           && !cfun->calls_alloca)
4204         return true;
4205       if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4206         return true;
4207
4208       return false;
4209     }
4210
4211   return true;
4212 }
4213
4214 HOST_WIDE_INT
4215 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4216 {
4217   HOST_WIDE_INT frame_size;
4218   HOST_WIDE_INT offset;
4219
4220   aarch64_layout_frame ();
4221   frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4222                 + crtl->outgoing_args_size
4223                 + cfun->machine->saved_varargs_size);
4224
4225    frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4226    offset = frame_size;
4227
4228    if (to == HARD_FRAME_POINTER_REGNUM)
4229      {
4230        if (from == ARG_POINTER_REGNUM)
4231          return offset - crtl->outgoing_args_size;
4232
4233        if (from == FRAME_POINTER_REGNUM)
4234          return cfun->machine->frame.saved_regs_size + get_frame_size ();
4235      }
4236
4237    if (to == STACK_POINTER_REGNUM)
4238      {
4239        if (from == FRAME_POINTER_REGNUM)
4240          {
4241            HOST_WIDE_INT elim = crtl->outgoing_args_size
4242                               + cfun->machine->frame.saved_regs_size
4243                               + get_frame_size ()
4244                               - cfun->machine->frame.fp_lr_offset;
4245            elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4246            return elim;
4247          }
4248      }
4249
4250    return offset;
4251 }
4252
4253
4254 /* Implement RETURN_ADDR_RTX.  We do not support moving back to a
4255    previous frame.  */
4256
4257 rtx
4258 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4259 {
4260   if (count != 0)
4261     return const0_rtx;
4262   return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4263 }
4264
4265
4266 static void
4267 aarch64_asm_trampoline_template (FILE *f)
4268 {
4269   if (TARGET_ILP32)
4270     {
4271       asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4272       asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4273     }
4274   else
4275     {
4276       asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4277       asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4278     }
4279   asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4280   assemble_aligned_integer (4, const0_rtx);
4281   assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4282   assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4283 }
4284
4285 static void
4286 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4287 {
4288   rtx fnaddr, mem, a_tramp;
4289   const int tramp_code_sz = 16;
4290
4291   /* Don't need to copy the trailing D-words, we fill those in below.  */
4292   emit_block_move (m_tramp, assemble_trampoline_template (),
4293                    GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4294   mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4295   fnaddr = XEXP (DECL_RTL (fndecl), 0);
4296   if (GET_MODE (fnaddr) != ptr_mode)
4297     fnaddr = convert_memory_address (ptr_mode, fnaddr);
4298   emit_move_insn (mem, fnaddr);
4299
4300   mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4301   emit_move_insn (mem, chain_value);
4302
4303   /* XXX We should really define a "clear_cache" pattern and use
4304      gen_clear_cache().  */
4305   a_tramp = XEXP (m_tramp, 0);
4306   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4307                      LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4308                      plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4309                      ptr_mode);
4310 }
4311
4312 static unsigned char
4313 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4314 {
4315   switch (regclass)
4316     {
4317     case CORE_REGS:
4318     case POINTER_REGS:
4319     case GENERAL_REGS:
4320     case ALL_REGS:
4321     case FP_REGS:
4322     case FP_LO_REGS:
4323       return
4324         aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4325                                        (GET_MODE_SIZE (mode) + 7) / 8;
4326     case STACK_REG:
4327       return 1;
4328
4329     case NO_REGS:
4330       return 0;
4331
4332     default:
4333       break;
4334     }
4335   gcc_unreachable ();
4336 }
4337
4338 static reg_class_t
4339 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4340 {
4341   if (regclass == POINTER_REGS)
4342     return GENERAL_REGS;
4343
4344   if (regclass == STACK_REG)
4345     {
4346       if (REG_P(x)
4347           && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4348           return regclass;
4349
4350       return NO_REGS;
4351     }
4352
4353   /* If it's an integer immediate that MOVI can't handle, then
4354      FP_REGS is not an option, so we return NO_REGS instead.  */
4355   if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4356       && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4357     return NO_REGS;
4358
4359   /* Register eliminiation can result in a request for
4360      SP+constant->FP_REGS.  We cannot support such operations which
4361      use SP as source and an FP_REG as destination, so reject out
4362      right now.  */
4363   if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4364     {
4365       rtx lhs = XEXP (x, 0);
4366
4367       /* Look through a possible SUBREG introduced by ILP32.  */
4368       if (GET_CODE (lhs) == SUBREG)
4369         lhs = SUBREG_REG (lhs);
4370
4371       gcc_assert (REG_P (lhs));
4372       gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4373                                       POINTER_REGS));
4374       return NO_REGS;
4375     }
4376
4377   return regclass;
4378 }
4379
/* Write a reference to the label NAME on F, using asm_fprintf with the
   "%U%s" format (NAME preceded by whatever asm_fprintf emits for %U).  */
void
aarch64_asm_output_labelref (FILE *f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}
4385
4386 static void
4387 aarch64_elf_asm_constructor (rtx symbol, int priority)
4388 {
4389   if (priority == DEFAULT_INIT_PRIORITY)
4390     default_ctor_section_asm_out_constructor (symbol, priority);
4391   else
4392     {
4393       section *s;
4394       char buf[18];
4395       snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4396       s = get_section (buf, SECTION_WRITE, NULL);
4397       switch_to_section (s);
4398       assemble_align (POINTER_SIZE);
4399       assemble_aligned_integer (POINTER_BYTES, symbol);
4400     }
4401 }
4402
4403 static void
4404 aarch64_elf_asm_destructor (rtx symbol, int priority)
4405 {
4406   if (priority == DEFAULT_INIT_PRIORITY)
4407     default_dtor_section_asm_out_destructor (symbol, priority);
4408   else
4409     {
4410       section *s;
4411       char buf[18];
4412       snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4413       s = get_section (buf, SECTION_WRITE, NULL);
4414       switch_to_section (s);
4415       assemble_align (POINTER_SIZE);
4416       assemble_aligned_integer (POINTER_BYTES, symbol);
4417     }
4418 }
4419
4420 const char*
4421 aarch64_output_casesi (rtx *operands)
4422 {
4423   char buf[100];
4424   char label[100];
4425   rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4426   int index;
4427   static const char *const patterns[4][2] =
4428   {
4429     {
4430       "ldrb\t%w3, [%0,%w1,uxtw]",
4431       "add\t%3, %4, %w3, sxtb #2"
4432     },
4433     {
4434       "ldrh\t%w3, [%0,%w1,uxtw #1]",
4435       "add\t%3, %4, %w3, sxth #2"
4436     },
4437     {
4438       "ldr\t%w3, [%0,%w1,uxtw #2]",
4439       "add\t%3, %4, %w3, sxtw #2"
4440     },
4441     /* We assume that DImode is only generated when not optimizing and
4442        that we don't really need 64-bit address offsets.  That would
4443        imply an object file with 8GB of code in a single function!  */
4444     {
4445       "ldr\t%w3, [%0,%w1,uxtw #2]",
4446       "add\t%3, %4, %w3, sxtw #2"
4447     }
4448   };
4449
4450   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4451
4452   index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4453
4454   gcc_assert (index >= 0 && index <= 3);
4455
4456   /* Need to implement table size reduction, by chaning the code below.  */
4457   output_asm_insn (patterns[index][0], operands);
4458   ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4459   snprintf (buf, sizeof (buf),
4460             "adr\t%%4, %s", targetm.strip_name_encoding (label));
4461   output_asm_insn (buf, operands);
4462   output_asm_insn (patterns[index][1], operands);
4463   output_asm_insn ("br\t%3", operands);
4464   assemble_label (asm_out_file, label);
4465   return "";
4466 }
4467
4468
4469 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4470    masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4471    operator.  */
4472
4473 int
4474 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4475 {
4476   if (shift >= 0 && shift <= 3)
4477     {
4478       int size;
4479       for (size = 8; size <= 32; size *= 2)
4480         {
4481           HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4482           if (mask == bits << shift)
4483             return size;
4484         }
4485     }
4486   return 0;
4487 }
4488
4489 static bool
4490 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4491                                    const_rtx x ATTRIBUTE_UNUSED)
4492 {
4493   /* We can't use blocks for constants when we're using a per-function
4494      constant pool.  */
4495   return false;
4496 }
4497
4498 static section *
4499 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4500                             rtx x ATTRIBUTE_UNUSED,
4501                             unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4502 {
4503   /* Force all constant pool entries into the current function section.  */
4504   return function_section (current_function_decl);
4505 }
4506
4507
4508 /* Costs.  */
4509
4510 /* Helper function for rtx cost calculation.  Strip a shift expression
4511    from X.  Returns the inner operand if successful, or the original
4512    expression on failure.  */
4513 static rtx
4514 aarch64_strip_shift (rtx x)
4515 {
4516   rtx op = x;
4517
4518   if ((GET_CODE (op) == ASHIFT
4519        || GET_CODE (op) == ASHIFTRT
4520        || GET_CODE (op) == LSHIFTRT)
4521       && CONST_INT_P (XEXP (op, 1)))
4522     return XEXP (op, 0);
4523
4524   if (GET_CODE (op) == MULT
4525       && CONST_INT_P (XEXP (op, 1))
4526       && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4527     return XEXP (op, 0);
4528
4529   return x;
4530 }
4531
4532 /* Helper function for rtx cost calculation.  Strip an extend
4533    expression from X.  Returns the inner operand if successful, or the
4534    original expression on failure.  We deal with a number of possible
4535    canonicalization variations here.  */
4536 static rtx
4537 aarch64_strip_extend (rtx x)
4538 {
4539   rtx op = x;
4540
4541   /* Zero and sign extraction of a widened value.  */
4542   if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4543       && XEXP (op, 2) == const0_rtx
4544       && GET_CODE (XEXP (op, 0)) == MULT
4545       && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4546                                          XEXP (op, 1)))
4547     return XEXP (XEXP (op, 0), 0);
4548
4549   /* It can also be represented (for zero-extend) as an AND with an
4550      immediate.  */
4551   if (GET_CODE (op) == AND
4552       && GET_CODE (XEXP (op, 0)) == MULT
4553       && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4554       && CONST_INT_P (XEXP (op, 1))
4555       && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4556                            INTVAL (XEXP (op, 1))) != 0)
4557     return XEXP (XEXP (op, 0), 0);
4558
4559   /* Now handle extended register, as this may also have an optional
4560      left shift by 1..4.  */
4561   if (GET_CODE (op) == ASHIFT
4562       && CONST_INT_P (XEXP (op, 1))
4563       && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4564     op = XEXP (op, 0);
4565
4566   if (GET_CODE (op) == ZERO_EXTEND
4567       || GET_CODE (op) == SIGN_EXTEND)
4568     op = XEXP (op, 0);
4569
4570   if (op != x)
4571     return op;
4572
4573   return x;
4574 }
4575
4576 /* Helper function for rtx cost calculation.  Calculate the cost of
4577    a MULT, which may be part of a multiply-accumulate rtx.  Return
4578    the calculated cost of the expression, recursing manually in to
4579    operands where needed.  */
4580
4581 static int
4582 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4583 {
4584   rtx op0, op1;
4585   const struct cpu_cost_table *extra_cost
4586     = aarch64_tune_params->insn_extra_cost;
4587   int cost = 0;
4588   bool maybe_fma = (outer == PLUS || outer == MINUS);
4589   enum machine_mode mode = GET_MODE (x);
4590
4591   gcc_checking_assert (code == MULT);
4592
4593   op0 = XEXP (x, 0);
4594   op1 = XEXP (x, 1);
4595
4596   if (VECTOR_MODE_P (mode))
4597     mode = GET_MODE_INNER (mode);
4598
4599   /* Integer multiply/fma.  */
4600   if (GET_MODE_CLASS (mode) == MODE_INT)
4601     {
4602       /* The multiply will be canonicalized as a shift, cost it as such.  */
4603       if (CONST_INT_P (op1)
4604           && exact_log2 (INTVAL (op1)) > 0)
4605         {
4606           if (speed)
4607             {
4608               if (maybe_fma)
4609                 /* ADD (shifted register).  */
4610                 cost += extra_cost->alu.arith_shift;
4611               else
4612                 /* LSL (immediate).  */
4613                 cost += extra_cost->alu.shift;
4614             }
4615
4616           cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4617
4618           return cost;
4619         }
4620
4621       /* Integer multiplies or FMAs have zero/sign extending variants.  */
4622       if ((GET_CODE (op0) == ZERO_EXTEND
4623            && GET_CODE (op1) == ZERO_EXTEND)
4624           || (GET_CODE (op0) == SIGN_EXTEND
4625               && GET_CODE (op1) == SIGN_EXTEND))
4626         {
4627           cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4628                   + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4629
4630           if (speed)
4631             {
4632               if (maybe_fma)
4633                 /* MADD/SMADDL/UMADDL.  */
4634                 cost += extra_cost->mult[0].extend_add;
4635               else
4636                 /* MUL/SMULL/UMULL.  */
4637                 cost += extra_cost->mult[0].extend;
4638             }
4639
4640           return cost;
4641         }
4642
4643       /* This is either an integer multiply or an FMA.  In both cases
4644          we want to recurse and cost the operands.  */
4645       cost += rtx_cost (op0, MULT, 0, speed)
4646               + rtx_cost (op1, MULT, 1, speed);
4647
4648       if (speed)
4649         {
4650           if (maybe_fma)
4651             /* MADD.  */
4652             cost += extra_cost->mult[mode == DImode].add;
4653           else
4654             /* MUL.  */
4655             cost += extra_cost->mult[mode == DImode].simple;
4656         }
4657
4658       return cost;
4659     }
4660   else
4661     {
4662       if (speed)
4663         {
4664           /* Floating-point FMA can also support negations of the
4665              operands.  */
4666           if (GET_CODE (op0) == NEG)
4667             {
4668               maybe_fma = true;
4669               op0 = XEXP (op0, 0);
4670             }
4671           if (GET_CODE (op1) == NEG)
4672             {
4673               maybe_fma = true;
4674               op1 = XEXP (op1, 0);
4675             }
4676
4677           if (maybe_fma)
4678             /* FMADD/FNMADD/FNMSUB/FMSUB.  */
4679             cost += extra_cost->fp[mode == DFmode].fma;
4680           else
4681             /* FMUL.  */
4682             cost += extra_cost->fp[mode == DFmode].mult;
4683         }
4684
4685       cost += rtx_cost (op0, MULT, 0, speed)
4686               + rtx_cost (op1, MULT, 1, speed);
4687       return cost;
4688     }
4689 }
4690
4691 static int
4692 aarch64_address_cost (rtx x,
4693                       enum machine_mode mode,
4694                       addr_space_t as ATTRIBUTE_UNUSED,
4695                       bool speed)
4696 {
4697   enum rtx_code c = GET_CODE (x);
4698   const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4699   struct aarch64_address_info info;
4700   int cost = 0;
4701   info.shift = 0;
4702
4703   if (!aarch64_classify_address (&info, x, mode, c, false))
4704     {
4705       if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4706         {
4707           /* This is a CONST or SYMBOL ref which will be split
4708              in a different way depending on the code model in use.
4709              Cost it through the generic infrastructure.  */
4710           int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4711           /* Divide through by the cost of one instruction to
4712              bring it to the same units as the address costs.  */
4713           cost_symbol_ref /= COSTS_N_INSNS (1);
4714           /* The cost is then the cost of preparing the address,
4715              followed by an immediate (possibly 0) offset.  */
4716           return cost_symbol_ref + addr_cost->imm_offset;
4717         }
4718       else
4719         {
4720           /* This is most likely a jump table from a case
4721              statement.  */
4722           return addr_cost->register_offset;
4723         }
4724     }
4725
4726   switch (info.type)
4727     {
4728       case ADDRESS_LO_SUM:
4729       case ADDRESS_SYMBOLIC:
4730       case ADDRESS_REG_IMM:
4731         cost += addr_cost->imm_offset;
4732         break;
4733
4734       case ADDRESS_REG_WB:
4735         if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4736           cost += addr_cost->pre_modify;
4737         else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4738           cost += addr_cost->post_modify;
4739         else
4740           gcc_unreachable ();
4741
4742         break;
4743
4744       case ADDRESS_REG_REG:
4745         cost += addr_cost->register_offset;
4746         break;
4747
4748       case ADDRESS_REG_UXTW:
4749       case ADDRESS_REG_SXTW:
4750         cost += addr_cost->register_extend;
4751         break;
4752
4753       default:
4754         gcc_unreachable ();
4755     }
4756
4757
4758   if (info.shift > 0)
4759     {
4760       /* For the sake of calculating the cost of the shifted register
4761          component, we can treat same sized modes in the same way.  */
4762       switch (GET_MODE_BITSIZE (mode))
4763         {
4764           case 16:
4765             cost += addr_cost->addr_scale_costs.hi;
4766             break;
4767
4768           case 32:
4769             cost += addr_cost->addr_scale_costs.si;
4770             break;
4771
4772           case 64:
4773             cost += addr_cost->addr_scale_costs.di;
4774             break;
4775
4776           /* We can't tell, or this is a 128-bit vector.  */
4777           default:
4778             cost += addr_cost->addr_scale_costs.ti;
4779             break;
4780         }
4781     }
4782
4783   return cost;
4784 }
4785
4786 /* Calculate the cost of calculating X, storing it in *COST.  Result
4787    is true if the total cost of the operation has now been calculated.  */
4788 static bool
4789 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4790                    int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4791 {
4792   rtx op0, op1;
4793   const struct cpu_cost_table *extra_cost
4794     = aarch64_tune_params->insn_extra_cost;
4795   enum machine_mode mode = GET_MODE (x);
4796
4797   /* By default, assume that everything has equivalent cost to the
4798      cheapest instruction.  Any additional costs are applied as a delta
4799      above this default.  */
4800   *cost = COSTS_N_INSNS (1);
4801
4802   /* TODO: The cost infrastructure currently does not handle
4803      vector operations.  Assume that all vector operations
4804      are equally expensive.  */
4805   if (VECTOR_MODE_P (mode))
4806     {
4807       if (speed)
4808         *cost += extra_cost->vect.alu;
4809       return true;
4810     }
4811
4812   switch (code)
4813     {
4814     case SET:
4815       /* The cost depends entirely on the operands to SET.  */
4816       *cost = 0;
4817       op0 = SET_DEST (x);
4818       op1 = SET_SRC (x);
4819
4820       switch (GET_CODE (op0))
4821         {
4822         case MEM:
4823           if (speed)
4824             {
4825               rtx address = XEXP (op0, 0);
4826               if (GET_MODE_CLASS (mode) == MODE_INT)
4827                 *cost += extra_cost->ldst.store;
4828               else if (mode == SFmode)
4829                 *cost += extra_cost->ldst.storef;
4830               else if (mode == DFmode)
4831                 *cost += extra_cost->ldst.stored;
4832
4833               *cost +=
4834                 COSTS_N_INSNS (aarch64_address_cost (address, mode,
4835                                                      0, speed));
4836             }
4837
4838           *cost += rtx_cost (op1, SET, 1, speed);
4839           return true;
4840
4841         case SUBREG:
4842           if (! REG_P (SUBREG_REG (op0)))
4843             *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4844
4845           /* Fall through.  */
4846         case REG:
4847           /* const0_rtx is in general free, but we will use an
4848              instruction to set a register to 0.  */
4849           if (REG_P (op1) || op1 == const0_rtx)
4850             {
4851               /* The cost is 1 per register copied.  */
4852               int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
4853                               / UNITS_PER_WORD;
4854               *cost = COSTS_N_INSNS (n_minus_1 + 1);
4855             }
4856           else
4857             /* Cost is just the cost of the RHS of the set.  */
4858             *cost += rtx_cost (op1, SET, 1, speed);
4859           return true;
4860
4861         case ZERO_EXTRACT:
4862         case SIGN_EXTRACT:
4863           /* Bit-field insertion.  Strip any redundant widening of
4864              the RHS to meet the width of the target.  */
4865           if (GET_CODE (op1) == SUBREG)
4866             op1 = SUBREG_REG (op1);
4867           if ((GET_CODE (op1) == ZERO_EXTEND
4868                || GET_CODE (op1) == SIGN_EXTEND)
4869               && GET_CODE (XEXP (op0, 1)) == CONST_INT
4870               && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4871                   >= INTVAL (XEXP (op0, 1))))
4872             op1 = XEXP (op1, 0);
4873
4874           if (CONST_INT_P (op1))
4875             {
4876               /* MOV immediate is assumed to always be cheap.  */
4877               *cost = COSTS_N_INSNS (1);
4878             }
4879           else
4880             {
4881               /* BFM.  */
4882               if (speed)
4883                 *cost += extra_cost->alu.bfi;
4884               *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
4885             }
4886
4887           return true;
4888
4889         default:
4890           /* We can't make sense of this, assume default cost.  */
4891           *cost = COSTS_N_INSNS (1);
4892           break;
4893         }
4894       return false;
4895
4896     case CONST_INT:
4897       /* If an instruction can incorporate a constant within the
4898          instruction, the instruction's expression avoids calling
4899          rtx_cost() on the constant.  If rtx_cost() is called on a
4900          constant, then it is usually because the constant must be
4901          moved into a register by one or more instructions.
4902
4903          The exception is constant 0, which can be expressed
4904          as XZR/WZR and is therefore free.  The exception to this is
4905          if we have (set (reg) (const0_rtx)) in which case we must cost
4906          the move.  However, we can catch that when we cost the SET, so
4907          we don't need to consider that here.  */
4908       if (x == const0_rtx)
4909         *cost = 0;
4910       else
4911         {
4912           /* To an approximation, building any other constant is
4913              proportionally expensive to the number of instructions
4914              required to build that constant.  This is true whether we
4915              are compiling for SPEED or otherwise.  */
4916           *cost = COSTS_N_INSNS (aarch64_build_constant (0,
4917                                                          INTVAL (x),
4918                                                          false));
4919         }
4920       return true;
4921
4922     case CONST_DOUBLE:
4923       if (speed)
4924         {
4925           /* mov[df,sf]_aarch64.  */
4926           if (aarch64_float_const_representable_p (x))
4927             /* FMOV (scalar immediate).  */
4928             *cost += extra_cost->fp[mode == DFmode].fpconst;
4929           else if (!aarch64_float_const_zero_rtx_p (x))
4930             {
4931               /* This will be a load from memory.  */
4932               if (mode == DFmode)
4933                 *cost += extra_cost->ldst.loadd;
4934               else
4935                 *cost += extra_cost->ldst.loadf;
4936             }
4937           else
4938             /* Otherwise this is +0.0.  We get this using MOVI d0, #0
4939                or MOV v0.s[0], wzr - neither of which are modeled by the
4940                cost tables.  Just use the default cost.  */
4941             {
4942             }
4943         }
4944
4945       return true;
4946
4947     case MEM:
4948       if (speed)
4949         {
4950           /* For loads we want the base cost of a load, plus an
4951              approximation for the additional cost of the addressing
4952              mode.  */
4953           rtx address = XEXP (x, 0);
4954           if (GET_MODE_CLASS (mode) == MODE_INT)
4955             *cost += extra_cost->ldst.load;
4956           else if (mode == SFmode)
4957             *cost += extra_cost->ldst.loadf;
4958           else if (mode == DFmode)
4959             *cost += extra_cost->ldst.loadd;
4960
4961           *cost +=
4962                 COSTS_N_INSNS (aarch64_address_cost (address, mode,
4963                                                      0, speed));
4964         }
4965
4966       return true;
4967
4968     case NEG:
4969       op0 = XEXP (x, 0);
4970
4971       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4972        {
4973           if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
4974               || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
4975             {
4976               /* CSETM.  */
4977               *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
4978               return true;
4979             }
4980
4981           /* Cost this as SUB wzr, X.  */
4982           op0 = CONST0_RTX (GET_MODE (x));
4983           op1 = XEXP (x, 0);
4984           goto cost_minus;
4985         }
4986
4987       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
4988         {
4989           /* Support (neg(fma...)) as a single instruction only if
4990              sign of zeros is unimportant.  This matches the decision
4991              making in aarch64.md.  */
4992           if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
4993             {
4994               /* FNMADD.  */
4995               *cost = rtx_cost (op0, NEG, 0, speed);
4996               return true;
4997             }
4998           if (speed)
4999             /* FNEG.  */
5000             *cost += extra_cost->fp[mode == DFmode].neg;
5001           return false;
5002         }
5003
5004       return false;
5005
5006     case COMPARE:
5007       op0 = XEXP (x, 0);
5008       op1 = XEXP (x, 1);
5009
5010       if (op1 == const0_rtx
5011           && GET_CODE (op0) == AND)
5012         {
5013           x = op0;
5014           goto cost_logic;
5015         }
5016
5017       /* Comparisons can work if the order is swapped.
5018          Canonicalization puts the more complex operation first, but
5019          we want it in op1.  */
5020       if (! (REG_P (op0)
5021              || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5022         {
5023           op0 = XEXP (x, 1);
5024           op1 = XEXP (x, 0);
5025         }
5026       goto cost_minus;
5027
5028     case MINUS:
5029       {
5030         op0 = XEXP (x, 0);
5031         op1 = XEXP (x, 1);
5032
5033 cost_minus:
5034         /* Detect valid immediates.  */
5035         if ((GET_MODE_CLASS (mode) == MODE_INT
5036              || (GET_MODE_CLASS (mode) == MODE_CC
5037                  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5038             && CONST_INT_P (op1)
5039             && aarch64_uimm12_shift (INTVAL (op1)))
5040           {
5041             *cost += rtx_cost (op0, MINUS, 0, speed);
5042
5043             if (speed)
5044               /* SUB(S) (immediate).  */
5045               *cost += extra_cost->alu.arith;
5046             return true;
5047
5048           }
5049
5050         rtx new_op1 = aarch64_strip_extend (op1);
5051
5052         /* Cost this as an FMA-alike operation.  */
5053         if ((GET_CODE (new_op1) == MULT
5054              || GET_CODE (new_op1) == ASHIFT)
5055             && code != COMPARE)
5056           {
5057             *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5058                                             (enum rtx_code) code,
5059                                             speed);
5060             *cost += rtx_cost (op0, MINUS, 0, speed);
5061             return true;
5062           }
5063
5064         *cost += rtx_cost (new_op1, MINUS, 1, speed);
5065
5066         if (speed)
5067           {
5068             if (GET_MODE_CLASS (mode) == MODE_INT)
5069               /* SUB(S).  */
5070               *cost += extra_cost->alu.arith;
5071             else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5072               /* FSUB.  */
5073               *cost += extra_cost->fp[mode == DFmode].addsub;
5074           }
5075         return true;
5076       }
5077
5078     case PLUS:
5079       {
5080         rtx new_op0;
5081
5082         op0 = XEXP (x, 0);
5083         op1 = XEXP (x, 1);
5084
5085         if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5086             || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5087           {
5088             /* CSINC.  */
5089             *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5090             *cost += rtx_cost (op1, PLUS, 1, speed);
5091             return true;
5092           }
5093
5094         if (GET_MODE_CLASS (mode) == MODE_INT
5095             && CONST_INT_P (op1)
5096             && aarch64_uimm12_shift (INTVAL (op1)))
5097           {
5098             *cost += rtx_cost (op0, PLUS, 0, speed);
5099
5100             if (speed)
5101               /* ADD (immediate).  */
5102               *cost += extra_cost->alu.arith;
5103             return true;
5104           }
5105
5106         /* Strip any extend, leave shifts behind as we will
5107            cost them through mult_cost.  */
5108         new_op0 = aarch64_strip_extend (op0);
5109
5110         if (GET_CODE (new_op0) == MULT
5111             || GET_CODE (new_op0) == ASHIFT)
5112           {
5113             *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5114                                             speed);
5115             *cost += rtx_cost (op1, PLUS, 1, speed);
5116             return true;
5117           }
5118
5119         *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5120                   + rtx_cost (op1, PLUS, 1, speed));
5121
5122         if (speed)
5123           {
5124             if (GET_MODE_CLASS (mode) == MODE_INT)
5125               /* ADD.  */
5126               *cost += extra_cost->alu.arith;
5127             else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5128               /* FADD.  */
5129               *cost += extra_cost->fp[mode == DFmode].addsub;
5130           }
5131         return true;
5132       }
5133
5134     case IOR:
5135     case XOR:
5136     case AND:
5137     cost_logic:
5138       op0 = XEXP (x, 0);
5139       op1 = XEXP (x, 1);
5140
5141       if (code == AND
5142           && GET_CODE (op0) == MULT
5143           && CONST_INT_P (XEXP (op0, 1))
5144           && CONST_INT_P (op1)
5145           && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5146                                INTVAL (op1)) != 0)
5147         {
5148           /* This is a UBFM/SBFM.  */
5149           *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5150           if (speed)
5151             *cost += extra_cost->alu.bfx;
5152           return true;
5153         }
5154
5155       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5156         {
5157           /* We possibly get the immediate for free, this is not
5158              modelled.  */
5159           if (CONST_INT_P (op1)
5160               && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5161             {
5162               *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5163
5164               if (speed)
5165                 *cost += extra_cost->alu.logical;
5166
5167               return true;
5168             }
5169           else
5170             {
5171               rtx new_op0 = op0;
5172
5173               /* Handle ORN, EON, or BIC.  */
5174               if (GET_CODE (op0) == NOT)
5175                 op0 = XEXP (op0, 0);
5176
5177               new_op0 = aarch64_strip_shift (op0);
5178
5179               /* If we had a shift on op0 then this is a logical-shift-
5180                  by-register/immediate operation.  Otherwise, this is just
5181                  a logical operation.  */
5182               if (speed)
5183                 {
5184                   if (new_op0 != op0)
5185                     {
5186                       /* Shift by immediate.  */
5187                       if (CONST_INT_P (XEXP (op0, 1)))
5188                         *cost += extra_cost->alu.log_shift;
5189                       else
5190                         *cost += extra_cost->alu.log_shift_reg;
5191                     }
5192                   else
5193                     *cost += extra_cost->alu.logical;
5194                 }
5195
5196               /* In both cases we want to cost both operands.  */
5197               *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5198                        + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5199
5200               return true;
5201             }
5202         }
5203       return false;
5204
5205     case NOT:
5206       /* MVN.  */
5207       if (speed)
5208         *cost += extra_cost->alu.logical;
5209
5210       /* The logical instruction could have the shifted register form,
5211          but the cost is the same if the shift is processed as a separate
5212          instruction, so we don't bother with it here.  */
5213       return false;
5214
5215     case ZERO_EXTEND:
5216
5217       op0 = XEXP (x, 0);
5218       /* If a value is written in SI mode, then zero extended to DI
5219          mode, the operation will in general be free as a write to
5220          a 'w' register implicitly zeroes the upper bits of an 'x'
5221          register.  However, if this is
5222
5223            (set (reg) (zero_extend (reg)))
5224
5225          we must cost the explicit register move.  */
5226       if (mode == DImode
5227           && GET_MODE (op0) == SImode
5228           && outer == SET)
5229         {
5230           int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5231
5232           if (!op_cost && speed)
5233             /* MOV.  */
5234             *cost += extra_cost->alu.extend;
5235           else
5236             /* Free, the cost is that of the SI mode operation.  */
5237             *cost = op_cost;
5238
5239           return true;
5240         }
5241       else if (MEM_P (XEXP (x, 0)))
5242         {
5243           /* All loads can zero extend to any size for free.  */
5244           *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
5245           return true;
5246         }
5247
5248       /* UXTB/UXTH.  */
5249       if (speed)
5250         *cost += extra_cost->alu.extend;
5251
5252       return false;
5253
5254     case SIGN_EXTEND:
5255       if (MEM_P (XEXP (x, 0)))
5256         {
5257           /* LDRSH.  */
5258           if (speed)
5259             {
5260               rtx address = XEXP (XEXP (x, 0), 0);
5261               *cost += extra_cost->ldst.load_sign_extend;
5262
5263               *cost +=
5264                 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5265                                                      0, speed));
5266             }
5267           return true;
5268         }
5269
5270       if (speed)
5271         *cost += extra_cost->alu.extend;
5272       return false;
5273
5274     case ROTATE:
5275       if (!CONST_INT_P (XEXP (x, 1)))
5276         *cost += COSTS_N_INSNS (2);
5277       /* Fall through.  */
5278     case ROTATERT:
5279     case LSHIFTRT:
5280     case ASHIFT:
5281     case ASHIFTRT:
5282
5283       /* Shifting by a register often takes an extra cycle.  */
5284       if (speed && !CONST_INT_P (XEXP (x, 1)))
5285         *cost += extra_cost->alu.arith_shift_reg;
5286
5287       *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
5288       return true;
5289
5290     case HIGH:
5291       if (!CONSTANT_P (XEXP (x, 0)))
5292         *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
5293       return true;
5294
5295     case LO_SUM:
5296       if (!CONSTANT_P (XEXP (x, 1)))
5297         *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
5298       *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
5299       return true;
5300
5301     case ZERO_EXTRACT:
5302     case SIGN_EXTRACT:
5303       *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
5304       return true;
5305
5306     case MULT:
5307       *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5308       /* aarch64_rtx_mult_cost always handles recursion to its
5309          operands.  */
5310       return true;
5311
5312     case MOD:
5313     case UMOD:
5314       *cost = COSTS_N_INSNS (2);
5315       if (speed)
5316         {
5317           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5318             *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5319                       + extra_cost->mult[GET_MODE (x) == DImode].idiv);
5320           else if (GET_MODE (x) == DFmode)
5321             *cost += (extra_cost->fp[1].mult
5322                       + extra_cost->fp[1].div);
5323           else if (GET_MODE (x) == SFmode)
5324             *cost += (extra_cost->fp[0].mult
5325                       + extra_cost->fp[0].div);
5326         }
5327       return false;  /* All arguments need to be in registers.  */
5328
5329     case DIV:
5330     case UDIV:
5331       *cost = COSTS_N_INSNS (1);
5332       if (speed)
5333         {
5334           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5335             *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
5336           else if (GET_MODE (x) == DFmode)
5337             *cost += extra_cost->fp[1].div;
5338           else if (GET_MODE (x) == SFmode)
5339             *cost += extra_cost->fp[0].div;
5340         }
5341       return false;  /* All arguments need to be in registers.  */
5342
5343     default:
5344       break;
5345     }
5346   return false;
5347 }
5348
5349 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
5350    calculated for X.  This cost is stored in *COST.  Returns true
5351    if the total cost of X was calculated.  */
5352 static bool
5353 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
5354                    int param, int *cost, bool speed)
5355 {
5356   bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
5357
5358   if (dump_file && (dump_flags & TDF_DETAILS))
5359     {
5360       print_rtl_single (dump_file, x);
5361       fprintf (dump_file, "\n%s cost: %d (%s)\n",
5362                speed ? "Hot" : "Cold",
5363                *cost, result ? "final" : "partial");
5364     }
5365
5366   return result;
5367 }
5368
5369 static int
5370 aarch64_register_move_cost (enum machine_mode mode,
5371                             reg_class_t from_i, reg_class_t to_i)
5372 {
5373   enum reg_class from = (enum reg_class) from_i;
5374   enum reg_class to = (enum reg_class) to_i;
5375   const struct cpu_regmove_cost *regmove_cost
5376     = aarch64_tune_params->regmove_cost;
5377
5378   /* Moving between GPR and stack cost is the same as GP2GP.  */
5379   if ((from == GENERAL_REGS && to == STACK_REG)
5380       || (to == GENERAL_REGS && from == STACK_REG))
5381     return regmove_cost->GP2GP;
5382
5383   /* To/From the stack register, we move via the gprs.  */
5384   if (to == STACK_REG || from == STACK_REG)
5385     return aarch64_register_move_cost (mode, from, GENERAL_REGS)
5386             + aarch64_register_move_cost (mode, GENERAL_REGS, to);
5387
5388   if (from == GENERAL_REGS && to == GENERAL_REGS)
5389     return regmove_cost->GP2GP;
5390   else if (from == GENERAL_REGS)
5391     return regmove_cost->GP2FP;
5392   else if (to == GENERAL_REGS)
5393     return regmove_cost->FP2GP;
5394
5395   /* When AdvSIMD instructions are disabled it is not possible to move
5396      a 128-bit value directly between Q registers.  This is handled in
5397      secondary reload.  A general register is used as a scratch to move
5398      the upper DI value and the lower DI value is moved directly,
5399      hence the cost is the sum of three moves. */
5400   if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 128)
5401     return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
5402
5403   return regmove_cost->FP2FP;
5404 }
5405
5406 static int
5407 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5408                           reg_class_t rclass ATTRIBUTE_UNUSED,
5409                           bool in ATTRIBUTE_UNUSED)
5410 {
5411   return aarch64_tune_params->memmov_cost;
5412 }
5413
5414 /* Return the number of instructions that can be issued per cycle.  */
5415 static int
5416 aarch64_sched_issue_rate (void)
5417 {
5418   return aarch64_tune_params->issue_rate;
5419 }
5420
5421 /* Vectorizer cost model target hooks.  */
5422
5423 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
5424 static int
5425 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5426                                     tree vectype,
5427                                     int misalign ATTRIBUTE_UNUSED)
5428 {
5429   unsigned elements;
5430
5431   switch (type_of_cost)
5432     {
5433       case scalar_stmt:
5434         return aarch64_tune_params->vec_costs->scalar_stmt_cost;
5435
5436       case scalar_load:
5437         return aarch64_tune_params->vec_costs->scalar_load_cost;
5438
5439       case scalar_store:
5440         return aarch64_tune_params->vec_costs->scalar_store_cost;
5441
5442       case vector_stmt:
5443         return aarch64_tune_params->vec_costs->vec_stmt_cost;
5444
5445       case vector_load:
5446         return aarch64_tune_params->vec_costs->vec_align_load_cost;
5447
5448       case vector_store:
5449         return aarch64_tune_params->vec_costs->vec_store_cost;
5450
5451       case vec_to_scalar:
5452         return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
5453
5454       case scalar_to_vec:
5455         return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
5456
5457       case unaligned_load:
5458         return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
5459
5460       case unaligned_store:
5461         return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
5462
5463       case cond_branch_taken:
5464         return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
5465
5466       case cond_branch_not_taken:
5467         return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
5468
5469       case vec_perm:
5470       case vec_promote_demote:
5471         return aarch64_tune_params->vec_costs->vec_stmt_cost;
5472
5473       case vec_construct:
5474         elements = TYPE_VECTOR_SUBPARTS (vectype);
5475         return elements / 2 + 1;
5476
5477       default:
5478         gcc_unreachable ();
5479     }
5480 }
5481
5482 /* Implement targetm.vectorize.add_stmt_cost.  */
5483 static unsigned
5484 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5485                        struct _stmt_vec_info *stmt_info, int misalign,
5486                        enum vect_cost_model_location where)
5487 {
5488   unsigned *cost = (unsigned *) data;
5489   unsigned retval = 0;
5490
5491   if (flag_vect_cost_model)
5492     {
5493       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5494       int stmt_cost =
5495             aarch64_builtin_vectorization_cost (kind, vectype, misalign);
5496
5497       /* Statements in an inner loop relative to the loop being
5498          vectorized are weighted more heavily.  The value here is
5499          a function (linear for now) of the loop nest level.  */
5500       if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5501         {
5502           loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5503           struct loop *loop =  LOOP_VINFO_LOOP (loop_info);
5504           unsigned nest_level = loop_depth (loop);
5505
5506           count *= nest_level;
5507         }
5508
5509       retval = (unsigned) (count * stmt_cost);
5510       cost[where] += retval;
5511     }
5512
5513   return retval;
5514 }
5515
5516 static void initialize_aarch64_code_model (void);
5517
5518 /* Parse the architecture extension string.  */
5519
5520 static void
5521 aarch64_parse_extension (char *str)
5522 {
5523   /* The extension string is parsed left to right.  */
5524   const struct aarch64_option_extension *opt = NULL;
5525
5526   /* Flag to say whether we are adding or removing an extension.  */
5527   int adding_ext = -1;
5528
5529   while (str != NULL && *str != 0)
5530     {
5531       char *ext;
5532       size_t len;
5533
5534       str++;
5535       ext = strchr (str, '+');
5536
5537       if (ext != NULL)
5538         len = ext - str;
5539       else
5540         len = strlen (str);
5541
5542       if (len >= 2 && strncmp (str, "no", 2) == 0)
5543         {
5544           adding_ext = 0;
5545           len -= 2;
5546           str += 2;
5547         }
5548       else if (len > 0)
5549         adding_ext = 1;
5550
5551       if (len == 0)
5552         {
5553           error ("missing feature modifier after %qs", "+no");
5554           return;
5555         }
5556
5557       /* Scan over the extensions table trying to find an exact match.  */
5558       for (opt = all_extensions; opt->name != NULL; opt++)
5559         {
5560           if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5561             {
5562               /* Add or remove the extension.  */
5563               if (adding_ext)
5564                 aarch64_isa_flags |= opt->flags_on;
5565               else
5566                 aarch64_isa_flags &= ~(opt->flags_off);
5567               break;
5568             }
5569         }
5570
5571       if (opt->name == NULL)
5572         {
5573           /* Extension not found in list.  */
5574           error ("unknown feature modifier %qs", str);
5575           return;
5576         }
5577
5578       str = ext;
5579     };
5580
5581   return;
5582 }
5583
5584 /* Parse the ARCH string.  */
5585
5586 static void
5587 aarch64_parse_arch (void)
5588 {
5589   char *ext;
5590   const struct processor *arch;
5591   char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5592   size_t len;
5593
5594   strcpy (str, aarch64_arch_string);
5595
5596   ext = strchr (str, '+');
5597
5598   if (ext != NULL)
5599     len = ext - str;
5600   else
5601     len = strlen (str);
5602
5603   if (len == 0)
5604     {
5605       error ("missing arch name in -march=%qs", str);
5606       return;
5607     }
5608
5609   /* Loop through the list of supported ARCHs to find a match.  */
5610   for (arch = all_architectures; arch->name != NULL; arch++)
5611     {
5612       if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5613         {
5614           selected_arch = arch;
5615           aarch64_isa_flags = selected_arch->flags;
5616
5617           if (!selected_cpu)
5618             selected_cpu = &all_cores[selected_arch->core];
5619
5620           if (ext != NULL)
5621             {
5622               /* ARCH string contains at least one extension.  */
5623               aarch64_parse_extension (ext);
5624             }
5625
5626           if (strcmp (selected_arch->arch, selected_cpu->arch))
5627             {
5628               warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
5629                        selected_cpu->name, selected_arch->name);
5630             }
5631
5632           return;
5633         }
5634     }
5635
5636   /* ARCH name not found in list.  */
5637   error ("unknown value %qs for -march", str);
5638   return;
5639 }
5640
5641 /* Parse the CPU string.  */
5642
5643 static void
5644 aarch64_parse_cpu (void)
5645 {
5646   char *ext;
5647   const struct processor *cpu;
5648   char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5649   size_t len;
5650
5651   strcpy (str, aarch64_cpu_string);
5652
5653   ext = strchr (str, '+');
5654
5655   if (ext != NULL)
5656     len = ext - str;
5657   else
5658     len = strlen (str);
5659
5660   if (len == 0)
5661     {
5662       error ("missing cpu name in -mcpu=%qs", str);
5663       return;
5664     }
5665
5666   /* Loop through the list of supported CPUs to find a match.  */
5667   for (cpu = all_cores; cpu->name != NULL; cpu++)
5668     {
5669       if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5670         {
5671           selected_cpu = cpu;
5672           selected_tune = cpu;
5673           aarch64_isa_flags = selected_cpu->flags;
5674
5675           if (ext != NULL)
5676             {
5677               /* CPU string contains at least one extension.  */
5678               aarch64_parse_extension (ext);
5679             }
5680
5681           return;
5682         }
5683     }
5684
5685   /* CPU name not found in list.  */
5686   error ("unknown value %qs for -mcpu", str);
5687   return;
5688 }
5689
5690 /* Parse the TUNE string.  */
5691
5692 static void
5693 aarch64_parse_tune (void)
5694 {
5695   const struct processor *cpu;
5696   char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5697   strcpy (str, aarch64_tune_string);
5698
5699   /* Loop through the list of supported CPUs to find a match.  */
5700   for (cpu = all_cores; cpu->name != NULL; cpu++)
5701     {
5702       if (strcmp (cpu->name, str) == 0)
5703         {
5704           selected_tune = cpu;
5705           return;
5706         }
5707     }
5708
5709   /* CPU name not found in list.  */
5710   error ("unknown value %qs for -mtune", str);
5711   return;
5712 }
5713
5714
5715 /* Implement TARGET_OPTION_OVERRIDE.  */
5716
5717 static void
5718 aarch64_override_options (void)
5719 {
5720   /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
5721      If either of -march or -mtune is given, they override their
5722      respective component of -mcpu.
5723
5724      So, first parse AARCH64_CPU_STRING, then the others, be careful
5725      with -march as, if -mcpu is not present on the command line, march
5726      must set a sensible default CPU.  */
5727   if (aarch64_cpu_string)
5728     {
5729       aarch64_parse_cpu ();
5730     }
5731
5732   if (aarch64_arch_string)
5733     {
5734       aarch64_parse_arch ();
5735     }
5736
5737   if (aarch64_tune_string)
5738     {
5739       aarch64_parse_tune ();
5740     }
5741
5742 #ifndef HAVE_AS_MABI_OPTION
5743   /* The compiler may have been configured with 2.23.* binutils, which does
5744      not have support for ILP32.  */
5745   if (TARGET_ILP32)
5746     error ("Assembler does not support -mabi=ilp32");
5747 #endif
5748
5749   initialize_aarch64_code_model ();
5750
5751   aarch64_build_bitmask_table ();
5752
5753   /* This target defaults to strict volatile bitfields.  */
5754   if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5755     flag_strict_volatile_bitfields = 1;
5756
5757   /* If the user did not specify a processor, choose the default
5758      one for them.  This will be the CPU set during configuration using
5759      --with-cpu, otherwise it is "generic".  */
5760   if (!selected_cpu)
5761     {
5762       selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5763       aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5764     }
5765
5766   gcc_assert (selected_cpu);
5767
5768   /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
5769   if (!selected_tune)
5770     selected_tune = &all_cores[selected_cpu->core];
5771
5772   aarch64_tune_flags = selected_tune->flags;
5773   aarch64_tune = selected_tune->core;
5774   aarch64_tune_params = selected_tune->tune;
5775
5776   if (aarch64_fix_a53_err835769 == 2)
5777     {
5778 #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
5779       aarch64_fix_a53_err835769 = 1;
5780 #else
5781       aarch64_fix_a53_err835769 = 0;
5782 #endif
5783     }
5784
5785   aarch64_override_options_after_change ();
5786 }
5787
5788 /* Implement targetm.override_options_after_change.  */
5789
5790 static void
5791 aarch64_override_options_after_change (void)
5792 {
5793   if (flag_omit_frame_pointer)
5794     flag_omit_leaf_frame_pointer = false;
5795   else if (flag_omit_leaf_frame_pointer)
5796     flag_omit_frame_pointer = true;
5797 }
5798
/* Return a freshly allocated machine_function obtained from
   ggc_alloc_cleared_machine_function.  */
static struct machine_function *
aarch64_init_machine_status (void)
{
  return ggc_alloc_cleared_machine_function ();
}
5806
5807 void
5808 aarch64_init_expanders (void)
5809 {
5810   init_machine_status = aarch64_init_machine_status;
5811 }
5812
5813 /* A checking mechanism for the implementation of the various code models.  */
5814 static void
5815 initialize_aarch64_code_model (void)
5816 {
5817    if (flag_pic)
5818      {
5819        switch (aarch64_cmodel_var)
5820          {
5821          case AARCH64_CMODEL_TINY:
5822            aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5823            break;
5824          case AARCH64_CMODEL_SMALL:
5825            aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5826            break;
5827          case AARCH64_CMODEL_LARGE:
5828            sorry ("code model %qs with -f%s", "large",
5829                   flag_pic > 1 ? "PIC" : "pic");
5830          default:
5831            gcc_unreachable ();
5832          }
5833      }
5834    else
5835      aarch64_cmodel = aarch64_cmodel_var;
5836 }
5837
5838 /* Return true if SYMBOL_REF X binds locally.  */
5839
5840 static bool
5841 aarch64_symbol_binds_local_p (const_rtx x)
5842 {
5843   return (SYMBOL_REF_DECL (x)
5844           ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5845           : SYMBOL_REF_LOCAL_P (x));
5846 }
5847
5848 /* Return true if SYMBOL_REF X is thread local */
5849 static bool
5850 aarch64_tls_symbol_p (rtx x)
5851 {
5852   if (! TARGET_HAVE_TLS)
5853     return false;
5854
5855   if (GET_CODE (x) != SYMBOL_REF)
5856     return false;
5857
5858   return SYMBOL_REF_TLS_MODEL (x) != 0;
5859 }
5860
5861 /* Classify a TLS symbol into one of the TLS kinds.  */
5862 enum aarch64_symbol_type
5863 aarch64_classify_tls_symbol (rtx x)
5864 {
5865   enum tls_model tls_kind = tls_symbolic_operand_type (x);
5866
5867   switch (tls_kind)
5868     {
5869     case TLS_MODEL_GLOBAL_DYNAMIC:
5870     case TLS_MODEL_LOCAL_DYNAMIC:
5871       return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5872
5873     case TLS_MODEL_INITIAL_EXEC:
5874       return SYMBOL_SMALL_GOTTPREL;
5875
5876     case TLS_MODEL_LOCAL_EXEC:
5877       return SYMBOL_SMALL_TPREL;
5878
5879     case TLS_MODEL_EMULATED:
5880     case TLS_MODEL_NONE:
5881       return SYMBOL_FORCE_TO_MEM;
5882
5883     default:
5884       gcc_unreachable ();
5885     }
5886 }
5887
5888 /* Return the method that should be used to access SYMBOL_REF or
5889    LABEL_REF X in context CONTEXT.  */
5890
5891 enum aarch64_symbol_type
5892 aarch64_classify_symbol (rtx x,
5893                          enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5894 {
5895   if (GET_CODE (x) == LABEL_REF)
5896     {
5897       switch (aarch64_cmodel)
5898         {
5899         case AARCH64_CMODEL_LARGE:
5900           return SYMBOL_FORCE_TO_MEM;
5901
5902         case AARCH64_CMODEL_TINY_PIC:
5903         case AARCH64_CMODEL_TINY:
5904           return SYMBOL_TINY_ABSOLUTE;
5905
5906         case AARCH64_CMODEL_SMALL_PIC:
5907         case AARCH64_CMODEL_SMALL:
5908           return SYMBOL_SMALL_ABSOLUTE;
5909
5910         default:
5911           gcc_unreachable ();
5912         }
5913     }
5914
5915   if (GET_CODE (x) == SYMBOL_REF)
5916     {
5917       if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5918           return SYMBOL_FORCE_TO_MEM;
5919
5920       if (aarch64_tls_symbol_p (x))
5921         return aarch64_classify_tls_symbol (x);
5922
5923       switch (aarch64_cmodel)
5924         {
5925         case AARCH64_CMODEL_TINY:
5926           if (SYMBOL_REF_WEAK (x))
5927             return SYMBOL_FORCE_TO_MEM;
5928           return SYMBOL_TINY_ABSOLUTE;
5929
5930         case AARCH64_CMODEL_SMALL:
5931           if (SYMBOL_REF_WEAK (x))
5932             return SYMBOL_FORCE_TO_MEM;
5933           return SYMBOL_SMALL_ABSOLUTE;
5934
5935         case AARCH64_CMODEL_TINY_PIC:
5936           if (!aarch64_symbol_binds_local_p (x))
5937             return SYMBOL_TINY_GOT;
5938           return SYMBOL_TINY_ABSOLUTE;
5939
5940         case AARCH64_CMODEL_SMALL_PIC:
5941           if (!aarch64_symbol_binds_local_p (x))
5942             return SYMBOL_SMALL_GOT;
5943           return SYMBOL_SMALL_ABSOLUTE;
5944
5945         default:
5946           gcc_unreachable ();
5947         }
5948     }
5949
5950   /* By default push everything into the constant pool.  */
5951   return SYMBOL_FORCE_TO_MEM;
5952 }
5953
5954 bool
5955 aarch64_constant_address_p (rtx x)
5956 {
5957   return (CONSTANT_P (x) && memory_address_p (DImode, x));
5958 }
5959
5960 bool
5961 aarch64_legitimate_pic_operand_p (rtx x)
5962 {
5963   if (GET_CODE (x) == SYMBOL_REF
5964       || (GET_CODE (x) == CONST
5965           && GET_CODE (XEXP (x, 0)) == PLUS
5966           && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5967      return false;
5968
5969   return true;
5970 }
5971
5972 /* Return true if X holds either a quarter-precision or
5973      floating-point +0.0 constant.  */
5974 static bool
5975 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5976 {
5977   if (!CONST_DOUBLE_P (x))
5978     return false;
5979
5980   /* TODO: We could handle moving 0.0 to a TFmode register,
5981      but first we would like to refactor the movtf_aarch64
5982      to be more amicable to split moves properly and
5983      correctly gate on TARGET_SIMD.  For now - reject all
5984      constants which are not to SFmode or DFmode registers.  */
5985   if (!(mode == SFmode || mode == DFmode))
5986     return false;
5987
5988   if (aarch64_float_const_zero_rtx_p (x))
5989     return true;
5990   return aarch64_float_const_representable_p (x);
5991 }
5992
5993 static bool
5994 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5995 {
5996   /* Do not allow vector struct mode constants.  We could support
5997      0 and -1 easily, but they need support in aarch64-simd.md.  */
5998   if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5999     return false;
6000
6001   /* This could probably go away because
6002      we now decompose CONST_INTs according to expand_mov_immediate.  */
6003   if ((GET_CODE (x) == CONST_VECTOR
6004        && aarch64_simd_valid_immediate (x, mode, false, NULL))
6005       || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
6006         return !targetm.cannot_force_const_mem (mode, x);
6007
6008   if (GET_CODE (x) == HIGH
6009       && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6010     return true;
6011
6012   return aarch64_constant_address_p (x);
6013 }
6014
6015 rtx
6016 aarch64_load_tp (rtx target)
6017 {
6018   if (!target
6019       || GET_MODE (target) != Pmode
6020       || !register_operand (target, Pmode))
6021     target = gen_reg_rtx (Pmode);
6022
6023   /* Can return in any reg.  */
6024   emit_insn (gen_aarch64_load_tp_hard (target));
6025   return target;
6026 }
6027
6028 /* On AAPCS systems, this is the "struct __va_list".  */
6029 static GTY(()) tree va_list_type;
6030
6031 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6032    Return the type to use as __builtin_va_list.
6033
6034    AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6035
6036    struct __va_list
6037    {
6038      void *__stack;
6039      void *__gr_top;
6040      void *__vr_top;
6041      int   __gr_offs;
6042      int   __vr_offs;
6043    };  */
6044
6045 static tree
6046 aarch64_build_builtin_va_list (void)
6047 {
6048   tree va_list_name;
6049   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6050
6051   /* Create the type.  */
6052   va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6053   /* Give it the required name.  */
6054   va_list_name = build_decl (BUILTINS_LOCATION,
6055                              TYPE_DECL,
6056                              get_identifier ("__va_list"),
6057                              va_list_type);
6058   DECL_ARTIFICIAL (va_list_name) = 1;
6059   TYPE_NAME (va_list_type) = va_list_name;
6060   TYPE_STUB_DECL (va_list_type) = va_list_name;
6061
6062   /* Create the fields.  */
6063   f_stack = build_decl (BUILTINS_LOCATION,
6064                         FIELD_DECL, get_identifier ("__stack"),
6065                         ptr_type_node);
6066   f_grtop = build_decl (BUILTINS_LOCATION,
6067                         FIELD_DECL, get_identifier ("__gr_top"),
6068                         ptr_type_node);
6069   f_vrtop = build_decl (BUILTINS_LOCATION,
6070                         FIELD_DECL, get_identifier ("__vr_top"),
6071                         ptr_type_node);
6072   f_groff = build_decl (BUILTINS_LOCATION,
6073                         FIELD_DECL, get_identifier ("__gr_offs"),
6074                         integer_type_node);
6075   f_vroff = build_decl (BUILTINS_LOCATION,
6076                         FIELD_DECL, get_identifier ("__vr_offs"),
6077                         integer_type_node);
6078
6079   DECL_ARTIFICIAL (f_stack) = 1;
6080   DECL_ARTIFICIAL (f_grtop) = 1;
6081   DECL_ARTIFICIAL (f_vrtop) = 1;
6082   DECL_ARTIFICIAL (f_groff) = 1;
6083   DECL_ARTIFICIAL (f_vroff) = 1;
6084
6085   DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6086   DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6087   DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6088   DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6089   DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6090
6091   TYPE_FIELDS (va_list_type) = f_stack;
6092   DECL_CHAIN (f_stack) = f_grtop;
6093   DECL_CHAIN (f_grtop) = f_vrtop;
6094   DECL_CHAIN (f_vrtop) = f_groff;
6095   DECL_CHAIN (f_groff) = f_vroff;
6096
6097   /* Compute its layout.  */
6098   layout_type (va_list_type);
6099
6100   return va_list_type;
6101 }
6102
6103 /* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
6104 static void
6105 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6106 {
6107   const CUMULATIVE_ARGS *cum;
6108   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6109   tree stack, grtop, vrtop, groff, vroff;
6110   tree t;
6111   int gr_save_area_size;
6112   int vr_save_area_size;
6113   int vr_offset;
6114
6115   cum = &crtl->args.info;
6116   gr_save_area_size
6117     = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6118   vr_save_area_size
6119     = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6120
6121   if (TARGET_GENERAL_REGS_ONLY)
6122     {
6123       if (cum->aapcs_nvrn > 0)
6124         sorry ("%qs and floating point or vector arguments",
6125                "-mgeneral-regs-only");
6126       vr_save_area_size = 0;
6127     }
6128
6129   f_stack = TYPE_FIELDS (va_list_type_node);
6130   f_grtop = DECL_CHAIN (f_stack);
6131   f_vrtop = DECL_CHAIN (f_grtop);
6132   f_groff = DECL_CHAIN (f_vrtop);
6133   f_vroff = DECL_CHAIN (f_groff);
6134
6135   stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6136                   NULL_TREE);
6137   grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6138                   NULL_TREE);
6139   vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6140                   NULL_TREE);
6141   groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6142                   NULL_TREE);
6143   vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6144                   NULL_TREE);
6145
6146   /* Emit code to initialize STACK, which points to the next varargs stack
6147      argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
6148      by named arguments.  STACK is 8-byte aligned.  */
6149   t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6150   if (cum->aapcs_stack_size > 0)
6151     t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6152   t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6153   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6154
6155   /* Emit code to initialize GRTOP, the top of the GR save area.
6156      virtual_incoming_args_rtx should have been 16 byte aligned.  */
6157   t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6158   t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6159   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6160
6161   /* Emit code to initialize VRTOP, the top of the VR save area.
6162      This address is gr_save_area_bytes below GRTOP, rounded
6163      down to the next 16-byte boundary.  */
6164   t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6165   vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6166                              STACK_BOUNDARY / BITS_PER_UNIT);
6167
6168   if (vr_offset)
6169     t = fold_build_pointer_plus_hwi (t, -vr_offset);
6170   t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6171   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6172
6173   /* Emit code to initialize GROFF, the offset from GRTOP of the
6174      next GPR argument.  */
6175   t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6176               build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6177   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6178
6179   /* Likewise emit code to initialize VROFF, the offset from FTOP
6180      of the next VR argument.  */
6181   t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6182               build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6183   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6184 }
6185
6186 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
6187
6188 static tree
6189 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6190                               gimple_seq *post_p ATTRIBUTE_UNUSED)
6191 {
6192   tree addr;
6193   bool indirect_p;
6194   bool is_ha;           /* is HFA or HVA.  */
6195   bool dw_align;        /* double-word align.  */
6196   enum machine_mode ag_mode = VOIDmode;
6197   int nregs;
6198   enum machine_mode mode;
6199
6200   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6201   tree stack, f_top, f_off, off, arg, roundup, on_stack;
6202   HOST_WIDE_INT size, rsize, adjust, align;
6203   tree t, u, cond1, cond2;
6204
6205   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6206   if (indirect_p)
6207     type = build_pointer_type (type);
6208
6209   mode = TYPE_MODE (type);
6210
6211   f_stack = TYPE_FIELDS (va_list_type_node);
6212   f_grtop = DECL_CHAIN (f_stack);
6213   f_vrtop = DECL_CHAIN (f_grtop);
6214   f_groff = DECL_CHAIN (f_vrtop);
6215   f_vroff = DECL_CHAIN (f_groff);
6216
6217   stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6218                   f_stack, NULL_TREE);
6219   size = int_size_in_bytes (type);
6220   align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6221
6222   dw_align = false;
6223   adjust = 0;
6224   if (aarch64_vfp_is_call_or_return_candidate (mode,
6225                                                type,
6226                                                &ag_mode,
6227                                                &nregs,
6228                                                &is_ha))
6229     {
6230       /* TYPE passed in fp/simd registers.  */
6231       if (TARGET_GENERAL_REGS_ONLY)
6232         sorry ("%qs and floating point or vector arguments",
6233                "-mgeneral-regs-only");
6234
6235       f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6236                       unshare_expr (valist), f_vrtop, NULL_TREE);
6237       f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6238                       unshare_expr (valist), f_vroff, NULL_TREE);
6239
6240       rsize = nregs * UNITS_PER_VREG;
6241
6242       if (is_ha)
6243         {
6244           if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6245             adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6246         }
6247       else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6248                && size < UNITS_PER_VREG)
6249         {
6250           adjust = UNITS_PER_VREG - size;
6251         }
6252     }
6253   else
6254     {
6255       /* TYPE passed in general registers.  */
6256       f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6257                       unshare_expr (valist), f_grtop, NULL_TREE);
6258       f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6259                       unshare_expr (valist), f_groff, NULL_TREE);
6260       rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6261       nregs = rsize / UNITS_PER_WORD;
6262
6263       if (align > 8)
6264         dw_align = true;
6265
6266       if (BLOCK_REG_PADDING (mode, type, 1) == downward
6267           && size < UNITS_PER_WORD)
6268         {
6269           adjust = UNITS_PER_WORD  - size;
6270         }
6271     }
6272
6273   /* Get a local temporary for the field value.  */
6274   off = get_initialized_tmp_var (f_off, pre_p, NULL);
6275
6276   /* Emit code to branch if off >= 0.  */
6277   t = build2 (GE_EXPR, boolean_type_node, off,
6278               build_int_cst (TREE_TYPE (off), 0));
6279   cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6280
6281   if (dw_align)
6282     {
6283       /* Emit: offs = (offs + 15) & -16.  */
6284       t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6285                   build_int_cst (TREE_TYPE (off), 15));
6286       t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6287                   build_int_cst (TREE_TYPE (off), -16));
6288       roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6289     }
6290   else
6291     roundup = NULL;
6292
6293   /* Update ap.__[g|v]r_offs  */
6294   t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6295               build_int_cst (TREE_TYPE (off), rsize));
6296   t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6297
6298   /* String up.  */
6299   if (roundup)
6300     t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6301
6302   /* [cond2] if (ap.__[g|v]r_offs > 0)  */
6303   u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6304               build_int_cst (TREE_TYPE (f_off), 0));
6305   cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6306
6307   /* String up: make sure the assignment happens before the use.  */
6308   t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6309   COND_EXPR_ELSE (cond1) = t;
6310
6311   /* Prepare the trees handling the argument that is passed on the stack;
6312      the top level node will store in ON_STACK.  */
6313   arg = get_initialized_tmp_var (stack, pre_p, NULL);
6314   if (align > 8)
6315     {
6316       /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
6317       t = fold_convert (intDI_type_node, arg);
6318       t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6319                   build_int_cst (TREE_TYPE (t), 15));
6320       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6321                   build_int_cst (TREE_TYPE (t), -16));
6322       t = fold_convert (TREE_TYPE (arg), t);
6323       roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
6324     }
6325   else
6326     roundup = NULL;
6327   /* Advance ap.__stack  */
6328   t = fold_convert (intDI_type_node, arg);
6329   t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6330               build_int_cst (TREE_TYPE (t), size + 7));
6331   t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6332               build_int_cst (TREE_TYPE (t), -8));
6333   t = fold_convert (TREE_TYPE (arg), t);
6334   t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
6335   /* String up roundup and advance.  */
6336   if (roundup)
6337     t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6338   /* String up with arg */
6339   on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
6340   /* Big-endianness related address adjustment.  */
6341   if (BLOCK_REG_PADDING (mode, type, 1) == downward
6342       && size < UNITS_PER_WORD)
6343   {
6344     t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
6345                 size_int (UNITS_PER_WORD - size));
6346     on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
6347   }
6348
6349   COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
6350   COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
6351
6352   /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
6353   t = off;
6354   if (adjust)
6355     t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
6356                 build_int_cst (TREE_TYPE (off), adjust));
6357
6358   t = fold_convert (sizetype, t);
6359   t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
6360
6361   if (is_ha)
6362     {
6363       /* type ha; // treat as "struct {ftype field[n];}"
6364          ... [computing offs]
6365          for (i = 0; i <nregs; ++i, offs += 16)
6366            ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6367          return ha;  */
6368       int i;
6369       tree tmp_ha, field_t, field_ptr_t;
6370
6371       /* Declare a local variable.  */
6372       tmp_ha = create_tmp_var_raw (type, "ha");
6373       gimple_add_tmp_var (tmp_ha);
6374
6375       /* Establish the base type.  */
6376       switch (ag_mode)
6377         {
6378         case SFmode:
6379           field_t = float_type_node;
6380           field_ptr_t = float_ptr_type_node;
6381           break;
6382         case DFmode:
6383           field_t = double_type_node;
6384           field_ptr_t = double_ptr_type_node;
6385           break;
6386         case TFmode:
6387           field_t = long_double_type_node;
6388           field_ptr_t = long_double_ptr_type_node;
6389           break;
6390 /* The half precision and quad precision are not fully supported yet.  Enable
6391    the following code after the support is complete.  Need to find the correct
6392    type node for __fp16 *.  */
6393 #if 0
6394         case HFmode:
6395           field_t = float_type_node;
6396           field_ptr_t = float_ptr_type_node;
6397           break;
6398 #endif
6399         case V2SImode:
6400         case V4SImode:
6401             {
6402               tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
6403               field_t = build_vector_type_for_mode (innertype, ag_mode);
6404               field_ptr_t = build_pointer_type (field_t);
6405             }
6406           break;
6407         default:
6408           gcc_assert (0);
6409         }
6410
6411       /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area  */
6412       tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
6413       addr = t;
6414       t = fold_convert (field_ptr_t, addr);
6415       t = build2 (MODIFY_EXPR, field_t,
6416                   build1 (INDIRECT_REF, field_t, tmp_ha),
6417                   build1 (INDIRECT_REF, field_t, t));
6418
6419       /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
6420       for (i = 1; i < nregs; ++i)
6421         {
6422           addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
6423           u = fold_convert (field_ptr_t, addr);
6424           u = build2 (MODIFY_EXPR, field_t,
6425                       build2 (MEM_REF, field_t, tmp_ha,
6426                               build_int_cst (field_ptr_t,
6427                                              (i *
6428                                               int_size_in_bytes (field_t)))),
6429                       build1 (INDIRECT_REF, field_t, u));
6430           t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
6431         }
6432
6433       u = fold_convert (TREE_TYPE (f_top), tmp_ha);
6434       t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
6435     }
6436
6437   COND_EXPR_ELSE (cond2) = t;
6438   addr = fold_convert (build_pointer_type (type), cond1);
6439   addr = build_va_arg_indirect_ref (addr);
6440
6441   if (indirect_p)
6442     addr = build_va_arg_indirect_ref (addr);
6443
6444   return addr;
6445 }
6446
6447 /* Implement TARGET_SETUP_INCOMING_VARARGS.  */
6448
6449 static void
6450 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
6451                                 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6452                                 int no_rtl)
6453 {
6454   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6455   CUMULATIVE_ARGS local_cum;
6456   int gr_saved, vr_saved;
6457
6458   /* The caller has advanced CUM up to, but not beyond, the last named
6459      argument.  Advance a local copy of CUM past the last "real" named
6460      argument, to find out how many registers are left over.  */
6461   local_cum = *cum;
6462   aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
6463
6464   /* Found out how many registers we need to save.  */
6465   gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
6466   vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
6467
6468   if (TARGET_GENERAL_REGS_ONLY)
6469     {
6470       if (local_cum.aapcs_nvrn > 0)
6471         sorry ("%qs and floating point or vector arguments",
6472                "-mgeneral-regs-only");
6473       vr_saved = 0;
6474     }
6475
6476   if (!no_rtl)
6477     {
6478       if (gr_saved > 0)
6479         {
6480           rtx ptr, mem;
6481
6482           /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
6483           ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
6484                                - gr_saved * UNITS_PER_WORD);
6485           mem = gen_frame_mem (BLKmode, ptr);
6486           set_mem_alias_set (mem, get_varargs_alias_set ());
6487
6488           move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
6489                                mem, gr_saved);
6490         }
6491       if (vr_saved > 0)
6492         {
6493           /* We can't use move_block_from_reg, because it will use
6494              the wrong mode, storing D regs only.  */
6495           enum machine_mode mode = TImode;
6496           int off, i;
6497
6498           /* Set OFF to the offset from virtual_incoming_args_rtx of
6499              the first vector register.  The VR save area lies below
6500              the GR one, and is aligned to 16 bytes.  */
6501           off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6502                                    STACK_BOUNDARY / BITS_PER_UNIT);
6503           off -= vr_saved * UNITS_PER_VREG;
6504
6505           for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
6506             {
6507               rtx ptr, mem;
6508
6509               ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
6510               mem = gen_frame_mem (mode, ptr);
6511               set_mem_alias_set (mem, get_varargs_alias_set ());
6512               aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
6513               off += UNITS_PER_VREG;
6514             }
6515         }
6516     }
6517
6518   /* We don't save the size into *PRETEND_SIZE because we want to avoid
6519      any complication of having crtl->args.pretend_args_size changed.  */
6520   cfun->machine->saved_varargs_size
6521     = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6522                       STACK_BOUNDARY / BITS_PER_UNIT)
6523        + vr_saved * UNITS_PER_VREG);
6524 }
6525
6526 static void
6527 aarch64_conditional_register_usage (void)
6528 {
6529   int i;
6530   if (!TARGET_FLOAT)
6531     {
6532       for (i = V0_REGNUM; i <= V31_REGNUM; i++)
6533         {
6534           fixed_regs[i] = 1;
6535           call_used_regs[i] = 1;
6536         }
6537     }
6538 }
6539
6540 /* Walk down the type tree of TYPE counting consecutive base elements.
6541    If *MODEP is VOIDmode, then set it to the first valid floating point
6542    type.  If a non-floating point type is found, or if a floating point
6543    type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6544    otherwise return the count in the sub-tree.  */
6545 static int
6546 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6547 {
6548   enum machine_mode mode;
6549   HOST_WIDE_INT size;
6550
6551   switch (TREE_CODE (type))
6552     {
6553     case REAL_TYPE:
6554       mode = TYPE_MODE (type);
6555       if (mode != DFmode && mode != SFmode && mode != TFmode)
6556         return -1;
6557
6558       if (*modep == VOIDmode)
6559         *modep = mode;
6560
6561       if (*modep == mode)
6562         return 1;
6563
6564       break;
6565
6566     case COMPLEX_TYPE:
6567       mode = TYPE_MODE (TREE_TYPE (type));
6568       if (mode != DFmode && mode != SFmode && mode != TFmode)
6569         return -1;
6570
6571       if (*modep == VOIDmode)
6572         *modep = mode;
6573
6574       if (*modep == mode)
6575         return 2;
6576
6577       break;
6578
6579     case VECTOR_TYPE:
6580       /* Use V2SImode and V4SImode as representatives of all 64-bit
6581          and 128-bit vector types.  */
6582       size = int_size_in_bytes (type);
6583       switch (size)
6584         {
6585         case 8:
6586           mode = V2SImode;
6587           break;
6588         case 16:
6589           mode = V4SImode;
6590           break;
6591         default:
6592           return -1;
6593         }
6594
6595       if (*modep == VOIDmode)
6596         *modep = mode;
6597
6598       /* Vector modes are considered to be opaque: two vectors are
6599          equivalent for the purposes of being homogeneous aggregates
6600          if they are the same size.  */
6601       if (*modep == mode)
6602         return 1;
6603
6604       break;
6605
6606     case ARRAY_TYPE:
6607       {
6608         int count;
6609         tree index = TYPE_DOMAIN (type);
6610
6611         /* Can't handle incomplete types.  */
6612         if (!COMPLETE_TYPE_P (type))
6613           return -1;
6614
6615         count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6616         if (count == -1
6617             || !index
6618             || !TYPE_MAX_VALUE (index)
6619             || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6620             || !TYPE_MIN_VALUE (index)
6621             || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6622             || count < 0)
6623           return -1;
6624
6625         count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6626                       - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6627
6628         /* There must be no padding.  */
6629         if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6630             || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6631                 != count * GET_MODE_BITSIZE (*modep)))
6632           return -1;
6633
6634         return count;
6635       }
6636
6637     case RECORD_TYPE:
6638       {
6639         int count = 0;
6640         int sub_count;
6641         tree field;
6642
6643         /* Can't handle incomplete types.  */
6644         if (!COMPLETE_TYPE_P (type))
6645           return -1;
6646
6647         for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6648           {
6649             if (TREE_CODE (field) != FIELD_DECL)
6650               continue;
6651
6652             sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6653             if (sub_count < 0)
6654               return -1;
6655             count += sub_count;
6656           }
6657
6658         /* There must be no padding.  */
6659         if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6660             || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6661                 != count * GET_MODE_BITSIZE (*modep)))
6662           return -1;
6663
6664         return count;
6665       }
6666
6667     case UNION_TYPE:
6668     case QUAL_UNION_TYPE:
6669       {
6670         /* These aren't very interesting except in a degenerate case.  */
6671         int count = 0;
6672         int sub_count;
6673         tree field;
6674
6675         /* Can't handle incomplete types.  */
6676         if (!COMPLETE_TYPE_P (type))
6677           return -1;
6678
6679         for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6680           {
6681             if (TREE_CODE (field) != FIELD_DECL)
6682               continue;
6683
6684             sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6685             if (sub_count < 0)
6686               return -1;
6687             count = count > sub_count ? count : sub_count;
6688           }
6689
6690         /* There must be no padding.  */
6691         if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6692             || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6693                 != count * GET_MODE_BITSIZE (*modep)))
6694           return -1;
6695
6696         return count;
6697       }
6698
6699     default:
6700       break;
6701     }
6702
6703   return -1;
6704 }
6705
6706 /* Return true if we use LRA instead of reload pass.  */
6707 static bool
6708 aarch64_lra_p (void)
6709 {
6710   return aarch64_lra_flag;
6711 }
6712
6713 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6714    type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
6715    array types.  The C99 floating-point complex types are also considered
6716    as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
6717    types, which are GCC extensions and out of the scope of AAPCS64, are
6718    treated as composite types here as well.
6719
6720    Note that MODE itself is not sufficient in determining whether a type
6721    is such a composite type or not.  This is because
6722    stor-layout.c:compute_record_mode may have already changed the MODE
6723    (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
6724    structure with only one field may have its MODE set to the mode of the
6725    field.  Also an integer mode whose size matches the size of the
6726    RECORD_TYPE type may be used to substitute the original mode
6727    (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
6728    solely relied on.  */
6729
6730 static bool
6731 aarch64_composite_type_p (const_tree type,
6732                           enum machine_mode mode)
6733 {
6734   if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6735     return true;
6736
6737   if (mode == BLKmode
6738       || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6739       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6740     return true;
6741
6742   return false;
6743 }
6744
6745 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6746    type as described in AAPCS64 \S 4.1.2.
6747
6748    See the comment above aarch64_composite_type_p for the notes on MODE.  */
6749
6750 static bool
6751 aarch64_short_vector_p (const_tree type,
6752                         enum machine_mode mode)
6753 {
6754   HOST_WIDE_INT size = -1;
6755
6756   if (type && TREE_CODE (type) == VECTOR_TYPE)
6757     size = int_size_in_bytes (type);
6758   else if (!aarch64_composite_type_p (type, mode)
6759            && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6760                || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6761     size = GET_MODE_SIZE (mode);
6762
6763   return (size == 8 || size == 16) ? true : false;
6764 }
6765
6766 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6767    shall be passed or returned in simd/fp register(s) (providing these
6768    parameter passing registers are available).
6769
6770    Upon successful return, *COUNT returns the number of needed registers,
6771    *BASE_MODE returns the mode of the individual register and when IS_HAF
6772    is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6773    floating-point aggregate or a homogeneous short-vector aggregate.  */
6774
6775 static bool
6776 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6777                                          const_tree type,
6778                                          enum machine_mode *base_mode,
6779                                          int *count,
6780                                          bool *is_ha)
6781 {
6782   enum machine_mode new_mode = VOIDmode;
6783   bool composite_p = aarch64_composite_type_p (type, mode);
6784
6785   if (is_ha != NULL) *is_ha = false;
6786
6787   if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6788       || aarch64_short_vector_p (type, mode))
6789     {
6790       *count = 1;
6791       new_mode = mode;
6792     }
6793   else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6794     {
6795       if (is_ha != NULL) *is_ha = true;
6796       *count = 2;
6797       new_mode = GET_MODE_INNER (mode);
6798     }
6799   else if (type && composite_p)
6800     {
6801       int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6802
6803       if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6804         {
6805           if (is_ha != NULL) *is_ha = true;
6806           *count = ag_count;
6807         }
6808       else
6809         return false;
6810     }
6811   else
6812     return false;
6813
6814   *base_mode = new_mode;
6815   return true;
6816 }
6817
6818 /* Implement TARGET_STRUCT_VALUE_RTX.  */
6819
6820 static rtx
6821 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6822                           int incoming ATTRIBUTE_UNUSED)
6823 {
6824   return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6825 }
6826
6827 /* Implements target hook vector_mode_supported_p.  */
6828 static bool
6829 aarch64_vector_mode_supported_p (enum machine_mode mode)
6830 {
6831   if (TARGET_SIMD
6832       && (mode == V4SImode  || mode == V8HImode
6833           || mode == V16QImode || mode == V2DImode
6834           || mode == V2SImode  || mode == V4HImode
6835           || mode == V8QImode || mode == V2SFmode
6836           || mode == V4SFmode || mode == V2DFmode
6837           || mode == V1DFmode))
6838     return true;
6839
6840   return false;
6841 }
6842
6843 /* Return appropriate SIMD container
6844    for MODE within a vector of WIDTH bits.  */
6845 static enum machine_mode
6846 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6847 {
6848   gcc_assert (width == 64 || width == 128);
6849   if (TARGET_SIMD)
6850     {
6851       if (width == 128)
6852         switch (mode)
6853           {
6854           case DFmode:
6855             return V2DFmode;
6856           case SFmode:
6857             return V4SFmode;
6858           case SImode:
6859             return V4SImode;
6860           case HImode:
6861             return V8HImode;
6862           case QImode:
6863             return V16QImode;
6864           case DImode:
6865             return V2DImode;
6866           default:
6867             break;
6868           }
6869       else
6870         switch (mode)
6871           {
6872           case SFmode:
6873             return V2SFmode;
6874           case SImode:
6875             return V2SImode;
6876           case HImode:
6877             return V4HImode;
6878           case QImode:
6879             return V8QImode;
6880           default:
6881             break;
6882           }
6883     }
6884   return word_mode;
6885 }
6886
6887 /* Return 128-bit container as the preferred SIMD mode for MODE.  */
6888 static enum machine_mode
6889 aarch64_preferred_simd_mode (enum machine_mode mode)
6890 {
6891   return aarch64_simd_container_mode (mode, 128);
6892 }
6893
/* Return the bitmask of possible vector sizes for the vectorizer
   to iterate over: the values 16 and 8 OR-ed together.  */
static unsigned int
aarch64_autovectorize_vector_sizes (void)
{
  const unsigned int quad_size = 16;
  const unsigned int double_size = 8;

  return quad_size | double_size;
}
6901
6902 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6903    vector types in order to conform to the AAPCS64 (see "Procedure
6904    Call Standard for the ARM 64-bit Architecture", Appendix A).  To
6905    qualify for emission with the mangled names defined in that document,
6906    a vector type must not only be of the correct mode but also be
6907    composed of AdvSIMD vector element types (e.g.
6908    _builtin_aarch64_simd_qi); these types are registered by
6909    aarch64_init_simd_builtins ().  In other words, vector types defined
6910    in other ways e.g. via vector_size attribute will get default
6911    mangled names.  */
6912 typedef struct
6913 {
6914   enum machine_mode mode;
6915   const char *element_type_name;
6916   const char *mangled_name;
6917 } aarch64_simd_mangle_map_entry;
6918
6919 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6920   /* 64-bit containerized types.  */
6921   { V8QImode,  "__builtin_aarch64_simd_qi",     "10__Int8x8_t" },
6922   { V8QImode,  "__builtin_aarch64_simd_uqi",    "11__Uint8x8_t" },
6923   { V4HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x4_t" },
6924   { V4HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x4_t" },
6925   { V2SImode,  "__builtin_aarch64_simd_si",     "11__Int32x2_t" },
6926   { V2SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x2_t" },
6927   { V2SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x2_t" },
6928   { V8QImode,  "__builtin_aarch64_simd_poly8",  "11__Poly8x8_t" },
6929   { V4HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6930   /* 128-bit containerized types.  */
6931   { V16QImode, "__builtin_aarch64_simd_qi",     "11__Int8x16_t" },
6932   { V16QImode, "__builtin_aarch64_simd_uqi",    "12__Uint8x16_t" },
6933   { V8HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x8_t" },
6934   { V8HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x8_t" },
6935   { V4SImode,  "__builtin_aarch64_simd_si",     "11__Int32x4_t" },
6936   { V4SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x4_t" },
6937   { V2DImode,  "__builtin_aarch64_simd_di",     "11__Int64x2_t" },
6938   { V2DImode,  "__builtin_aarch64_simd_udi",    "12__Uint64x2_t" },
6939   { V4SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x4_t" },
6940   { V2DFmode,  "__builtin_aarch64_simd_df",     "13__Float64x2_t" },
6941   { V16QImode, "__builtin_aarch64_simd_poly8",  "12__Poly8x16_t" },
6942   { V8HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6943   { V2DImode,  "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
6944   { VOIDmode, NULL, NULL }
6945 };
6946
6947 /* Implement TARGET_MANGLE_TYPE.  */
6948
6949 static const char *
6950 aarch64_mangle_type (const_tree type)
6951 {
6952   /* The AArch64 ABI documents say that "__va_list" has to be
6953      managled as if it is in the "std" namespace.  */
6954   if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6955     return "St9__va_list";
6956
6957   /* Check the mode of the vector type, and the name of the vector
6958      element type, against the table.  */
6959   if (TREE_CODE (type) == VECTOR_TYPE)
6960     {
6961       aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6962
6963       while (pos->mode != VOIDmode)
6964         {
6965           tree elt_type = TREE_TYPE (type);
6966
6967           if (pos->mode == TYPE_MODE (type)
6968               && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6969               && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6970                           pos->element_type_name))
6971             return pos->mangled_name;
6972
6973           pos++;
6974         }
6975     }
6976
6977   /* Use the default mangling.  */
6978   return NULL;
6979 }
6980
6981 static int
6982 is_mem_p (rtx *x, void *data ATTRIBUTE_UNUSED)
6983 {
6984   return MEM_P (*x);
6985 }
6986
6987 static bool
6988 is_memory_op (rtx mem_insn)
6989 {
6990    rtx pattern = PATTERN (mem_insn);
6991    return for_each_rtx (&pattern, is_mem_p, NULL);
6992 }
6993
6994 /* Find the first rtx before insn that will generate an assembly
6995    instruction.  */
6996
6997 static rtx
6998 aarch64_prev_real_insn (rtx insn)
6999 {
7000   if (!insn)
7001     return NULL;
7002
7003   do
7004     {
7005       insn = prev_real_insn (insn);
7006     }
7007   while (insn && recog_memoized (insn) < 0);
7008
7009   return insn;
7010 }
7011
7012 static bool
7013 is_madd_op (enum attr_type t1)
7014 {
7015   unsigned int i;
7016   /* A number of these may be AArch32 only.  */
7017   enum attr_type mlatypes[] = {
7018     TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
7019     TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
7020     TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS,TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
7021   };
7022
7023   for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
7024     {
7025       if (t1 == mlatypes[i])
7026         return true;
7027     }
7028
7029   return false;
7030 }
7031
7032 /* Check if there is a register dependency between a load and the insn
7033    for which we hold recog_data.  */
7034
7035 static bool
7036 dep_between_memop_and_curr (rtx memop)
7037 {
7038   rtx load_reg;
7039   int opno;
7040
7041   if (!memop)
7042     return false;
7043
7044   if (!REG_P (SET_DEST (memop)))
7045     return false;
7046
7047   load_reg = SET_DEST (memop);
7048   for (opno = 0; opno < recog_data.n_operands; opno++)
7049     {
7050       rtx operand = recog_data.operand[opno];
7051       if (REG_P (operand)
7052           && reg_overlap_mentioned_p (load_reg, operand))
7053         return true;
7054
7055     }
7056   return false;
7057 }
7058
7059 bool
7060 aarch64_madd_needs_nop (rtx insn)
7061 {
7062   enum attr_type attr_type;
7063   rtx prev;
7064   rtx body;
7065
7066   if (!aarch64_fix_a53_err835769)
7067     return false;
7068
7069   if (recog_memoized (insn) < 0)
7070     return false;
7071
7072   attr_type = get_attr_type (insn);
7073   if (!is_madd_op (attr_type))
7074     return false;
7075
7076   prev = aarch64_prev_real_insn (insn);
7077   /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
7078      Restore recog state to INSN to avoid state corruption.  */
7079   extract_constrain_insn_cached (insn);
7080
7081   if (!prev)
7082     return false;
7083
7084   body = single_set (prev);
7085
7086   /* If the previous insn is a memory op and there is no dependency between
7087      it and the madd, emit a nop between them.  If we know the previous insn is
7088      a memory op but body is NULL, emit the nop to be safe, it's probably a
7089      load/store pair insn.  */
7090   if (is_memory_op (prev)
7091       && GET_MODE (recog_data.operand[0]) == DImode
7092       && (!dep_between_memop_and_curr (body)))
7093     return true;
7094
7095   return false;
7096
7097 }
7098
7099 void
7100 aarch64_final_prescan_insn (rtx insn)
7101 {
7102   if (aarch64_madd_needs_nop (insn))
7103     fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
7104 }
7105
7106
/* Return the equivalent letter for size: 'd' for 64, 's' for 32, 'h' for
   16 and 'b' for 8.  Any other SIZE is a caller error.  */
static char
sizetochar (int size)
{
  if (size == 64)
    return 'd';
  if (size == 32)
    return 's';
  if (size == 16)
    return 'h';
  if (size == 8)
    return 'b';

  gcc_unreachable ();
}
7120
7121 /* Return true iff x is a uniform vector of floating-point
7122    constants, and the constant can be represented in
7123    quarter-precision form.  Note, as aarch64_float_const_representable
7124    rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
7125 static bool
7126 aarch64_vect_float_const_representable_p (rtx x)
7127 {
7128   int i = 0;
7129   REAL_VALUE_TYPE r0, ri;
7130   rtx x0, xi;
7131
7132   if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7133     return false;
7134
7135   x0 = CONST_VECTOR_ELT (x, 0);
7136   if (!CONST_DOUBLE_P (x0))
7137     return false;
7138
7139   REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7140
7141   for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7142     {
7143       xi = CONST_VECTOR_ELT (x, i);
7144       if (!CONST_DOUBLE_P (xi))
7145         return false;
7146
7147       REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7148       if (!REAL_VALUES_EQUAL (r0, ri))
7149         return false;
7150     }
7151
7152   return aarch64_float_const_representable_p (x0);
7153 }
7154
7155 /* Return true for valid and false for invalid.  */
7156 bool
7157 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
7158                               struct simd_immediate_info *info)
7159 {
7160 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)  \
7161   matches = 1;                                          \
7162   for (i = 0; i < idx; i += (STRIDE))                   \
7163     if (!(TEST))                                        \
7164       matches = 0;                                      \
7165   if (matches)                                          \
7166     {                                                   \
7167       immtype = (CLASS);                                \
7168       elsize = (ELSIZE);                                \
7169       eshift = (SHIFT);                                 \
7170       emvn = (NEG);                                     \
7171       break;                                            \
7172     }
7173
7174   unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7175   unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7176   unsigned char bytes[16];
7177   int immtype = -1, matches;
7178   unsigned int invmask = inverse ? 0xff : 0;
7179   int eshift, emvn;
7180
7181   if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7182     {
7183       if (! (aarch64_simd_imm_zero_p (op, mode)
7184              || aarch64_vect_float_const_representable_p (op)))
7185         return false;
7186
7187       if (info)
7188         {
7189           info->value = CONST_VECTOR_ELT (op, 0);
7190           info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
7191           info->mvn = false;
7192           info->shift = 0;
7193         }
7194
7195       return true;
7196     }
7197
7198   /* Splat vector constant out into a byte vector.  */
7199   for (i = 0; i < n_elts; i++)
7200     {
7201       /* The vector is provided in gcc endian-neutral fashion.  For aarch64_be,
7202          it must be laid out in the vector register in reverse order.  */
7203       rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
7204       unsigned HOST_WIDE_INT elpart;
7205       unsigned int part, parts;
7206
7207       if (GET_CODE (el) == CONST_INT)
7208         {
7209           elpart = INTVAL (el);
7210           parts = 1;
7211         }
7212       else if (GET_CODE (el) == CONST_DOUBLE)
7213         {
7214           elpart = CONST_DOUBLE_LOW (el);
7215           parts = 2;
7216         }
7217       else
7218         gcc_unreachable ();
7219
7220       for (part = 0; part < parts; part++)
7221         {
7222           unsigned int byte;
7223           for (byte = 0; byte < innersize; byte++)
7224             {
7225               bytes[idx++] = (elpart & 0xff) ^ invmask;
7226               elpart >>= BITS_PER_UNIT;
7227             }
7228           if (GET_CODE (el) == CONST_DOUBLE)
7229             elpart = CONST_DOUBLE_HIGH (el);
7230         }
7231     }
7232
7233   /* Sanity check.  */
7234   gcc_assert (idx == GET_MODE_SIZE (mode));
7235
7236   do
7237     {
7238       CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7239              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7240
7241       CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7242              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7243
7244       CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7245              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7246
7247       CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7248              && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7249
7250       CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7251
7252       CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7253
7254       CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7255              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7256
7257       CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7258              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7259
7260       CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7261              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7262
7263       CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7264              && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7265
7266       CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7267
7268       CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7269
7270       CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7271              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7272
7273       CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7274              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7275
7276       CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7277              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7278
7279       CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7280              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7281
7282       CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7283
7284       CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7285              && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7286     }
7287   while (0);
7288
7289   if (immtype == -1)
7290     return false;
7291
7292   if (info)
7293     {
7294       info->element_width = elsize;
7295       info->mvn = emvn != 0;
7296       info->shift = eshift;
7297
7298       unsigned HOST_WIDE_INT imm = 0;
7299
7300       if (immtype >= 12 && immtype <= 15)
7301         info->msl = true;
7302
7303       /* Un-invert bytes of recognized vector, if necessary.  */
7304       if (invmask != 0)
7305         for (i = 0; i < idx; i++)
7306           bytes[i] ^= invmask;
7307
7308       if (immtype == 17)
7309         {
7310           /* FIXME: Broken on 32-bit H_W_I hosts.  */
7311           gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7312
7313           for (i = 0; i < 8; i++)
7314             imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7315               << (i * BITS_PER_UNIT);
7316
7317
7318           info->value = GEN_INT (imm);
7319         }
7320       else
7321         {
7322           for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7323             imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7324
7325           /* Construct 'abcdefgh' because the assembler cannot handle
7326              generic constants.  */
7327           if (info->mvn)
7328             imm = ~imm;
7329           imm = (imm >> info->shift) & 0xff;
7330           info->value = GEN_INT (imm);
7331         }
7332     }
7333
7334   return true;
7335 #undef CHECK
7336 }
7337
7338 static bool
7339 aarch64_const_vec_all_same_int_p (rtx x,
7340                                   HOST_WIDE_INT minval,
7341                                   HOST_WIDE_INT maxval)
7342 {
7343   HOST_WIDE_INT firstval;
7344   int count, i;
7345
7346   if (GET_CODE (x) != CONST_VECTOR
7347       || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
7348     return false;
7349
7350   firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
7351   if (firstval < minval || firstval > maxval)
7352     return false;
7353
7354   count = CONST_VECTOR_NUNITS (x);
7355   for (i = 1; i < count; i++)
7356     if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
7357       return false;
7358
7359   return true;
7360 }
7361
7362 /* Check of immediate shift constants are within range.  */
7363 bool
7364 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
7365 {
7366   int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
7367   if (left)
7368     return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
7369   else
7370     return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
7371 }
7372
7373 /* Return true if X is a uniform vector where all elements
7374    are either the floating-point constant 0.0 or the
7375    integer constant 0.  */
7376 bool
7377 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
7378 {
7379   return x == CONST0_RTX (mode);
7380 }
7381
7382 bool
7383 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
7384 {
7385   HOST_WIDE_INT imm = INTVAL (x);
7386   int i;
7387
7388   for (i = 0; i < 8; i++)
7389     {
7390       unsigned int byte = imm & 0xff;
7391       if (byte != 0xff && byte != 0)
7392        return false;
7393       imm >>= 8;
7394     }
7395
7396   return true;
7397 }
7398
7399 bool
7400 aarch64_mov_operand_p (rtx x,
7401                        enum aarch64_symbol_context context,
7402                        enum machine_mode mode)
7403 {
7404   if (GET_CODE (x) == HIGH
7405       && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7406     return true;
7407
7408   if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
7409     return true;
7410
7411   if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
7412     return true;
7413
7414   return aarch64_classify_symbolic_expression (x, context)
7415     == SYMBOL_TINY_ABSOLUTE;
7416 }
7417
7418 /* Return a const_int vector of VAL.  */
7419 rtx
7420 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
7421 {
7422   int nunits = GET_MODE_NUNITS (mode);
7423   rtvec v = rtvec_alloc (nunits);
7424   int i;
7425
7426   for (i=0; i < nunits; i++)
7427     RTVEC_ELT (v, i) = GEN_INT (val);
7428
7429   return gen_rtx_CONST_VECTOR (mode, v);
7430 }
7431
7432 /* Check OP is a legal scalar immediate for the MOVI instruction.  */
7433
7434 bool
7435 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
7436 {
7437   enum machine_mode vmode;
7438
7439   gcc_assert (!VECTOR_MODE_P (mode));
7440   vmode = aarch64_preferred_simd_mode (mode);
7441   rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
7442   return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
7443 }
7444
7445 /* Construct and return a PARALLEL RTX vector.  */
7446 rtx
7447 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
7448 {
7449   int nunits = GET_MODE_NUNITS (mode);
7450   rtvec v = rtvec_alloc (nunits / 2);
7451   int base = high ? nunits / 2 : 0;
7452   rtx t1;
7453   int i;
7454
7455   for (i=0; i < nunits / 2; i++)
7456     RTVEC_ELT (v, i) = GEN_INT (base + i);
7457
7458   t1 = gen_rtx_PARALLEL (mode, v);
7459   return t1;
7460 }
7461
7462 /* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
7463    HIGH (exclusive).  */
7464 void
7465 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7466 {
7467   HOST_WIDE_INT lane;
7468   gcc_assert (GET_CODE (operand) == CONST_INT);
7469   lane = INTVAL (operand);
7470
7471   if (lane < low || lane >= high)
7472     error ("lane out of range");
7473 }
7474
7475 void
7476 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7477 {
7478   gcc_assert (GET_CODE (operand) == CONST_INT);
7479   HOST_WIDE_INT lane = INTVAL (operand);
7480
7481   if (lane < low || lane >= high)
7482     error ("constant out of range");
7483 }
7484
7485 /* Emit code to reinterpret one AdvSIMD type as another,
7486    without altering bits.  */
7487 void
7488 aarch64_simd_reinterpret (rtx dest, rtx src)
7489 {
7490   emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
7491 }
7492
7493 /* Emit code to place a AdvSIMD pair result in memory locations (with equal
7494    registers).  */
7495 void
7496 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
7497                             rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
7498                             rtx op1)
7499 {
7500   rtx mem = gen_rtx_MEM (mode, destaddr);
7501   rtx tmp1 = gen_reg_rtx (mode);
7502   rtx tmp2 = gen_reg_rtx (mode);
7503
7504   emit_insn (intfn (tmp1, op1, tmp2));
7505
7506   emit_move_insn (mem, tmp1);
7507   mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
7508   emit_move_insn (mem, tmp2);
7509 }
7510
7511 /* Return TRUE if OP is a valid vector addressing mode.  */
7512 bool
7513 aarch64_simd_mem_operand_p (rtx op)
7514 {
7515   return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
7516                         || GET_CODE (XEXP (op, 0)) == REG);
7517 }
7518
7519 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
7520    not to early-clobber SRC registers in the process.
7521
7522    We assume that the operands described by SRC and DEST represent a
7523    decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
7524    number of components into which the copy has been decomposed.  */
7525 void
7526 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
7527                                 rtx *src, unsigned int count)
7528 {
7529   unsigned int i;
7530
7531   if (!reg_overlap_mentioned_p (operands[0], operands[1])
7532       || REGNO (operands[0]) < REGNO (operands[1]))
7533     {
7534       for (i = 0; i < count; i++)
7535         {
7536           operands[2 * i] = dest[i];
7537           operands[2 * i + 1] = src[i];
7538         }
7539     }
7540   else
7541     {
7542       for (i = 0; i < count; i++)
7543         {
7544           operands[2 * i] = dest[count - i - 1];
7545           operands[2 * i + 1] = src[count - i - 1];
7546         }
7547     }
7548 }
7549
7550 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
7551    one of VSTRUCT modes: OI, CI or XI.  */
7552 int
7553 aarch64_simd_attr_length_move (rtx insn)
7554 {
7555   enum machine_mode mode;
7556
7557   extract_insn_cached (insn);
7558
7559   if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
7560     {
7561       mode = GET_MODE (recog_data.operand[0]);
7562       switch (mode)
7563         {
7564         case OImode:
7565           return 8;
7566         case CImode:
7567           return 12;
7568         case XImode:
7569           return 16;
7570         default:
7571           gcc_unreachable ();
7572         }
7573     }
7574   return 4;
7575 }
7576
7577 /* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
7578    alignment of a vector to 128 bits.  */
7579 static HOST_WIDE_INT
7580 aarch64_simd_vector_alignment (const_tree type)
7581 {
7582   HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
7583   return MIN (align, 128);
7584 }
7585
7586 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
7587 static bool
7588 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
7589 {
7590   if (is_packed)
7591     return false;
7592
7593   /* We guarantee alignment for vectors up to 128-bits.  */
7594   if (tree_int_cst_compare (TYPE_SIZE (type),
7595                             bitsize_int (BIGGEST_ALIGNMENT)) > 0)
7596     return false;
7597
7598   /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
7599   return true;
7600 }
7601
7602 /* If VALS is a vector constant that can be loaded into a register
7603    using DUP, generate instructions to do so and return an RTX to
7604    assign to the register.  Otherwise return NULL_RTX.  */
7605 static rtx
7606 aarch64_simd_dup_constant (rtx vals)
7607 {
7608   enum machine_mode mode = GET_MODE (vals);
7609   enum machine_mode inner_mode = GET_MODE_INNER (mode);
7610   int n_elts = GET_MODE_NUNITS (mode);
7611   bool all_same = true;
7612   rtx x;
7613   int i;
7614
7615   if (GET_CODE (vals) != CONST_VECTOR)
7616     return NULL_RTX;
7617
7618   for (i = 1; i < n_elts; ++i)
7619     {
7620       x = CONST_VECTOR_ELT (vals, i);
7621       if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
7622         all_same = false;
7623     }
7624
7625   if (!all_same)
7626     return NULL_RTX;
7627
7628   /* We can load this constant by using DUP and a constant in a
7629      single ARM register.  This will be cheaper than a vector
7630      load.  */
7631   x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
7632   return gen_rtx_VEC_DUPLICATE (mode, x);
7633 }
7634
7635
7636 /* Generate code to load VALS, which is a PARALLEL containing only
7637    constants (for vec_init) or CONST_VECTOR, efficiently into a
7638    register.  Returns an RTX to copy into the register, or NULL_RTX
7639    for a PARALLEL that can not be converted into a CONST_VECTOR.  */
7640 static rtx
7641 aarch64_simd_make_constant (rtx vals)
7642 {
7643   enum machine_mode mode = GET_MODE (vals);
7644   rtx const_dup;
7645   rtx const_vec = NULL_RTX;
7646   int n_elts = GET_MODE_NUNITS (mode);
7647   int n_const = 0;
7648   int i;
7649
7650   if (GET_CODE (vals) == CONST_VECTOR)
7651     const_vec = vals;
7652   else if (GET_CODE (vals) == PARALLEL)
7653     {
7654       /* A CONST_VECTOR must contain only CONST_INTs and
7655          CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
7656          Only store valid constants in a CONST_VECTOR.  */
7657       for (i = 0; i < n_elts; ++i)
7658         {
7659           rtx x = XVECEXP (vals, 0, i);
7660           if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
7661             n_const++;
7662         }
7663       if (n_const == n_elts)
7664         const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7665     }
7666   else
7667     gcc_unreachable ();
7668
7669   if (const_vec != NULL_RTX
7670       && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
7671     /* Load using MOVI/MVNI.  */
7672     return const_vec;
7673   else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
7674     /* Loaded using DUP.  */
7675     return const_dup;
7676   else if (const_vec != NULL_RTX)
7677     /* Load from constant pool. We can not take advantage of single-cycle
7678        LD1 because we need a PC-relative addressing mode.  */
7679     return const_vec;
7680   else
7681     /* A PARALLEL containing something not valid inside CONST_VECTOR.
7682        We can not construct an initializer.  */
7683     return NULL_RTX;
7684 }
7685
7686 void
7687 aarch64_expand_vector_init (rtx target, rtx vals)
7688 {
7689   enum machine_mode mode = GET_MODE (target);
7690   enum machine_mode inner_mode = GET_MODE_INNER (mode);
7691   int n_elts = GET_MODE_NUNITS (mode);
7692   int n_var = 0, one_var = -1;
7693   bool all_same = true;
7694   rtx x, mem;
7695   int i;
7696
7697   x = XVECEXP (vals, 0, 0);
7698   if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7699     n_var = 1, one_var = 0;
7700
7701   for (i = 1; i < n_elts; ++i)
7702     {
7703       x = XVECEXP (vals, 0, i);
7704       if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7705         ++n_var, one_var = i;
7706
7707       if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7708         all_same = false;
7709     }
7710
7711   if (n_var == 0)
7712     {
7713       rtx constant = aarch64_simd_make_constant (vals);
7714       if (constant != NULL_RTX)
7715         {
7716           emit_move_insn (target, constant);
7717           return;
7718         }
7719     }
7720
7721   /* Splat a single non-constant element if we can.  */
7722   if (all_same)
7723     {
7724       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7725       aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7726       return;
7727     }
7728
7729   /* One field is non-constant.  Load constant then overwrite varying
7730      field.  This is more efficient than using the stack.  */
7731   if (n_var == 1)
7732     {
7733       rtx copy = copy_rtx (vals);
7734       rtx index = GEN_INT (one_var);
7735       enum insn_code icode;
7736
7737       /* Load constant part of vector, substitute neighboring value for
7738          varying element.  */
7739       XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7740       aarch64_expand_vector_init (target, copy);
7741
7742       /* Insert variable.  */
7743       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7744       icode = optab_handler (vec_set_optab, mode);
7745       gcc_assert (icode != CODE_FOR_nothing);
7746       emit_insn (GEN_FCN (icode) (target, x, index));
7747       return;
7748     }
7749
7750   /* Construct the vector in memory one field at a time
7751      and load the whole vector.  */
7752   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7753   for (i = 0; i < n_elts; i++)
7754     emit_move_insn (adjust_address_nv (mem, inner_mode,
7755                                     i * GET_MODE_SIZE (inner_mode)),
7756                     XVECEXP (vals, 0, i));
7757   emit_move_insn (target, mem);
7758
7759 }
7760
7761 static unsigned HOST_WIDE_INT
7762 aarch64_shift_truncation_mask (enum machine_mode mode)
7763 {
7764   return
7765     (aarch64_vector_mode_supported_p (mode)
7766      || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7767 }
7768
7769 #ifndef TLS_SECTION_ASM_FLAG
7770 #define TLS_SECTION_ASM_FLAG 'T'
7771 #endif
7772
7773 void
7774 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7775                                tree decl ATTRIBUTE_UNUSED)
7776 {
7777   char flagchars[10], *f = flagchars;
7778
7779   /* If we have already declared this section, we can use an
7780      abbreviated form to switch back to it -- unless this section is
7781      part of a COMDAT groups, in which case GAS requires the full
7782      declaration every time.  */
7783   if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7784       && (flags & SECTION_DECLARED))
7785     {
7786       fprintf (asm_out_file, "\t.section\t%s\n", name);
7787       return;
7788     }
7789
7790   if (!(flags & SECTION_DEBUG))
7791     *f++ = 'a';
7792   if (flags & SECTION_WRITE)
7793     *f++ = 'w';
7794   if (flags & SECTION_CODE)
7795     *f++ = 'x';
7796   if (flags & SECTION_SMALL)
7797     *f++ = 's';
7798   if (flags & SECTION_MERGE)
7799     *f++ = 'M';
7800   if (flags & SECTION_STRINGS)
7801     *f++ = 'S';
7802   if (flags & SECTION_TLS)
7803     *f++ = TLS_SECTION_ASM_FLAG;
7804   if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7805     *f++ = 'G';
7806   *f = '\0';
7807
7808   fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7809
7810   if (!(flags & SECTION_NOTYPE))
7811     {
7812       const char *type;
7813       const char *format;
7814
7815       if (flags & SECTION_BSS)
7816         type = "nobits";
7817       else
7818         type = "progbits";
7819
7820 #ifdef TYPE_OPERAND_FMT
7821       format = "," TYPE_OPERAND_FMT;
7822 #else
7823       format = ",@%s";
7824 #endif
7825
7826       fprintf (asm_out_file, format, type);
7827
7828       if (flags & SECTION_ENTSIZE)
7829         fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7830       if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7831         {
7832           if (TREE_CODE (decl) == IDENTIFIER_NODE)
7833             fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7834           else
7835             fprintf (asm_out_file, ",%s,comdat",
7836                      IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7837         }
7838     }
7839
7840   putc ('\n', asm_out_file);
7841 }
7842
7843 /* Select a format to encode pointers in exception handling data.  */
7844 int
7845 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7846 {
7847    int type;
7848    switch (aarch64_cmodel)
7849      {
7850      case AARCH64_CMODEL_TINY:
7851      case AARCH64_CMODEL_TINY_PIC:
7852      case AARCH64_CMODEL_SMALL:
7853      case AARCH64_CMODEL_SMALL_PIC:
7854        /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
7855           for everything.  */
7856        type = DW_EH_PE_sdata4;
7857        break;
7858      default:
7859        /* No assumptions here.  8-byte relocs required.  */
7860        type = DW_EH_PE_sdata8;
7861        break;
7862      }
7863    return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7864 }
7865
7866 /* Emit load exclusive.  */
7867
7868 static void
7869 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7870                              rtx mem, rtx model_rtx)
7871 {
7872   rtx (*gen) (rtx, rtx, rtx);
7873
7874   switch (mode)
7875     {
7876     case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7877     case HImode: gen = gen_aarch64_load_exclusivehi; break;
7878     case SImode: gen = gen_aarch64_load_exclusivesi; break;
7879     case DImode: gen = gen_aarch64_load_exclusivedi; break;
7880     default:
7881       gcc_unreachable ();
7882     }
7883
7884   emit_insn (gen (rval, mem, model_rtx));
7885 }
7886
7887 /* Emit store exclusive.  */
7888
7889 static void
7890 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7891                               rtx rval, rtx mem, rtx model_rtx)
7892 {
7893   rtx (*gen) (rtx, rtx, rtx, rtx);
7894
7895   switch (mode)
7896     {
7897     case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7898     case HImode: gen = gen_aarch64_store_exclusivehi; break;
7899     case SImode: gen = gen_aarch64_store_exclusivesi; break;
7900     case DImode: gen = gen_aarch64_store_exclusivedi; break;
7901     default:
7902       gcc_unreachable ();
7903     }
7904
7905   emit_insn (gen (bval, rval, mem, model_rtx));
7906 }
7907
7908 /* Mark the previous jump instruction as unlikely.  */
7909
7910 static void
7911 aarch64_emit_unlikely_jump (rtx insn)
7912 {
7913   int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
7914
7915   insn = emit_jump_insn (insn);
7916   add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
7917 }
7918
7919 /* Expand a compare and swap pattern.  */
7920
7921 void
7922 aarch64_expand_compare_and_swap (rtx operands[])
7923 {
7924   rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7925   enum machine_mode mode, cmp_mode;
7926   rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7927
7928   bval = operands[0];
7929   rval = operands[1];
7930   mem = operands[2];
7931   oldval = operands[3];
7932   newval = operands[4];
7933   is_weak = operands[5];
7934   mod_s = operands[6];
7935   mod_f = operands[7];
7936   mode = GET_MODE (mem);
7937   cmp_mode = mode;
7938
7939   /* Normally the succ memory model must be stronger than fail, but in the
7940      unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7941      promote succ to ACQ_REL so that we don't lose the acquire semantics.  */
7942
7943   if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7944       && INTVAL (mod_s) == MEMMODEL_RELEASE)
7945     mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7946
7947   switch (mode)
7948     {
7949     case QImode:
7950     case HImode:
7951       /* For short modes, we're going to perform the comparison in SImode,
7952          so do the zero-extension now.  */
7953       cmp_mode = SImode;
7954       rval = gen_reg_rtx (SImode);
7955       oldval = convert_modes (SImode, mode, oldval, true);
7956       /* Fall through.  */
7957
7958     case SImode:
7959     case DImode:
7960       /* Force the value into a register if needed.  */
7961       if (!aarch64_plus_operand (oldval, mode))
7962         oldval = force_reg (cmp_mode, oldval);
7963       break;
7964
7965     default:
7966       gcc_unreachable ();
7967     }
7968
7969   switch (mode)
7970     {
7971     case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7972     case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7973     case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7974     case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7975     default:
7976       gcc_unreachable ();
7977     }
7978
7979   emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7980
7981   if (mode == QImode || mode == HImode)
7982     emit_move_insn (operands[1], gen_lowpart (mode, rval));
7983
7984   x = gen_rtx_REG (CCmode, CC_REGNUM);
7985   x = gen_rtx_EQ (SImode, x, const0_rtx);
7986   emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7987 }
7988
7989 /* Split a compare and swap pattern.  */
7990
7991 void
7992 aarch64_split_compare_and_swap (rtx operands[])
7993 {
7994   rtx rval, mem, oldval, newval, scratch;
7995   enum machine_mode mode;
7996   bool is_weak;
7997   rtx label1, label2, x, cond;
7998
7999   rval = operands[0];
8000   mem = operands[1];
8001   oldval = operands[2];
8002   newval = operands[3];
8003   is_weak = (operands[4] != const0_rtx);
8004   scratch = operands[7];
8005   mode = GET_MODE (mem);
8006
8007   label1 = NULL_RTX;
8008   if (!is_weak)
8009     {
8010       label1 = gen_label_rtx ();
8011       emit_label (label1);
8012     }
8013   label2 = gen_label_rtx ();
8014
8015   aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
8016
8017   cond = aarch64_gen_compare_reg (NE, rval, oldval);
8018   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8019   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8020                             gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
8021   aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8022
8023   aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
8024
8025   if (!is_weak)
8026     {
8027       x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
8028       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8029                                 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
8030       aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8031     }
8032   else
8033     {
8034       cond = gen_rtx_REG (CCmode, CC_REGNUM);
8035       x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
8036       emit_insn (gen_rtx_SET (VOIDmode, cond, x));
8037     }
8038
8039   emit_label (label2);
8040 }
8041
8042 /* Split an atomic operation.  */
8043
8044 void
8045 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
8046                      rtx value, rtx model_rtx, rtx cond)
8047 {
8048   enum machine_mode mode = GET_MODE (mem);
8049   enum machine_mode wmode = (mode == DImode ? DImode : SImode);
8050   rtx label, x;
8051
8052   label = gen_label_rtx ();
8053   emit_label (label);
8054
8055   if (new_out)
8056     new_out = gen_lowpart (wmode, new_out);
8057   if (old_out)
8058     old_out = gen_lowpart (wmode, old_out);
8059   else
8060     old_out = new_out;
8061   value = simplify_gen_subreg (wmode, value, mode, 0);
8062
8063   aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
8064
8065   switch (code)
8066     {
8067     case SET:
8068       new_out = value;
8069       break;
8070
8071     case NOT:
8072       x = gen_rtx_AND (wmode, old_out, value);
8073       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8074       x = gen_rtx_NOT (wmode, new_out);
8075       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8076       break;
8077
8078     case MINUS:
8079       if (CONST_INT_P (value))
8080         {
8081           value = GEN_INT (-INTVAL (value));
8082           code = PLUS;
8083         }
8084       /* Fall through.  */
8085
8086     default:
8087       x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8088       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8089       break;
8090     }
8091
8092   aarch64_emit_store_exclusive (mode, cond, mem,
8093                                 gen_lowpart (mode, new_out), model_rtx);
8094
8095   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8096   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8097                             gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8098   aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8099 }
8100
8101 static void
8102 aarch64_print_extension (void)
8103 {
8104   const struct aarch64_option_extension *opt = NULL;
8105
8106   for (opt = all_extensions; opt->name != NULL; opt++)
8107     if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8108       asm_fprintf (asm_out_file, "+%s", opt->name);
8109
8110   asm_fprintf (asm_out_file, "\n");
8111 }
8112
8113 static void
8114 aarch64_start_file (void)
8115 {
8116   if (selected_arch)
8117     {
8118       asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8119       aarch64_print_extension ();
8120     }
8121   else if (selected_cpu)
8122     {
8123       const char *truncated_name
8124             = aarch64_rewrite_selected_cpu (selected_cpu->name);
8125       asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
8126       aarch64_print_extension ();
8127     }
8128   default_file_start();
8129 }
8130
8131 /* Target hook for c_mode_for_suffix.  */
8132 static enum machine_mode
8133 aarch64_c_mode_for_suffix (char suffix)
8134 {
8135   if (suffix == 'q')
8136     return TFmode;
8137
8138   return VOIDmode;
8139 }
8140
8141 /* We can only represent floating point constants which will fit in
8142    "quarter-precision" values.  These values are characterised by
8143    a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
8144    by:
8145
8146    (-1)^s * (n/16) * 2^r
8147
8148    Where:
8149      's' is the sign bit.
8150      'n' is an integer in the range 16 <= n <= 31.
8151      'r' is an integer in the range -3 <= r <= 4.  */
8152
8153 /* Return true iff X can be represented by a quarter-precision
8154    floating point immediate operand X.  Note, we cannot represent 0.0.  */
8155 bool
8156 aarch64_float_const_representable_p (rtx x)
8157 {
8158   /* This represents our current view of how many bits
8159      make up the mantissa.  */
8160   int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8161   int exponent;
8162   unsigned HOST_WIDE_INT mantissa, mask;
8163   HOST_WIDE_INT m1, m2;
8164   REAL_VALUE_TYPE r, m;
8165
8166   if (!CONST_DOUBLE_P (x))
8167     return false;
8168
8169   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8170
8171   /* We cannot represent infinities, NaNs or +/-zero.  We won't
8172      know if we have +zero until we analyse the mantissa, but we
8173      can reject the other invalid values.  */
8174   if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8175       || REAL_VALUE_MINUS_ZERO (r))
8176     return false;
8177
8178   /* Extract exponent.  */
8179   r = real_value_abs (&r);
8180   exponent = REAL_EXP (&r);
8181
8182   /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8183      highest (sign) bit, with a fixed binary point at bit point_pos.
8184      m1 holds the low part of the mantissa, m2 the high part.
8185      WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8186      bits for the mantissa, this can fail (low bits will be lost).  */
8187   real_ldexp (&m, &r, point_pos - exponent);
8188   REAL_VALUE_TO_INT (&m1, &m2, m);
8189
8190   /* If the low part of the mantissa has bits set we cannot represent
8191      the value.  */
8192   if (m1 != 0)
8193     return false;
8194   /* We have rejected the lower HOST_WIDE_INT, so update our
8195      understanding of how many bits lie in the mantissa and
8196      look only at the high HOST_WIDE_INT.  */
8197   mantissa = m2;
8198   point_pos -= HOST_BITS_PER_WIDE_INT;
8199
8200   /* We can only represent values with a mantissa of the form 1.xxxx.  */
8201   mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8202   if ((mantissa & mask) != 0)
8203     return false;
8204
8205   /* Having filtered unrepresentable values, we may now remove all
8206      but the highest 5 bits.  */
8207   mantissa >>= point_pos - 5;
8208
8209   /* We cannot represent the value 0.0, so reject it.  This is handled
8210      elsewhere.  */
8211   if (mantissa == 0)
8212     return false;
8213
8214   /* Then, as bit 4 is always set, we can mask it off, leaving
8215      the mantissa in the range [0, 15].  */
8216   mantissa &= ~(1 << 4);
8217   gcc_assert (mantissa <= 15);
8218
8219   /* GCC internally does not use IEEE754-like encoding (where normalized
8220      significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
8221      Our mantissa values are shifted 4 places to the left relative to
8222      normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8223      by 5 places to correct for GCC's representation.  */
8224   exponent = 5 - exponent;
8225
8226   return (exponent >= 0 && exponent <= 7);
8227 }
8228
8229 char*
8230 aarch64_output_simd_mov_immediate (rtx const_vector,
8231                                    enum machine_mode mode,
8232                                    unsigned width)
8233 {
8234   bool is_valid;
8235   static char templ[40];
8236   const char *mnemonic;
8237   const char *shift_op;
8238   unsigned int lane_count = 0;
8239   char element_char;
8240
8241   struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
8242
8243   /* This will return true to show const_vector is legal for use as either
8244      a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
8245      also update INFO to show how the immediate should be generated.  */
8246   is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
8247   gcc_assert (is_valid);
8248
8249   element_char = sizetochar (info.element_width);
8250   lane_count = width / info.element_width;
8251
8252   mode = GET_MODE_INNER (mode);
8253   if (mode == SFmode || mode == DFmode)
8254     {
8255       gcc_assert (info.shift == 0 && ! info.mvn);
8256       if (aarch64_float_const_zero_rtx_p (info.value))
8257         info.value = GEN_INT (0);
8258       else
8259         {
8260 #define buf_size 20
8261           REAL_VALUE_TYPE r;
8262           REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8263           char float_buf[buf_size] = {'\0'};
8264           real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8265 #undef buf_size
8266
8267           if (lane_count == 1)
8268             snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8269           else
8270             snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
8271                       lane_count, element_char, float_buf);
8272           return templ;
8273         }
8274     }
8275
8276   mnemonic = info.mvn ? "mvni" : "movi";
8277   shift_op = info.msl ? "msl" : "lsl";
8278
8279   if (lane_count == 1)
8280     snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8281               mnemonic, UINTVAL (info.value));
8282   else if (info.shift)
8283     snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8284               ", %s %d", mnemonic, lane_count, element_char,
8285               UINTVAL (info.value), shift_op, info.shift);
8286   else
8287     snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
8288               mnemonic, lane_count, element_char, UINTVAL (info.value));
8289   return templ;
8290 }
8291
8292 char*
8293 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
8294                                           enum machine_mode mode)
8295 {
8296   enum machine_mode vmode;
8297
8298   gcc_assert (!VECTOR_MODE_P (mode));
8299   vmode = aarch64_simd_container_mode (mode, 64);
8300   rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
8301   return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
8302 }
8303
8304 /* Split operands into moves from op[1] + op[2] into op[0].  */
8305
8306 void
8307 aarch64_split_combinev16qi (rtx operands[3])
8308 {
8309   unsigned int dest = REGNO (operands[0]);
8310   unsigned int src1 = REGNO (operands[1]);
8311   unsigned int src2 = REGNO (operands[2]);
8312   enum machine_mode halfmode = GET_MODE (operands[1]);
8313   unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
8314   rtx destlo, desthi;
8315
8316   gcc_assert (halfmode == V16QImode);
8317
8318   if (src1 == dest && src2 == dest + halfregs)
8319     {
8320       /* No-op move.  Can't split to nothing; emit something.  */
8321       emit_note (NOTE_INSN_DELETED);
8322       return;
8323     }
8324
8325   /* Preserve register attributes for variable tracking.  */
8326   destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
8327   desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
8328                                GET_MODE_SIZE (halfmode));
8329
8330   /* Special case of reversed high/low parts.  */
8331   if (reg_overlap_mentioned_p (operands[2], destlo)
8332       && reg_overlap_mentioned_p (operands[1], desthi))
8333     {
8334       emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8335       emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
8336       emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8337     }
8338   else if (!reg_overlap_mentioned_p (operands[2], destlo))
8339     {
8340       /* Try to avoid unnecessary moves if part of the result
8341          is in the right place already.  */
8342       if (src1 != dest)
8343         emit_move_insn (destlo, operands[1]);
8344       if (src2 != dest + halfregs)
8345         emit_move_insn (desthi, operands[2]);
8346     }
8347   else
8348     {
8349       if (src2 != dest + halfregs)
8350         emit_move_insn (desthi, operands[2]);
8351       if (src1 != dest)
8352         emit_move_insn (destlo, operands[1]);
8353     }
8354 }
8355
/* vec_perm support.  */

/* Capacity of the PERM index array in struct expand_vec_perm_d.  */
#define MAX_VECT_LEN 16

/* Description of one constant vector permutation, shared by the
   aarch64_evpc_* recognizers and their callers.  */
struct expand_vec_perm_d
{
  /* Destination and the two input vectors.  */
  rtx target, op0, op1;
  /* Selector: one index per result element.  The callers keep the
     indices below 2 * NELT; values of NELT or more refer to the second
     operand.  */
  unsigned char perm[MAX_VECT_LEN];
  /* Vector mode of the permutation.  */
  enum machine_mode vmode;
  /* Number of elements, set from GET_MODE_NUNITS (VMODE).  */
  unsigned char nelt;
  /* True when the selector only involves a single input vector.  */
  bool one_vector_p;
  /* True when the caller only asks whether the permutation can be
     matched; the recognizers that check it return before emitting.  */
  bool testing_p;
};
8369
8370 /* Generate a variable permutation.  */
8371
8372 static void
8373 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
8374 {
8375   enum machine_mode vmode = GET_MODE (target);
8376   bool one_vector_p = rtx_equal_p (op0, op1);
8377
8378   gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
8379   gcc_checking_assert (GET_MODE (op0) == vmode);
8380   gcc_checking_assert (GET_MODE (op1) == vmode);
8381   gcc_checking_assert (GET_MODE (sel) == vmode);
8382   gcc_checking_assert (TARGET_SIMD);
8383
8384   if (one_vector_p)
8385     {
8386       if (vmode == V8QImode)
8387         {
8388           /* Expand the argument to a V16QI mode by duplicating it.  */
8389           rtx pair = gen_reg_rtx (V16QImode);
8390           emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
8391           emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8392         }
8393       else
8394         {
8395           emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
8396         }
8397     }
8398   else
8399     {
8400       rtx pair;
8401
8402       if (vmode == V8QImode)
8403         {
8404           pair = gen_reg_rtx (V16QImode);
8405           emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
8406           emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8407         }
8408       else
8409         {
8410           pair = gen_reg_rtx (OImode);
8411           emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
8412           emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
8413         }
8414     }
8415 }
8416
8417 void
8418 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
8419 {
8420   enum machine_mode vmode = GET_MODE (target);
8421   unsigned int i, nelt = GET_MODE_NUNITS (vmode);
8422   bool one_vector_p = rtx_equal_p (op0, op1);
8423   rtx rmask[MAX_VECT_LEN], mask;
8424
8425   gcc_checking_assert (!BYTES_BIG_ENDIAN);
8426
8427   /* The TBL instruction does not use a modulo index, so we must take care
8428      of that ourselves.  */
8429   mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
8430   for (i = 0; i < nelt; ++i)
8431     rmask[i] = mask;
8432   mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
8433   sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
8434
8435   aarch64_expand_vec_perm_1 (target, op0, op1, sel);
8436 }
8437
8438 /* Recognize patterns suitable for the TRN instructions.  */
8439 static bool
8440 aarch64_evpc_trn (struct expand_vec_perm_d *d)
8441 {
8442   unsigned int i, odd, mask, nelt = d->nelt;
8443   rtx out, in0, in1, x;
8444   rtx (*gen) (rtx, rtx, rtx);
8445   enum machine_mode vmode = d->vmode;
8446
8447   if (GET_MODE_UNIT_SIZE (vmode) > 8)
8448     return false;
8449
8450   /* Note that these are little-endian tests.
8451      We correct for big-endian later.  */
8452   if (d->perm[0] == 0)
8453     odd = 0;
8454   else if (d->perm[0] == 1)
8455     odd = 1;
8456   else
8457     return false;
8458   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8459
8460   for (i = 0; i < nelt; i += 2)
8461     {
8462       if (d->perm[i] != i + odd)
8463         return false;
8464       if (d->perm[i + 1] != ((i + nelt + odd) & mask))
8465         return false;
8466     }
8467
8468   /* Success!  */
8469   if (d->testing_p)
8470     return true;
8471
8472   in0 = d->op0;
8473   in1 = d->op1;
8474   if (BYTES_BIG_ENDIAN)
8475     {
8476       x = in0, in0 = in1, in1 = x;
8477       odd = !odd;
8478     }
8479   out = d->target;
8480
8481   if (odd)
8482     {
8483       switch (vmode)
8484         {
8485         case V16QImode: gen = gen_aarch64_trn2v16qi; break;
8486         case V8QImode: gen = gen_aarch64_trn2v8qi; break;
8487         case V8HImode: gen = gen_aarch64_trn2v8hi; break;
8488         case V4HImode: gen = gen_aarch64_trn2v4hi; break;
8489         case V4SImode: gen = gen_aarch64_trn2v4si; break;
8490         case V2SImode: gen = gen_aarch64_trn2v2si; break;
8491         case V2DImode: gen = gen_aarch64_trn2v2di; break;
8492         case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
8493         case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
8494         case V2DFmode: gen = gen_aarch64_trn2v2df; break;
8495         default:
8496           return false;
8497         }
8498     }
8499   else
8500     {
8501       switch (vmode)
8502         {
8503         case V16QImode: gen = gen_aarch64_trn1v16qi; break;
8504         case V8QImode: gen = gen_aarch64_trn1v8qi; break;
8505         case V8HImode: gen = gen_aarch64_trn1v8hi; break;
8506         case V4HImode: gen = gen_aarch64_trn1v4hi; break;
8507         case V4SImode: gen = gen_aarch64_trn1v4si; break;
8508         case V2SImode: gen = gen_aarch64_trn1v2si; break;
8509         case V2DImode: gen = gen_aarch64_trn1v2di; break;
8510         case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
8511         case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
8512         case V2DFmode: gen = gen_aarch64_trn1v2df; break;
8513         default:
8514           return false;
8515         }
8516     }
8517
8518   emit_insn (gen (out, in0, in1));
8519   return true;
8520 }
8521
8522 /* Recognize patterns suitable for the UZP instructions.  */
8523 static bool
8524 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
8525 {
8526   unsigned int i, odd, mask, nelt = d->nelt;
8527   rtx out, in0, in1, x;
8528   rtx (*gen) (rtx, rtx, rtx);
8529   enum machine_mode vmode = d->vmode;
8530
8531   if (GET_MODE_UNIT_SIZE (vmode) > 8)
8532     return false;
8533
8534   /* Note that these are little-endian tests.
8535      We correct for big-endian later.  */
8536   if (d->perm[0] == 0)
8537     odd = 0;
8538   else if (d->perm[0] == 1)
8539     odd = 1;
8540   else
8541     return false;
8542   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8543
8544   for (i = 0; i < nelt; i++)
8545     {
8546       unsigned elt = (i * 2 + odd) & mask;
8547       if (d->perm[i] != elt)
8548         return false;
8549     }
8550
8551   /* Success!  */
8552   if (d->testing_p)
8553     return true;
8554
8555   in0 = d->op0;
8556   in1 = d->op1;
8557   if (BYTES_BIG_ENDIAN)
8558     {
8559       x = in0, in0 = in1, in1 = x;
8560       odd = !odd;
8561     }
8562   out = d->target;
8563
8564   if (odd)
8565     {
8566       switch (vmode)
8567         {
8568         case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
8569         case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
8570         case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
8571         case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
8572         case V4SImode: gen = gen_aarch64_uzp2v4si; break;
8573         case V2SImode: gen = gen_aarch64_uzp2v2si; break;
8574         case V2DImode: gen = gen_aarch64_uzp2v2di; break;
8575         case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
8576         case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
8577         case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
8578         default:
8579           return false;
8580         }
8581     }
8582   else
8583     {
8584       switch (vmode)
8585         {
8586         case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
8587         case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
8588         case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
8589         case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
8590         case V4SImode: gen = gen_aarch64_uzp1v4si; break;
8591         case V2SImode: gen = gen_aarch64_uzp1v2si; break;
8592         case V2DImode: gen = gen_aarch64_uzp1v2di; break;
8593         case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
8594         case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
8595         case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
8596         default:
8597           return false;
8598         }
8599     }
8600
8601   emit_insn (gen (out, in0, in1));
8602   return true;
8603 }
8604
8605 /* Recognize patterns suitable for the ZIP instructions.  */
8606 static bool
8607 aarch64_evpc_zip (struct expand_vec_perm_d *d)
8608 {
8609   unsigned int i, high, mask, nelt = d->nelt;
8610   rtx out, in0, in1, x;
8611   rtx (*gen) (rtx, rtx, rtx);
8612   enum machine_mode vmode = d->vmode;
8613
8614   if (GET_MODE_UNIT_SIZE (vmode) > 8)
8615     return false;
8616
8617   /* Note that these are little-endian tests.
8618      We correct for big-endian later.  */
8619   high = nelt / 2;
8620   if (d->perm[0] == high)
8621     /* Do Nothing.  */
8622     ;
8623   else if (d->perm[0] == 0)
8624     high = 0;
8625   else
8626     return false;
8627   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8628
8629   for (i = 0; i < nelt / 2; i++)
8630     {
8631       unsigned elt = (i + high) & mask;
8632       if (d->perm[i * 2] != elt)
8633         return false;
8634       elt = (elt + nelt) & mask;
8635       if (d->perm[i * 2 + 1] != elt)
8636         return false;
8637     }
8638
8639   /* Success!  */
8640   if (d->testing_p)
8641     return true;
8642
8643   in0 = d->op0;
8644   in1 = d->op1;
8645   if (BYTES_BIG_ENDIAN)
8646     {
8647       x = in0, in0 = in1, in1 = x;
8648       high = !high;
8649     }
8650   out = d->target;
8651
8652   if (high)
8653     {
8654       switch (vmode)
8655         {
8656         case V16QImode: gen = gen_aarch64_zip2v16qi; break;
8657         case V8QImode: gen = gen_aarch64_zip2v8qi; break;
8658         case V8HImode: gen = gen_aarch64_zip2v8hi; break;
8659         case V4HImode: gen = gen_aarch64_zip2v4hi; break;
8660         case V4SImode: gen = gen_aarch64_zip2v4si; break;
8661         case V2SImode: gen = gen_aarch64_zip2v2si; break;
8662         case V2DImode: gen = gen_aarch64_zip2v2di; break;
8663         case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
8664         case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
8665         case V2DFmode: gen = gen_aarch64_zip2v2df; break;
8666         default:
8667           return false;
8668         }
8669     }
8670   else
8671     {
8672       switch (vmode)
8673         {
8674         case V16QImode: gen = gen_aarch64_zip1v16qi; break;
8675         case V8QImode: gen = gen_aarch64_zip1v8qi; break;
8676         case V8HImode: gen = gen_aarch64_zip1v8hi; break;
8677         case V4HImode: gen = gen_aarch64_zip1v4hi; break;
8678         case V4SImode: gen = gen_aarch64_zip1v4si; break;
8679         case V2SImode: gen = gen_aarch64_zip1v2si; break;
8680         case V2DImode: gen = gen_aarch64_zip1v2di; break;
8681         case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
8682         case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
8683         case V2DFmode: gen = gen_aarch64_zip1v2df; break;
8684         default:
8685           return false;
8686         }
8687     }
8688
8689   emit_insn (gen (out, in0, in1));
8690   return true;
8691 }
8692
8693 static bool
8694 aarch64_evpc_dup (struct expand_vec_perm_d *d)
8695 {
8696   rtx (*gen) (rtx, rtx, rtx);
8697   rtx out = d->target;
8698   rtx in0;
8699   enum machine_mode vmode = d->vmode;
8700   unsigned int i, elt, nelt = d->nelt;
8701   rtx lane;
8702
8703   /* TODO: This may not be big-endian safe.  */
8704   if (BYTES_BIG_ENDIAN)
8705     return false;
8706
8707   elt = d->perm[0];
8708   for (i = 1; i < nelt; i++)
8709     {
8710       if (elt != d->perm[i])
8711         return false;
8712     }
8713
8714   /* The generic preparation in aarch64_expand_vec_perm_const_1
8715      swaps the operand order and the permute indices if it finds
8716      d->perm[0] to be in the second operand.  Thus, we can always
8717      use d->op0 and need not do any extra arithmetic to get the
8718      correct lane number.  */
8719   in0 = d->op0;
8720   lane = GEN_INT (elt);
8721
8722   switch (vmode)
8723     {
8724     case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8725     case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8726     case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8727     case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8728     case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8729     case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8730     case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8731     case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8732     case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8733     case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8734     default:
8735       return false;
8736     }
8737
8738   emit_insn (gen (out, in0, lane));
8739   return true;
8740 }
8741
8742 static bool
8743 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8744 {
8745   rtx rperm[MAX_VECT_LEN], sel;
8746   enum machine_mode vmode = d->vmode;
8747   unsigned int i, nelt = d->nelt;
8748
8749   /* TODO: ARM's TBL indexing is little-endian.  In order to handle GCC's
8750      numbering of elements for big-endian, we must reverse the order.  */
8751   if (BYTES_BIG_ENDIAN)
8752     return false;
8753
8754   if (d->testing_p)
8755     return true;
8756
8757   /* Generic code will try constant permutation twice.  Once with the
8758      original mode and again with the elements lowered to QImode.
8759      So wait and don't do the selector expansion ourselves.  */
8760   if (vmode != V8QImode && vmode != V16QImode)
8761     return false;
8762
8763   for (i = 0; i < nelt; ++i)
8764     rperm[i] = GEN_INT (d->perm[i]);
8765   sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8766   sel = force_reg (vmode, sel);
8767
8768   aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8769   return true;
8770 }
8771
8772 static bool
8773 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8774 {
8775   /* The pattern matching functions above are written to look for a small
8776      number to begin the sequence (0, 1, N/2).  If we begin with an index
8777      from the second operand, we can swap the operands.  */
8778   if (d->perm[0] >= d->nelt)
8779     {
8780       unsigned i, nelt = d->nelt;
8781       rtx x;
8782
8783       for (i = 0; i < nelt; ++i)
8784         d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
8785
8786       x = d->op0;
8787       d->op0 = d->op1;
8788       d->op1 = x;
8789     }
8790
8791   if (TARGET_SIMD)
8792     {
8793       if (aarch64_evpc_zip (d))
8794         return true;
8795       else if (aarch64_evpc_uzp (d))
8796         return true;
8797       else if (aarch64_evpc_trn (d))
8798         return true;
8799       else if (aarch64_evpc_dup (d))
8800         return true;
8801       return aarch64_evpc_tbl (d);
8802     }
8803   return false;
8804 }
8805
8806 /* Expand a vec_perm_const pattern.  */
8807
8808 bool
8809 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8810 {
8811   struct expand_vec_perm_d d;
8812   int i, nelt, which;
8813
8814   d.target = target;
8815   d.op0 = op0;
8816   d.op1 = op1;
8817
8818   d.vmode = GET_MODE (target);
8819   gcc_assert (VECTOR_MODE_P (d.vmode));
8820   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8821   d.testing_p = false;
8822
8823   for (i = which = 0; i < nelt; ++i)
8824     {
8825       rtx e = XVECEXP (sel, 0, i);
8826       int ei = INTVAL (e) & (2 * nelt - 1);
8827       which |= (ei < nelt ? 1 : 2);
8828       d.perm[i] = ei;
8829     }
8830
8831   switch (which)
8832     {
8833     default:
8834       gcc_unreachable ();
8835
8836     case 3:
8837       d.one_vector_p = false;
8838       if (!rtx_equal_p (op0, op1))
8839         break;
8840
8841       /* The elements of PERM do not suggest that only the first operand
8842          is used, but both operands are identical.  Allow easier matching
8843          of the permutation by folding the permutation into the single
8844          input vector.  */
8845       /* Fall Through.  */
8846     case 2:
8847       for (i = 0; i < nelt; ++i)
8848         d.perm[i] &= nelt - 1;
8849       d.op0 = op1;
8850       d.one_vector_p = true;
8851       break;
8852
8853     case 1:
8854       d.op1 = op0;
8855       d.one_vector_p = true;
8856       break;
8857     }
8858
8859   return aarch64_expand_vec_perm_const_1 (&d);
8860 }
8861
8862 static bool
8863 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8864                                      const unsigned char *sel)
8865 {
8866   struct expand_vec_perm_d d;
8867   unsigned int i, nelt, which;
8868   bool ret;
8869
8870   d.vmode = vmode;
8871   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8872   d.testing_p = true;
8873   memcpy (d.perm, sel, nelt);
8874
8875   /* Calculate whether all elements are in one vector.  */
8876   for (i = which = 0; i < nelt; ++i)
8877     {
8878       unsigned char e = d.perm[i];
8879       gcc_assert (e < 2 * nelt);
8880       which |= (e < nelt ? 1 : 2);
8881     }
8882
8883   /* If all elements are from the second vector, reindex as if from the
8884      first vector.  */
8885   if (which == 2)
8886     for (i = 0; i < nelt; ++i)
8887       d.perm[i] -= nelt;
8888
8889   /* Check whether the mask can be applied to a single vector.  */
8890   d.one_vector_p = (which != 3);
8891
8892   d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8893   d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8894   if (!d.one_vector_p)
8895     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8896
8897   start_sequence ();
8898   ret = aarch64_expand_vec_perm_const_1 (&d);
8899   end_sequence ();
8900
8901   return ret;
8902 }
8903
8904 /* Implement target hook CANNOT_CHANGE_MODE_CLASS.  */
8905 bool
8906 aarch64_cannot_change_mode_class (enum machine_mode from,
8907                                   enum machine_mode to,
8908                                   enum reg_class rclass)
8909 {
8910   /* Full-reg subregs are allowed on general regs or any class if they are
8911      the same size.  */
8912   if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
8913       || !reg_classes_intersect_p (FP_REGS, rclass))
8914     return false;
8915
8916   /* Limited combinations of subregs are safe on FPREGs.  Particularly,
8917      1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
8918      2. Scalar to Scalar for integer modes or same size float modes.
8919      3. Vector to Vector modes.  */
8920   if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
8921     {
8922       if (aarch64_vector_mode_supported_p (from)
8923           && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
8924         return false;
8925
8926       if (GET_MODE_NUNITS (from) == 1
8927           && GET_MODE_NUNITS (to) == 1
8928           && (GET_MODE_CLASS (from) == MODE_INT
8929               || from == to))
8930         return false;
8931
8932       if (aarch64_vector_mode_supported_p (from)
8933           && aarch64_vector_mode_supported_p (to))
8934         return false;
8935     }
8936
8937   return true;
8938 }
8939
/* Target hook overrides.  Each #undef/#define pair below replaces the
   current definition of a TARGET_* macro with the AArch64 function,
   generic hook_* helper or constant named on the #define line.
   NOTE(review): presumably these are picked up by the target structure
   initializer from target-def.h -- confirm.  */

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost

/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true

/* Assembler directive strings used for the aligned DI, HI and SI
   operator macros.  */
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

/* Answered by a generic hook whose name ends in "_true".  */
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

/* Answered by a generic hook whose name ends in "_false".  */
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

/* When TARGET_BIG_ENDIAN_DEFAULT is defined, the default target flags
   are set to MASK_BIG_END.  */
#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef  TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif
9002
9003 #undef TARGET_CLASS_MAX_NREGS
9004 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
9005
9006 #undef TARGET_BUILTIN_DECL
9007 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
9008
9009 #undef  TARGET_EXPAND_BUILTIN
9010 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
9011
9012 #undef TARGET_EXPAND_BUILTIN_VA_START
9013 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
9014
9015 #undef TARGET_FOLD_BUILTIN
9016 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
9017
9018 #undef TARGET_FUNCTION_ARG
9019 #define TARGET_FUNCTION_ARG aarch64_function_arg
9020
9021 #undef TARGET_FUNCTION_ARG_ADVANCE
9022 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
9023
9024 #undef TARGET_FUNCTION_ARG_BOUNDARY
9025 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
9026
9027 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
9028 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
9029
9030 #undef TARGET_FUNCTION_VALUE
9031 #define TARGET_FUNCTION_VALUE aarch64_function_value
9032
9033 #undef TARGET_FUNCTION_VALUE_REGNO_P
9034 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
9035
9036 #undef TARGET_FRAME_POINTER_REQUIRED
9037 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
9038
9039 #undef TARGET_GIMPLE_FOLD_BUILTIN
9040 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
9041
9042 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
9043 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
9044
9045 #undef  TARGET_INIT_BUILTINS
9046 #define TARGET_INIT_BUILTINS  aarch64_init_builtins
9047
9048 #undef TARGET_LEGITIMATE_ADDRESS_P
9049 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
9050
9051 #undef TARGET_LEGITIMATE_CONSTANT_P
9052 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
9053
9054 #undef TARGET_LIBGCC_CMP_RETURN_MODE
9055 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
9056
9057 #undef TARGET_LRA_P
9058 #define TARGET_LRA_P aarch64_lra_p
9059
9060 #undef TARGET_MANGLE_TYPE
9061 #define TARGET_MANGLE_TYPE aarch64_mangle_type
9062
9063 #undef TARGET_MEMORY_MOVE_COST
9064 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
9065
9066 #undef TARGET_MUST_PASS_IN_STACK
9067 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
9068
9069 /* This target hook should return true if accesses to volatile bitfields
9070    should use the narrowest mode possible.  It should return false if these
9071    accesses should use the bitfield container type.  */
9072 #undef TARGET_NARROW_VOLATILE_BITFIELD
9073 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
9074
9075 #undef  TARGET_OPTION_OVERRIDE
9076 #define TARGET_OPTION_OVERRIDE aarch64_override_options
9077
9078 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
9079 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
9080   aarch64_override_options_after_change
9081
9082 #undef TARGET_PASS_BY_REFERENCE
9083 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
9084
9085 #undef TARGET_PREFERRED_RELOAD_CLASS
9086 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
9087
9088 #undef TARGET_SECONDARY_RELOAD
9089 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
9090
9091 #undef TARGET_SHIFT_TRUNCATION_MASK
9092 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
9093
9094 #undef TARGET_SETUP_INCOMING_VARARGS
9095 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
9096
9097 #undef TARGET_STRUCT_VALUE_RTX
9098 #define TARGET_STRUCT_VALUE_RTX   aarch64_struct_value_rtx
9099
/* Going by the hook names, this run covers cost estimation
   (REGISTER_MOVE_COST, RTX_COSTS), how values are returned
   (RETURN_IN_MEMORY, RETURN_IN_MSB) and the scheduler issue rate.
   NOTE(review): the precise contract of each hook is not visible in this
   chunk; consult the GCC internals manual before changing a binding.  */
9100 #undef TARGET_REGISTER_MOVE_COST
9101 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
9102
9103 #undef TARGET_RETURN_IN_MEMORY
9104 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
9105
9106 #undef TARGET_RETURN_IN_MSB
9107 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
9108
9109 #undef TARGET_RTX_COSTS
9110 #define TARGET_RTX_COSTS aarch64_rtx_costs
9111
9112 #undef TARGET_SCHED_ISSUE_RATE
9113 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
9114
/* Trampoline initialisation, constant-pool block usage and the two
   "mode supported" predicates (vector modes and array modes), each bound
   to the correspondingly named aarch64_* identifier.  */
9115 #undef TARGET_TRAMPOLINE_INIT
9116 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
9117
9118 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9119 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
9120
9121 #undef TARGET_VECTOR_MODE_SUPPORTED_P
9122 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
9123
9124 #undef TARGET_ARRAY_MODE_SUPPORTED_P
9125 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
9126
/* TARGET_VECTORIZE_* hook overrides.  Definitions that would not fit on
   one line are split with a backslash continuation.  */
9127 #undef TARGET_VECTORIZE_ADD_STMT_COST
9128 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
9129
9130 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
9131 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
9132   aarch64_builtin_vectorization_cost
9133
9134 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
9135 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
9136
/* Unlike its neighbours, TARGET_VECTORIZE_BUILTINS is defined with an
   empty replacement list, so it names no function.
   NOTE(review): confirm whether anything still tests or expands this
   macro; if not, the pair is dead and could be removed.  */
9137 #undef TARGET_VECTORIZE_BUILTINS
9138 #define TARGET_VECTORIZE_BUILTINS
9139
9140 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
9141 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
9142   aarch64_builtin_vectorized_function
9143
9144 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
9145 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
9146   aarch64_autovectorize_vector_sizes
9147
9148 /* Section anchor support.  */
9149
/* Smallest (most negative) offset allowed from a section anchor.
   NOTE(review): -256 presumably matches the lower bound of the signed
   9-bit unscaled load/store offset range -- confirm against the
   architecture reference.  */
9150 #undef TARGET_MIN_ANCHOR_OFFSET
9151 #define TARGET_MIN_ANCHOR_OFFSET -256
9152
9153 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
9154    byte offset; we can do much more for larger data types, but have no way
9155    to determine the size of the access.  We assume accesses are aligned.  */
9156 #undef TARGET_MAX_ANCHOR_OFFSET
9157 #define TARGET_MAX_ANCHOR_OFFSET 4095
9158
/* Vector alignment hooks.  These follow the anchor macros but are bound
   to functions rather than to integer constants.  */
9159 #undef TARGET_VECTOR_ALIGNMENT
9160 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
9161
9162 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
9163 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
9164   aarch64_simd_vector_alignment_reachable
9165
9166 /* vec_perm support.  */
9167
9168 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
9169 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
9170   aarch64_vectorize_vec_perm_const_ok
9171
9172
/* Condition-code register hooks (not part of the vec_perm support).
   TARGET_FLAGS_REGNUM is bound to the constant CC_REGNUM rather than to a
   function; CC_REGNUM is not defined in this chunk.  */
9173 #undef TARGET_FIXED_CONDITION_CODE_REGS
9174 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
9175
9176 #undef TARGET_FLAGS_REGNUM
9177 #define TARGET_FLAGS_REGNUM CC_REGNUM
9178
/* Define the global `targetm' object of type struct gcc_target.
   NOTE(review): TARGET_INITIALIZER is not defined in this chunk; it
   presumably comes from target-def.h (included at the top of the file)
   and expands using the TARGET_* macros defined earlier in the file,
   which would be why this definition comes after all of them --
   confirm.  */
9179 struct gcc_target targetm = TARGET_INITIALIZER;
9180
/* NOTE(review): included as the very last line of the file; presumably
   the gengtype-generated garbage-collector tables for this file --
   confirm.  */
9181 #include "gt-aarch64.h"