1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
25 #include "insn-codes.h"
27 #include "insn-attr.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
35 #include "hard-reg-set.h"
41 #include "target-def.h"
42 #include "targhooks.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "pointer-set.h"
50 #include "hash-table.h"
52 #include "basic-block.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
57 #include "gimple-expr.h"
64 #include "tree-vectorizer.h"
65 #include "config/arm/aarch-cost-tables.h"
68 /* Defined for convenience. */
69 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
71 /* Classifies an address.
74 A simple base register plus immediate offset.
77 A base register indexed by immediate offset with writeback.
80 A base register indexed by (optionally scaled) register.
83 A base register indexed by (optionally scaled) zero-extended register.
86 A base register indexed by (optionally scaled) sign-extended register.
89 A LO_SUM rtx with a base register and "LO12" symbol relocation.
92 A constant symbolic address, in pc-relative literal pool. */
94 enum aarch64_address_type {
104 struct aarch64_address_info {
105 enum aarch64_address_type type;
109 enum aarch64_symbol_type symbol_type;
112 struct simd_immediate_info
121 /* The current code model. */
122 enum aarch64_code_model aarch64_cmodel;
125 #undef TARGET_HAVE_TLS
126 #define TARGET_HAVE_TLS 1
129 static bool aarch64_lra_p (void);
130 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
131 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
133 enum machine_mode *, int *,
135 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
136 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
137 static void aarch64_override_options_after_change (void);
138 static bool aarch64_vector_mode_supported_p (enum machine_mode);
139 static unsigned bit_count (unsigned HOST_WIDE_INT);
140 static bool aarch64_const_vec_all_same_int_p (rtx,
141 HOST_WIDE_INT, HOST_WIDE_INT);
143 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
144 const unsigned char *sel);
145 static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
147 /* The processor for which instructions should be scheduled. */
148 enum aarch64_processor aarch64_tune = cortexa53;
150 /* The current tuning set. */
151 const struct tune_params *aarch64_tune_params;
153 /* Mask to specify which instructions we are allowed to generate. */
154 unsigned long aarch64_isa_flags = 0;
156 /* Mask to specify which instruction scheduling options should be used. */
157 unsigned long aarch64_tune_flags = 0;
159 /* Tuning parameters. */
161 #if HAVE_DESIGNATED_INITIALIZERS
162 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
164 #define NAMED_PARAM(NAME, VAL) (VAL)
167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
171 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
174 static const struct cpu_addrcost_table generic_addrcost_table =
176 #if HAVE_DESIGNATED_INITIALIZERS
185 NAMED_PARAM (pre_modify, 0),
186 NAMED_PARAM (post_modify, 0),
187 NAMED_PARAM (register_offset, 0),
188 NAMED_PARAM (register_extend, 0),
189 NAMED_PARAM (imm_offset, 0)
192 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
195 static const struct cpu_addrcost_table cortexa57_addrcost_table =
197 #if HAVE_DESIGNATED_INITIALIZERS
206 NAMED_PARAM (pre_modify, 0),
207 NAMED_PARAM (post_modify, 0),
208 NAMED_PARAM (register_offset, 0),
209 NAMED_PARAM (register_extend, 0),
210 NAMED_PARAM (imm_offset, 0),
213 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
216 static const struct cpu_regmove_cost generic_regmove_cost =
218 NAMED_PARAM (GP2GP, 1),
219 NAMED_PARAM (GP2FP, 2),
220 NAMED_PARAM (FP2GP, 2),
221 /* We currently do not provide direct support for TFmode Q->Q move.
222 Therefore we need to raise the cost above 2 in order to have
223 reload handle the situation. */
224 NAMED_PARAM (FP2FP, 4)
227 /* Generic costs for vector insn classes. */
228 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
231 static const struct cpu_vector_cost generic_vector_cost =
233 NAMED_PARAM (scalar_stmt_cost, 1),
234 NAMED_PARAM (scalar_load_cost, 1),
235 NAMED_PARAM (scalar_store_cost, 1),
236 NAMED_PARAM (vec_stmt_cost, 1),
237 NAMED_PARAM (vec_to_scalar_cost, 1),
238 NAMED_PARAM (scalar_to_vec_cost, 1),
239 NAMED_PARAM (vec_align_load_cost, 1),
240 NAMED_PARAM (vec_unalign_load_cost, 1),
241 NAMED_PARAM (vec_unalign_store_cost, 1),
242 NAMED_PARAM (vec_store_cost, 1),
243 NAMED_PARAM (cond_taken_branch_cost, 3),
244 NAMED_PARAM (cond_not_taken_branch_cost, 1)
247 /* Generic costs for vector insn classes. */
248 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
251 static const struct cpu_vector_cost cortexa57_vector_cost =
253 NAMED_PARAM (scalar_stmt_cost, 1),
254 NAMED_PARAM (scalar_load_cost, 4),
255 NAMED_PARAM (scalar_store_cost, 1),
256 NAMED_PARAM (vec_stmt_cost, 3),
257 NAMED_PARAM (vec_to_scalar_cost, 8),
258 NAMED_PARAM (scalar_to_vec_cost, 8),
259 NAMED_PARAM (vec_align_load_cost, 5),
260 NAMED_PARAM (vec_unalign_load_cost, 5),
261 NAMED_PARAM (vec_unalign_store_cost, 1),
262 NAMED_PARAM (vec_store_cost, 1),
263 NAMED_PARAM (cond_taken_branch_cost, 1),
264 NAMED_PARAM (cond_not_taken_branch_cost, 1)
267 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
270 static const struct tune_params generic_tunings =
272 &cortexa57_extra_costs,
273 &generic_addrcost_table,
274 &generic_regmove_cost,
275 &generic_vector_cost,
276 NAMED_PARAM (memmov_cost, 4),
277 NAMED_PARAM (issue_rate, 2)
280 static const struct tune_params cortexa53_tunings =
282 &cortexa53_extra_costs,
283 &generic_addrcost_table,
284 &generic_regmove_cost,
285 &generic_vector_cost,
286 NAMED_PARAM (memmov_cost, 4),
287 NAMED_PARAM (issue_rate, 2)
290 static const struct tune_params cortexa57_tunings =
292 &cortexa57_extra_costs,
293 &cortexa57_addrcost_table,
294 &generic_regmove_cost,
295 &cortexa57_vector_cost,
296 NAMED_PARAM (memmov_cost, 4),
297 NAMED_PARAM (issue_rate, 3)
300 /* A processor implementing AArch64. */
303 const char *const name;
304 enum aarch64_processor core;
306 const unsigned long flags;
307 const struct tune_params *const tune;
310 /* Processor cores implementing AArch64. */
311 static const struct processor all_cores[] =
313 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
314 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
315 #include "aarch64-cores.def"
317 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
318 {NULL, aarch64_none, NULL, 0, NULL}
321 /* Architectures implementing AArch64. */
322 static const struct processor all_architectures[] =
324 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
325 {NAME, CORE, #ARCH, FLAGS, NULL},
326 #include "aarch64-arches.def"
328 {NULL, aarch64_none, NULL, 0, NULL}
331 /* Target specification. These are populated as commandline arguments
332 are processed, or NULL if not specified. */
333 static const struct processor *selected_arch;
334 static const struct processor *selected_cpu;
335 static const struct processor *selected_tune;
337 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
339 /* An ISA extension in the co-processor and main instruction set space. */
340 struct aarch64_option_extension
342 const char *const name;
343 const unsigned long flags_on;
344 const unsigned long flags_off;
347 /* ISA extensions in AArch64. */
348 static const struct aarch64_option_extension all_extensions[] =
350 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
351 {NAME, FLAGS_ON, FLAGS_OFF},
352 #include "aarch64-option-extensions.def"
353 #undef AARCH64_OPT_EXTENSION
357 /* Used to track the size of an address when generating a pre/post
358 increment address. */
359 static enum machine_mode aarch64_memory_reference_mode;
361 /* Used to force GTY into this file. */
362 static GTY(()) int gty_dummy;
364 /* A table of valid AArch64 "bitmask immediate" values for
365 logical instructions. */
367 #define AARCH64_NUM_BITMASKS 5334
368 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
370 typedef enum aarch64_cond_code
372 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
373 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
374 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
378 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
380 /* The condition codes of the processor, and the inverse function. */
381 static const char * const aarch64_condition_codes[] =
383 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
384 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
387 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
389 aarch64_dbx_register_number (unsigned regno)
391 if (GP_REGNUM_P (regno))
392 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
393 else if (regno == SP_REGNUM)
394 return AARCH64_DWARF_SP;
395 else if (FP_REGNUM_P (regno))
396 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
398 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
399 equivalent DWARF register. */
400 return DWARF_FRAME_REGISTERS;
403 /* Return TRUE if MODE is any of the large INT modes. */
405 aarch64_vect_struct_mode_p (enum machine_mode mode)
407 return mode == OImode || mode == CImode || mode == XImode;
410 /* Return TRUE if MODE is any of the vector modes. */
412 aarch64_vector_mode_p (enum machine_mode mode)
414 return aarch64_vector_mode_supported_p (mode)
415 || aarch64_vect_struct_mode_p (mode);
418 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
420 aarch64_array_mode_supported_p (enum machine_mode mode,
421 unsigned HOST_WIDE_INT nelems)
424 && AARCH64_VALID_SIMD_QREG_MODE (mode)
425 && (nelems >= 2 && nelems <= 4))
431 /* Implement HARD_REGNO_NREGS. */
434 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
436 switch (aarch64_regno_regclass (regno))
440 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
442 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
447 /* Implement HARD_REGNO_MODE_OK. */
450 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
452 if (GET_MODE_CLASS (mode) == MODE_CC)
453 return regno == CC_REGNUM;
455 if (regno == SP_REGNUM)
456 /* The purpose of comparing with ptr_mode is to support the
457 global register variable associated with the stack pointer
458 register via the syntax of asm ("wsp") in ILP32. */
459 return mode == Pmode || mode == ptr_mode;
461 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
462 return mode == Pmode;
464 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
467 if (FP_REGNUM_P (regno))
469 if (aarch64_vect_struct_mode_p (mode))
471 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
479 /* Return true if calls to DECL should be treated as
480 long-calls (ie called via a register). */
482 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
487 /* Return true if calls to symbol-ref SYM should be treated as
488 long-calls (ie called via a register). */
490 aarch64_is_long_call_p (rtx sym)
492 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
495 /* Return true if the offsets to a zero/sign-extract operation
496 represent an expression that matches an extend operation. The
497 operands represent the paramters from
499 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
501 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
504 HOST_WIDE_INT mult_val, extract_val;
506 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
509 mult_val = INTVAL (mult_imm);
510 extract_val = INTVAL (extract_imm);
513 && extract_val < GET_MODE_BITSIZE (mode)
514 && exact_log2 (extract_val & ~7) > 0
515 && (extract_val & 7) <= 4
516 && mult_val == (1 << (extract_val & 7)))
522 /* Emit an insn that's a simple single-set. Both the operands must be
523 known to be valid. */
525 emit_set_insn (rtx x, rtx y)
527 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
530 /* X and Y are two things to compare using CODE. Emit the compare insn and
531 return the rtx for register 0 in the proper mode. */
533 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
535 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
536 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
538 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
542 /* Build the SYMBOL_REF for __tls_get_addr. */
544 static GTY(()) rtx tls_get_addr_libfunc;
547 aarch64_tls_get_addr (void)
549 if (!tls_get_addr_libfunc)
550 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
551 return tls_get_addr_libfunc;
554 /* Return the TLS model to use for ADDR. */
556 static enum tls_model
557 tls_symbolic_operand_type (rtx addr)
559 enum tls_model tls_kind = TLS_MODEL_NONE;
562 if (GET_CODE (addr) == CONST)
564 split_const (addr, &sym, &addend);
565 if (GET_CODE (sym) == SYMBOL_REF)
566 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
568 else if (GET_CODE (addr) == SYMBOL_REF)
569 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
574 /* We'll allow lo_sum's in addresses in our legitimate addresses
575 so that combine would take care of combining addresses where
576 necessary, but for generation purposes, we'll generate the address
579 tmp = hi (symbol_ref); adrp x1, foo
580 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
584 adrp x1, :got:foo adrp tmp, :tlsgd:foo
585 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
589 Load TLS symbol, depending on TLS mechanism and TLS access model.
591 Global Dynamic - Traditional TLS:
593 add dest, tmp, #:tlsgd_lo12:imm
596 Global Dynamic - TLS Descriptors:
597 adrp dest, :tlsdesc:imm
598 ldr tmp, [dest, #:tlsdesc_lo12:imm]
599 add dest, dest, #:tlsdesc_lo12:imm
606 adrp tmp, :gottprel:imm
607 ldr dest, [tmp, #:gottprel_lo12:imm]
612 add t0, tp, #:tprel_hi12:imm
613 add t0, #:tprel_lo12_nc:imm
617 aarch64_load_symref_appropriately (rtx dest, rtx imm,
618 enum aarch64_symbol_type type)
622 case SYMBOL_SMALL_ABSOLUTE:
624 /* In ILP32, the mode of dest can be either SImode or DImode. */
626 enum machine_mode mode = GET_MODE (dest);
628 gcc_assert (mode == Pmode || mode == ptr_mode);
630 if (can_create_pseudo_p ())
631 tmp_reg = gen_reg_rtx (mode);
633 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
634 emit_insn (gen_add_losym (dest, tmp_reg, imm));
638 case SYMBOL_TINY_ABSOLUTE:
639 emit_insn (gen_rtx_SET (Pmode, dest, imm));
642 case SYMBOL_SMALL_GOT:
644 /* In ILP32, the mode of dest can be either SImode or DImode,
645 while the got entry is always of SImode size. The mode of
646 dest depends on how dest is used: if dest is assigned to a
647 pointer (e.g. in the memory), it has SImode; it may have
648 DImode if dest is dereferenced to access the memeory.
649 This is why we have to handle three different ldr_got_small
650 patterns here (two patterns for ILP32). */
652 enum machine_mode mode = GET_MODE (dest);
654 if (can_create_pseudo_p ())
655 tmp_reg = gen_reg_rtx (mode);
657 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
658 if (mode == ptr_mode)
661 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
663 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
667 gcc_assert (mode == Pmode);
668 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
674 case SYMBOL_SMALL_TLSGD:
677 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
680 emit_call_insn (gen_tlsgd_small (result, imm));
681 insns = get_insns ();
684 RTL_CONST_CALL_P (insns) = 1;
685 emit_libcall_block (insns, dest, result, imm);
689 case SYMBOL_SMALL_TLSDESC:
691 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
694 emit_insn (gen_tlsdesc_small (imm));
695 tp = aarch64_load_tp (NULL);
696 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
697 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
701 case SYMBOL_SMALL_GOTTPREL:
703 rtx tmp_reg = gen_reg_rtx (Pmode);
704 rtx tp = aarch64_load_tp (NULL);
705 emit_insn (gen_tlsie_small (tmp_reg, imm));
706 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
707 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
711 case SYMBOL_SMALL_TPREL:
713 rtx tp = aarch64_load_tp (NULL);
714 emit_insn (gen_tlsle_small (dest, tp, imm));
715 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
719 case SYMBOL_TINY_GOT:
720 emit_insn (gen_ldr_got_tiny (dest, imm));
728 /* Emit a move from SRC to DEST. Assume that the move expanders can
729 handle all moves if !can_create_pseudo_p (). The distinction is
730 important because, unlike emit_move_insn, the move expanders know
731 how to force Pmode objects into the constant pool even when the
732 constant pool address is not itself legitimate. */
734 aarch64_emit_move (rtx dest, rtx src)
736 return (can_create_pseudo_p ()
737 ? emit_move_insn (dest, src)
738 : emit_move_insn_1 (dest, src));
741 /* Split a 128-bit move operation into two 64-bit move operations,
742 taking care to handle partial overlap of register to register
743 copies. Special cases are needed when moving between GP regs and
744 FP regs. SRC can be a register, constant or memory; DST a register
745 or memory. If either operand is memory it must not have any side
748 aarch64_split_128bit_move (rtx dst, rtx src)
753 enum machine_mode mode = GET_MODE (dst);
755 gcc_assert (mode == TImode || mode == TFmode);
756 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
757 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
759 if (REG_P (dst) && REG_P (src))
761 int src_regno = REGNO (src);
762 int dst_regno = REGNO (dst);
764 /* Handle FP <-> GP regs. */
765 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
767 src_lo = gen_lowpart (word_mode, src);
768 src_hi = gen_highpart (word_mode, src);
772 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
773 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
777 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
778 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
782 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
784 dst_lo = gen_lowpart (word_mode, dst);
785 dst_hi = gen_highpart (word_mode, dst);
789 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
790 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
794 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
795 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
801 dst_lo = gen_lowpart (word_mode, dst);
802 dst_hi = gen_highpart (word_mode, dst);
803 src_lo = gen_lowpart (word_mode, src);
804 src_hi = gen_highpart_mode (word_mode, mode, src);
806 /* At most one pairing may overlap. */
807 if (reg_overlap_mentioned_p (dst_lo, src_hi))
809 aarch64_emit_move (dst_hi, src_hi);
810 aarch64_emit_move (dst_lo, src_lo);
814 aarch64_emit_move (dst_lo, src_lo);
815 aarch64_emit_move (dst_hi, src_hi);
820 aarch64_split_128bit_move_p (rtx dst, rtx src)
822 return (! REG_P (src)
823 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
826 /* Split a complex SIMD combine. */
829 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
831 enum machine_mode src_mode = GET_MODE (src1);
832 enum machine_mode dst_mode = GET_MODE (dst);
834 gcc_assert (VECTOR_MODE_P (dst_mode));
836 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
838 rtx (*gen) (rtx, rtx, rtx);
843 gen = gen_aarch64_simd_combinev8qi;
846 gen = gen_aarch64_simd_combinev4hi;
849 gen = gen_aarch64_simd_combinev2si;
852 gen = gen_aarch64_simd_combinev2sf;
855 gen = gen_aarch64_simd_combinedi;
858 gen = gen_aarch64_simd_combinedf;
864 emit_insn (gen (dst, src1, src2));
869 /* Split a complex SIMD move. */
872 aarch64_split_simd_move (rtx dst, rtx src)
874 enum machine_mode src_mode = GET_MODE (src);
875 enum machine_mode dst_mode = GET_MODE (dst);
877 gcc_assert (VECTOR_MODE_P (dst_mode));
879 if (REG_P (dst) && REG_P (src))
881 rtx (*gen) (rtx, rtx);
883 gcc_assert (VECTOR_MODE_P (src_mode));
888 gen = gen_aarch64_split_simd_movv16qi;
891 gen = gen_aarch64_split_simd_movv8hi;
894 gen = gen_aarch64_split_simd_movv4si;
897 gen = gen_aarch64_split_simd_movv2di;
900 gen = gen_aarch64_split_simd_movv4sf;
903 gen = gen_aarch64_split_simd_movv2df;
909 emit_insn (gen (dst, src));
915 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
917 if (can_create_pseudo_p ())
918 return force_reg (mode, value);
921 x = aarch64_emit_move (x, value);
928 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
930 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
933 /* Load the full offset into a register. This
934 might be improvable in the future. */
935 high = GEN_INT (offset);
937 high = aarch64_force_temporary (mode, temp, high);
938 reg = aarch64_force_temporary (mode, temp,
939 gen_rtx_PLUS (mode, high, reg));
941 return plus_constant (mode, reg, offset);
945 aarch64_expand_mov_immediate (rtx dest, rtx imm)
947 enum machine_mode mode = GET_MODE (dest);
948 unsigned HOST_WIDE_INT mask;
951 unsigned HOST_WIDE_INT val;
954 int one_match, zero_match;
956 gcc_assert (mode == SImode || mode == DImode);
958 /* Check on what type of symbol it is. */
959 if (GET_CODE (imm) == SYMBOL_REF
960 || GET_CODE (imm) == LABEL_REF
961 || GET_CODE (imm) == CONST)
963 rtx mem, base, offset;
964 enum aarch64_symbol_type sty;
966 /* If we have (const (plus symbol offset)), separate out the offset
967 before we start classifying the symbol. */
968 split_const (imm, &base, &offset);
970 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
973 case SYMBOL_FORCE_TO_MEM:
974 if (offset != const0_rtx
975 && targetm.cannot_force_const_mem (mode, imm))
977 gcc_assert (can_create_pseudo_p ());
978 base = aarch64_force_temporary (mode, dest, base);
979 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
980 aarch64_emit_move (dest, base);
983 mem = force_const_mem (ptr_mode, imm);
985 if (mode != ptr_mode)
986 mem = gen_rtx_ZERO_EXTEND (mode, mem);
987 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
990 case SYMBOL_SMALL_TLSGD:
991 case SYMBOL_SMALL_TLSDESC:
992 case SYMBOL_SMALL_GOTTPREL:
993 case SYMBOL_SMALL_GOT:
994 case SYMBOL_TINY_GOT:
995 if (offset != const0_rtx)
997 gcc_assert(can_create_pseudo_p ());
998 base = aarch64_force_temporary (mode, dest, base);
999 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1000 aarch64_emit_move (dest, base);
1005 case SYMBOL_SMALL_TPREL:
1006 case SYMBOL_SMALL_ABSOLUTE:
1007 case SYMBOL_TINY_ABSOLUTE:
1008 aarch64_load_symref_appropriately (dest, imm, sty);
1016 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1018 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1022 if (!CONST_INT_P (imm))
1024 if (GET_CODE (imm) == HIGH)
1025 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1028 rtx mem = force_const_mem (mode, imm);
1030 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1038 /* We know we can't do this in 1 insn, and we must be able to do it
1039 in two; so don't mess around looking for sequences that don't buy
1041 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1042 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1043 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1047 /* Remaining cases are all for DImode. */
1050 subtargets = optimize && can_create_pseudo_p ();
1056 for (i = 0; i < 64; i += 16, mask <<= 16)
1058 if ((val & mask) == 0)
1060 else if ((val & mask) == mask)
1067 for (i = 0; i < 64; i += 16, mask <<= 16)
1069 if ((val & mask) != mask)
1071 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1072 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1073 GEN_INT ((val >> i) & 0xffff)));
1080 if (zero_match == 2)
1081 goto simple_sequence;
1083 mask = 0x0ffff0000UL;
1084 for (i = 16; i < 64; i += 16, mask <<= 16)
1086 HOST_WIDE_INT comp = mask & ~(mask - 1);
1088 if (aarch64_uimm12_shift (val - (val & mask)))
1090 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1092 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1093 emit_insn (gen_adddi3 (dest, subtarget,
1094 GEN_INT (val - (val & mask))));
1097 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1099 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1101 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1102 GEN_INT ((val + comp) & mask)));
1103 emit_insn (gen_adddi3 (dest, subtarget,
1104 GEN_INT (val - ((val + comp) & mask))));
1107 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1109 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1111 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1112 GEN_INT ((val - comp) | ~mask)));
1113 emit_insn (gen_adddi3 (dest, subtarget,
1114 GEN_INT (val - ((val - comp) | ~mask))));
1117 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1119 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1121 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1122 GEN_INT (val | ~mask)));
1123 emit_insn (gen_adddi3 (dest, subtarget,
1124 GEN_INT (val - (val | ~mask))));
1129 /* See if we can do it by arithmetically combining two
1131 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1136 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1137 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1139 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1140 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1141 GEN_INT (aarch64_bitmasks[i])));
1142 emit_insn (gen_adddi3 (dest, subtarget,
1143 GEN_INT (val - aarch64_bitmasks[i])));
1147 for (j = 0; j < 64; j += 16, mask <<= 16)
1149 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1151 emit_insn (gen_rtx_SET (VOIDmode, dest,
1152 GEN_INT (aarch64_bitmasks[i])));
1153 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1154 GEN_INT ((val >> j) & 0xffff)));
1160 /* See if we can do it by logically combining two immediates. */
1161 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1163 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1167 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1168 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1170 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1171 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1172 GEN_INT (aarch64_bitmasks[i])));
1173 emit_insn (gen_iordi3 (dest, subtarget,
1174 GEN_INT (aarch64_bitmasks[j])));
1178 else if ((val & aarch64_bitmasks[i]) == val)
1182 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1183 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1186 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1187 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1188 GEN_INT (aarch64_bitmasks[j])));
1189 emit_insn (gen_anddi3 (dest, subtarget,
1190 GEN_INT (aarch64_bitmasks[i])));
1199 for (i = 0; i < 64; i += 16, mask <<= 16)
1201 if ((val & mask) != 0)
1205 emit_insn (gen_rtx_SET (VOIDmode, dest,
1206 GEN_INT (val & mask)));
1210 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1211 GEN_INT ((val >> i) & 0xffff)));
1217 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1219 /* Indirect calls are not currently supported. */
1223 /* Cannot tail-call to long-calls, since these are outside of the
1224 range of a branch instruction (we could handle this if we added
1225 support for indirect tail-calls. */
1226 if (aarch64_decl_is_long_call_p (decl))
1232 /* Implement TARGET_PASS_BY_REFERENCE. */
1235 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1236 enum machine_mode mode,
1238 bool named ATTRIBUTE_UNUSED)
1241 enum machine_mode dummymode;
1244 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1245 size = (mode == BLKmode && type)
1246 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1248 /* Aggregates are passed by reference based on their size. */
1249 if (type && AGGREGATE_TYPE_P (type))
1251 size = int_size_in_bytes (type);
1254 /* Variable sized arguments are always returned by reference. */
1258 /* Can this be a candidate to be passed in fp/simd register(s)? */
1259 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1264 /* Arguments which are variable sized or larger than 2 registers are
1265 passed by reference unless they are a homogenous floating point
1267 return size > 2 * UNITS_PER_WORD;
1270 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1272 aarch64_return_in_msb (const_tree valtype)
1274 enum machine_mode dummy_mode;
1277 /* Never happens in little-endian mode. */
1278 if (!BYTES_BIG_ENDIAN)
1281 /* Only composite types smaller than or equal to 16 bytes can
1282 be potentially returned in registers. */
1283 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1284 || int_size_in_bytes (valtype) <= 0
1285 || int_size_in_bytes (valtype) > 16)
1288 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1289 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1290 is always passed/returned in the least significant bits of fp/simd
1292 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1293 &dummy_mode, &dummy_int, NULL))
1299 /* Implement TARGET_FUNCTION_VALUE.
1300 Define how to find the value returned by a function. */
1303 aarch64_function_value (const_tree type, const_tree func,
1304 bool outgoing ATTRIBUTE_UNUSED)
1306 enum machine_mode mode;
1309 enum machine_mode ag_mode;
1311 mode = TYPE_MODE (type);
1312 if (INTEGRAL_TYPE_P (type))
1313 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1315 if (aarch64_return_in_msb (type))
1317 HOST_WIDE_INT size = int_size_in_bytes (type);
1319 if (size % UNITS_PER_WORD != 0)
1321 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1322 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1326 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1327 &ag_mode, &count, NULL))
1329 if (!aarch64_composite_type_p (type, mode))
1331 gcc_assert (count == 1 && mode == ag_mode);
1332 return gen_rtx_REG (mode, V0_REGNUM);
1339 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1340 for (i = 0; i < count; i++)
1342 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1343 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1344 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1345 XVECEXP (par, 0, i) = tmp;
1351 return gen_rtx_REG (mode, R0_REGNUM);
1354 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1355 Return true if REGNO is the number of a hard register in which the values
1356 of called function may come back. */
1359 aarch64_function_value_regno_p (const unsigned int regno)
1361 /* Maximum of 16 bytes can be returned in the general registers. Examples
1362 of 16-byte return values are: 128-bit integers and 16-byte small
1363 structures (excluding homogeneous floating-point aggregates). */
1364 if (regno == R0_REGNUM || regno == R1_REGNUM)
1367 /* Up to four fp/simd registers can return a function value, e.g. a
1368 homogeneous floating-point aggregate having four members. */
1369 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1370 return !TARGET_GENERAL_REGS_ONLY;
1375 /* Implement TARGET_RETURN_IN_MEMORY.
1377 If the type T of the result of a function is such that
1379 would require that arg be passed as a value in a register (or set of
1380 registers) according to the parameter passing rules, then the result
1381 is returned in the same registers as would be used for such an
1385 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1388 enum machine_mode ag_mode;
1391 if (!AGGREGATE_TYPE_P (type)
1392 && TREE_CODE (type) != COMPLEX_TYPE
1393 && TREE_CODE (type) != VECTOR_TYPE)
1394 /* Simple scalar types always returned in registers. */
1397 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1404 /* Types larger than 2 registers returned in memory. */
1405 size = int_size_in_bytes (type);
1406 return (size < 0 || size > 2 * UNITS_PER_WORD);
1410 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1411 const_tree type, int *nregs)
1413 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1414 return aarch64_vfp_is_call_or_return_candidate (mode,
1416 &pcum->aapcs_vfp_rmode,
1421 /* Given MODE and TYPE of a function argument, return the alignment in
1422 bits. The idea is to suppress any stronger alignment requested by
1423 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1424 This is a helper function for local use only. */
1427 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1429 unsigned int alignment;
1433 if (!integer_zerop (TYPE_SIZE (type)))
1435 if (TYPE_MODE (type) == mode)
1436 alignment = TYPE_ALIGN (type);
1438 alignment = GET_MODE_ALIGNMENT (mode);
1444 alignment = GET_MODE_ALIGNMENT (mode);
1449 /* Layout a function argument according to the AAPCS64 rules. The rule
1450 numbers refer to the rule numbers in the AAPCS64. */
1453 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1455 bool named ATTRIBUTE_UNUSED)
1457 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1458 int ncrn, nvrn, nregs;
1459 bool allocate_ncrn, allocate_nvrn;
1462 /* We need to do this once per argument. */
1463 if (pcum->aapcs_arg_processed)
1466 pcum->aapcs_arg_processed = true;
1468 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
1470 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1473 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1474 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1479 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1480 The following code thus handles passing by SIMD/FP registers first. */
1482 nvrn = pcum->aapcs_nvrn;
1484 /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
1485 and homogenous short-vector aggregates (HVA). */
1488 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1490 pcum->aapcs_nextnvrn = nvrn + nregs;
1491 if (!aarch64_composite_type_p (type, mode))
1493 gcc_assert (nregs == 1);
1494 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1500 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1501 for (i = 0; i < nregs; i++)
1503 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1504 V0_REGNUM + nvrn + i);
1505 tmp = gen_rtx_EXPR_LIST
1507 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1508 XVECEXP (par, 0, i) = tmp;
1510 pcum->aapcs_reg = par;
1516 /* C.3 NSRN is set to 8. */
1517 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1522 ncrn = pcum->aapcs_ncrn;
1523 nregs = size / UNITS_PER_WORD;
1525 /* C6 - C9. though the sign and zero extension semantics are
1526 handled elsewhere. This is the case where the argument fits
1527 entirely general registers. */
1528 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1530 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1532 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1534 /* C.8 if the argument has an alignment of 16 then the NGRN is
1535 rounded up to the next even number. */
1536 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1539 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1541 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1542 A reg is still generated for it, but the caller should be smart
1543 enough not to use it. */
1544 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1546 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1553 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1554 for (i = 0; i < nregs; i++)
1556 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1557 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1558 GEN_INT (i * UNITS_PER_WORD));
1559 XVECEXP (par, 0, i) = tmp;
1561 pcum->aapcs_reg = par;
1564 pcum->aapcs_nextncrn = ncrn + nregs;
1569 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1571 /* The argument is passed on stack; record the needed number of words for
1572 this argument and align the total size if necessary. */
1574 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1575 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1576 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1577 16 / UNITS_PER_WORD);
1581 /* Implement TARGET_FUNCTION_ARG. */
1584 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1585 const_tree type, bool named)
1587 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1588 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1590 if (mode == VOIDmode)
1593 aarch64_layout_arg (pcum_v, mode, type, named);
1594 return pcum->aapcs_reg;
1598 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1599 const_tree fntype ATTRIBUTE_UNUSED,
1600 rtx libname ATTRIBUTE_UNUSED,
1601 const_tree fndecl ATTRIBUTE_UNUSED,
1602 unsigned n_named ATTRIBUTE_UNUSED)
1604 pcum->aapcs_ncrn = 0;
1605 pcum->aapcs_nvrn = 0;
1606 pcum->aapcs_nextncrn = 0;
1607 pcum->aapcs_nextnvrn = 0;
1608 pcum->pcs_variant = ARM_PCS_AAPCS64;
1609 pcum->aapcs_reg = NULL_RTX;
1610 pcum->aapcs_arg_processed = false;
1611 pcum->aapcs_stack_words = 0;
1612 pcum->aapcs_stack_size = 0;
1618 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1619 enum machine_mode mode,
1623 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1624 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1626 aarch64_layout_arg (pcum_v, mode, type, named);
1627 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1628 != (pcum->aapcs_stack_words != 0));
1629 pcum->aapcs_arg_processed = false;
1630 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1631 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1632 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1633 pcum->aapcs_stack_words = 0;
1634 pcum->aapcs_reg = NULL_RTX;
1639 aarch64_function_arg_regno_p (unsigned regno)
1641 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1642 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1645 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1646 PARM_BOUNDARY bits of alignment, but will be given anything up
1647 to STACK_BOUNDARY bits if the type requires it. This makes sure
1648 that both before and after the layout of each argument, the Next
1649 Stacked Argument Address (NSAA) will have a minimum alignment of
1653 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1655 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1657 if (alignment < PARM_BOUNDARY)
1658 alignment = PARM_BOUNDARY;
1659 if (alignment > STACK_BOUNDARY)
1660 alignment = STACK_BOUNDARY;
1664 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1666 Return true if an argument passed on the stack should be padded upwards,
1667 i.e. if the least-significant byte of the stack slot has useful data.
1669 Small aggregate types are placed in the lowest memory address.
1671 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1674 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1676 /* On little-endian targets, the least significant byte of every stack
1677 argument is passed at the lowest byte address of the stack slot. */
1678 if (!BYTES_BIG_ENDIAN)
1681 /* Otherwise, integral, floating-point and pointer types are padded downward:
1682 the least significant byte of a stack argument is passed at the highest
1683 byte address of the stack slot. */
1685 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1686 || POINTER_TYPE_P (type))
1687 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1690 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1694 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1696 It specifies padding for the last (may also be the only)
1697 element of a block move between registers and memory. If
1698 assuming the block is in the memory, padding upward means that
1699 the last element is padded after its highest significant byte,
1700 while in downward padding, the last element is padded at the
1701 its least significant byte side.
1703 Small aggregates and small complex types are always padded
1706 We don't need to worry about homogeneous floating-point or
1707 short-vector aggregates; their move is not affected by the
1708 padding direction determined here. Regardless of endianness,
1709 each element of such an aggregate is put in the least
1710 significant bits of a fp/simd register.
1712 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1713 register has useful data, and return the opposite if the most
1714 significant byte does. */
1717 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1718 bool first ATTRIBUTE_UNUSED)
1721 /* Small composite types are always padded upward. */
1722 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1724 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1725 : GET_MODE_SIZE (mode));
1726 if (size < 2 * UNITS_PER_WORD)
1730 /* Otherwise, use the default padding. */
1731 return !BYTES_BIG_ENDIAN;
1734 static enum machine_mode
1735 aarch64_libgcc_cmp_return_mode (void)
1741 aarch64_frame_pointer_required (void)
1743 /* If the function contains dynamic stack allocations, we need to
1744 use the frame pointer to access the static parts of the frame. */
1745 if (cfun->calls_alloca)
1748 /* In aarch64_override_options_after_change
1749 flag_omit_leaf_frame_pointer turns off the frame pointer by
1750 default. Turn it back on now if we've not got a leaf
1752 if (flag_omit_leaf_frame_pointer
1753 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1759 /* Mark the registers that need to be saved by the callee and calculate
1760 the size of the callee-saved registers area and frame record (both FP
1761 and LR may be omitted). */
1763 aarch64_layout_frame (void)
1765 HOST_WIDE_INT offset = 0;
1768 if (reload_completed && cfun->machine->frame.laid_out)
1771 cfun->machine->frame.fp_lr_offset = 0;
1773 /* First mark all the registers that really need to be saved... */
1774 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1775 cfun->machine->frame.reg_offset[regno] = -1;
1777 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1778 cfun->machine->frame.reg_offset[regno] = -1;
1780 /* ... that includes the eh data registers (if needed)... */
1781 if (crtl->calls_eh_return)
1782 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1783 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1785 /* ... and any callee saved register that dataflow says is live. */
1786 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1787 if (df_regs_ever_live_p (regno)
1788 && !call_used_regs[regno])
1789 cfun->machine->frame.reg_offset[regno] = 0;
1791 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1792 if (df_regs_ever_live_p (regno)
1793 && !call_used_regs[regno])
1794 cfun->machine->frame.reg_offset[regno] = 0;
1796 if (frame_pointer_needed)
1798 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1799 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1800 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1803 /* Now assign stack slots for them. */
1804 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1805 if (cfun->machine->frame.reg_offset[regno] != -1)
1807 cfun->machine->frame.reg_offset[regno] = offset;
1808 offset += UNITS_PER_WORD;
1811 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1812 if (cfun->machine->frame.reg_offset[regno] != -1)
1814 cfun->machine->frame.reg_offset[regno] = offset;
1815 offset += UNITS_PER_WORD;
1818 if (frame_pointer_needed)
1820 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1821 offset += UNITS_PER_WORD;
1822 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1825 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1827 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1828 offset += UNITS_PER_WORD;
1829 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1832 cfun->machine->frame.padding0 =
1833 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1834 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1836 cfun->machine->frame.saved_regs_size = offset;
1837 cfun->machine->frame.laid_out = true;
1840 /* Make the last instruction frame-related and note that it performs
1841 the operation described by FRAME_PATTERN. */
1844 aarch64_set_frame_expr (rtx frame_pattern)
1848 insn = get_last_insn ();
1849 RTX_FRAME_RELATED_P (insn) = 1;
1850 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1851 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1857 aarch64_register_saved_on_entry (int regno)
1859 return cfun->machine->frame.reg_offset[regno] != -1;
1864 aarch64_save_or_restore_fprs (int start_offset, int increment,
1865 bool restore, rtx base_rtx)
1871 rtx (*gen_mem_ref)(enum machine_mode, rtx)
1872 = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1875 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1877 if (aarch64_register_saved_on_entry (regno))
1880 mem = gen_mem_ref (DFmode,
1881 plus_constant (Pmode,
1885 for (regno2 = regno + 1;
1886 regno2 <= V31_REGNUM
1887 && !aarch64_register_saved_on_entry (regno2);
1892 if (regno2 <= V31_REGNUM &&
1893 aarch64_register_saved_on_entry (regno2))
1896 /* Next highest register to be saved. */
1897 mem2 = gen_mem_ref (DFmode,
1901 start_offset + increment));
1902 if (restore == false)
1905 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1906 mem2, gen_rtx_REG (DFmode, regno2)));
1912 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1913 gen_rtx_REG (DFmode, regno2), mem2));
1915 add_reg_note (insn, REG_CFA_RESTORE,
1916 gen_rtx_REG (DFmode, regno));
1917 add_reg_note (insn, REG_CFA_RESTORE,
1918 gen_rtx_REG (DFmode, regno2));
1921 /* The first part of a frame-related parallel insn
1922 is always assumed to be relevant to the frame
1923 calculations; subsequent parts, are only
1924 frame-related if explicitly marked. */
1925 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1927 start_offset += increment * 2;
1931 if (restore == false)
1932 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1935 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1936 add_reg_note (insn, REG_CFA_RESTORE,
1937 gen_rtx_REG (DImode, regno));
1939 start_offset += increment;
1941 RTX_FRAME_RELATED_P (insn) = 1;
1948 /* offset from the stack pointer of where the saves and
1949 restore's have to happen. */
1951 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1955 rtx base_rtx = stack_pointer_rtx;
1956 HOST_WIDE_INT start_offset = offset;
1957 HOST_WIDE_INT increment = UNITS_PER_WORD;
1958 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1959 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1963 for (regno = R0_REGNUM; regno <= limit; regno++)
1965 if (aarch64_register_saved_on_entry (regno))
1968 mem = gen_mem_ref (Pmode,
1969 plus_constant (Pmode,
1973 for (regno2 = regno + 1;
1975 && !aarch64_register_saved_on_entry (regno2);
1980 if (regno2 <= limit &&
1981 aarch64_register_saved_on_entry (regno2))
1984 /* Next highest register to be saved. */
1985 mem2 = gen_mem_ref (Pmode,
1989 start_offset + increment));
1990 if (restore == false)
1993 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1994 mem2, gen_rtx_REG (DImode, regno2)));
2000 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
2001 gen_rtx_REG (DImode, regno2), mem2));
2003 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2004 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
2007 /* The first part of a frame-related parallel insn
2008 is always assumed to be relevant to the frame
2009 calculations; subsequent parts, are only
2010 frame-related if explicitly marked. */
2011 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
2014 start_offset += increment * 2;
2018 if (restore == false)
2019 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
2022 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
2023 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2025 start_offset += increment;
2027 RTX_FRAME_RELATED_P (insn) = 1;
2031 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
2035 /* AArch64 stack frames generated by this compiler look like:
2037 +-------------------------------+
2039 | incoming stack arguments |
2041 +-------------------------------+ <-- arg_pointer_rtx
2043 | callee-allocated save area |
2044 | for register varargs |
2046 +-------------------------------+ <-- frame_pointer_rtx
2050 +-------------------------------+
2052 +-------------------------------+ |
2055 | callee-saved registers | | frame.saved_regs_size
2057 +-------------------------------+ |
2059 +-------------------------------+ |
2061 P +-------------------------------+ <-- hard_frame_pointer_rtx
2062 | dynamic allocation |
2063 +-------------------------------+
2065 | outgoing stack arguments |
2067 +-------------------------------+ <-- stack_pointer_rtx
2069 Dynamic stack allocations such as alloca insert data at point P.
2070 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2071 hard_frame_pointer_rtx unchanged. */
2073 /* Generate the prologue instructions for entry into a function.
2074 Establish the stack frame by decreasing the stack pointer with a
2075 properly calculated size and, if necessary, create a frame record
2076 filled with the values of LR and previous frame pointer. The
2077 current FP is also set up if it is in use. */
2080 aarch64_expand_prologue (void)
2082 /* sub sp, sp, #<frame_size>
2083 stp {fp, lr}, [sp, #<frame_size> - 16]
2084 add fp, sp, #<frame_size> - hardfp_offset
2085 stp {cs_reg}, [fp, #-16] etc.
2087 sub sp, sp, <final_adjustment_if_any>
2089 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2090 HOST_WIDE_INT frame_size, offset;
2091 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2094 aarch64_layout_frame ();
2095 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2096 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2097 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2098 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2099 + crtl->outgoing_args_size);
2100 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2101 STACK_BOUNDARY / BITS_PER_UNIT);
2103 if (flag_stack_usage_info)
2104 current_function_static_stack_size = frame_size;
2107 - original_frame_size
2108 - cfun->machine->frame.saved_regs_size);
2110 /* Store pairs and load pairs have a range only -512 to 504. */
2113 /* When the frame has a large size, an initial decrease is done on
2114 the stack pointer to jump over the callee-allocated save area for
2115 register varargs, the local variable area and/or the callee-saved
2116 register area. This will allow the pre-index write-back
2117 store pair instructions to be used for setting up the stack frame
2119 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2121 offset = cfun->machine->frame.saved_regs_size;
2123 frame_size -= (offset + crtl->outgoing_args_size);
2126 if (frame_size >= 0x1000000)
2128 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2129 emit_move_insn (op0, GEN_INT (-frame_size));
2130 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2131 aarch64_set_frame_expr (gen_rtx_SET
2132 (Pmode, stack_pointer_rtx,
2133 plus_constant (Pmode,
2137 else if (frame_size > 0)
2139 if ((frame_size & 0xfff) != frame_size)
2141 insn = emit_insn (gen_add2_insn
2143 GEN_INT (-(frame_size
2144 & ~(HOST_WIDE_INT)0xfff))));
2145 RTX_FRAME_RELATED_P (insn) = 1;
2147 if ((frame_size & 0xfff) != 0)
2149 insn = emit_insn (gen_add2_insn
2151 GEN_INT (-(frame_size
2152 & (HOST_WIDE_INT)0xfff))));
2153 RTX_FRAME_RELATED_P (insn) = 1;
2162 /* Save the frame pointer and lr if the frame pointer is needed
2163 first. Make the frame pointer point to the location of the
2164 old frame pointer on the stack. */
2165 if (frame_pointer_needed)
2171 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2172 GEN_INT (-offset)));
2173 RTX_FRAME_RELATED_P (insn) = 1;
2174 aarch64_set_frame_expr (gen_rtx_SET
2175 (Pmode, stack_pointer_rtx,
2176 gen_rtx_MINUS (Pmode,
2178 GEN_INT (offset))));
2179 mem_fp = gen_frame_mem (DImode,
2180 plus_constant (Pmode,
2183 mem_lr = gen_frame_mem (DImode,
2184 plus_constant (Pmode,
2188 insn = emit_insn (gen_store_pairdi (mem_fp,
2189 hard_frame_pointer_rtx,
2191 gen_rtx_REG (DImode,
2196 insn = emit_insn (gen_storewb_pairdi_di
2197 (stack_pointer_rtx, stack_pointer_rtx,
2198 hard_frame_pointer_rtx,
2199 gen_rtx_REG (DImode, LR_REGNUM),
2201 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2202 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2205 /* The first part of a frame-related parallel insn is always
2206 assumed to be relevant to the frame calculations;
2207 subsequent parts, are only frame-related if explicitly
2209 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2210 RTX_FRAME_RELATED_P (insn) = 1;
2212 /* Set up frame pointer to point to the location of the
2213 previous frame pointer on the stack. */
2214 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2216 GEN_INT (fp_offset)));
2217 aarch64_set_frame_expr (gen_rtx_SET
2218 (Pmode, hard_frame_pointer_rtx,
2219 plus_constant (Pmode,
2222 RTX_FRAME_RELATED_P (insn) = 1;
2223 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2224 hard_frame_pointer_rtx));
2228 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2229 GEN_INT (-offset)));
2230 RTX_FRAME_RELATED_P (insn) = 1;
2233 aarch64_save_or_restore_callee_save_registers
2234 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2237 /* when offset >= 512,
2238 sub sp, sp, #<outgoing_args_size> */
2239 if (frame_size > -1)
2241 if (crtl->outgoing_args_size > 0)
2243 insn = emit_insn (gen_add2_insn
2245 GEN_INT (- crtl->outgoing_args_size)));
2246 RTX_FRAME_RELATED_P (insn) = 1;
2251 /* Generate the epilogue instructions for returning from a function. */
2253 aarch64_expand_epilogue (bool for_sibcall)
2255 HOST_WIDE_INT original_frame_size, frame_size, offset;
2256 HOST_WIDE_INT fp_offset;
2260 aarch64_layout_frame ();
2261 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2262 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2263 + crtl->outgoing_args_size);
2264 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2265 STACK_BOUNDARY / BITS_PER_UNIT);
2268 - original_frame_size
2269 - cfun->machine->frame.saved_regs_size);
2271 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2273 /* Store pairs and load pairs have a range only -512 to 504. */
2276 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2278 offset = cfun->machine->frame.saved_regs_size;
2280 frame_size -= (offset + crtl->outgoing_args_size);
2282 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2284 insn = emit_insn (gen_add2_insn
2286 GEN_INT (crtl->outgoing_args_size)));
2287 RTX_FRAME_RELATED_P (insn) = 1;
2293 /* If there were outgoing arguments or we've done dynamic stack
2294 allocation, then restore the stack pointer from the frame
2295 pointer. This is at most one insn and more efficient than using
2296 GCC's internal mechanism. */
2297 if (frame_pointer_needed
2298 && (crtl->outgoing_args_size || cfun->calls_alloca))
2300 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2301 hard_frame_pointer_rtx,
2302 GEN_INT (- fp_offset)));
2303 RTX_FRAME_RELATED_P (insn) = 1;
2304 /* As SP is set to (FP - fp_offset), according to the rules in
2305 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2306 from the value of SP from now on. */
2307 cfa_reg = stack_pointer_rtx;
2310 aarch64_save_or_restore_callee_save_registers
2311 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2313 /* Restore the frame pointer and lr if the frame pointer is needed. */
2316 if (frame_pointer_needed)
2322 mem_fp = gen_frame_mem (DImode,
2323 plus_constant (Pmode,
2326 mem_lr = gen_frame_mem (DImode,
2327 plus_constant (Pmode,
2331 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2333 gen_rtx_REG (DImode,
2339 insn = emit_insn (gen_loadwb_pairdi_di
2342 hard_frame_pointer_rtx,
2343 gen_rtx_REG (DImode, LR_REGNUM),
2345 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2346 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2347 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2348 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2349 plus_constant (Pmode, cfa_reg,
2353 /* The first part of a frame-related parallel insn
2354 is always assumed to be relevant to the frame
2355 calculations; subsequent parts, are only
2356 frame-related if explicitly marked. */
2357 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2358 RTX_FRAME_RELATED_P (insn) = 1;
2359 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2360 add_reg_note (insn, REG_CFA_RESTORE,
2361 gen_rtx_REG (DImode, LR_REGNUM));
2365 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2367 RTX_FRAME_RELATED_P (insn) = 1;
2372 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2374 RTX_FRAME_RELATED_P (insn) = 1;
2378 /* Stack adjustment for exception handler. */
2379 if (crtl->calls_eh_return)
2381 /* We need to unwind the stack by the offset computed by
2382 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2383 based on SP. Ideally we would update the SP and define the
2384 CFA along the lines of:
2386 SP = SP + EH_RETURN_STACKADJ_RTX
2387 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2389 However the dwarf emitter only understands a constant
2392 The solution chosen here is to use the otherwise unused IP0
2393 as a temporary register to hold the current SP value. The
2394 CFA is described using IP0 then SP is modified. */
2396 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2398 insn = emit_move_insn (ip0, stack_pointer_rtx);
2399 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2400 RTX_FRAME_RELATED_P (insn) = 1;
2402 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2404 /* Ensure the assignment to IP0 does not get optimized away. */
2408 if (frame_size > -1)
2410 if (frame_size >= 0x1000000)
2412 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2413 emit_move_insn (op0, GEN_INT (frame_size));
2414 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2415 aarch64_set_frame_expr (gen_rtx_SET
2416 (Pmode, stack_pointer_rtx,
2417 plus_constant (Pmode,
2421 else if (frame_size > 0)
2423 if ((frame_size & 0xfff) != 0)
2425 insn = emit_insn (gen_add2_insn
2427 GEN_INT ((frame_size
2428 & (HOST_WIDE_INT) 0xfff))));
2429 RTX_FRAME_RELATED_P (insn) = 1;
2431 if ((frame_size & 0xfff) != frame_size)
2433 insn = emit_insn (gen_add2_insn
2435 GEN_INT ((frame_size
2436 & ~ (HOST_WIDE_INT) 0xfff))));
2437 RTX_FRAME_RELATED_P (insn) = 1;
2441 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2442 plus_constant (Pmode,
2447 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2449 emit_jump_insn (ret_rtx);
2452 /* Return the place to copy the exception unwinding return address to.
2453 This will probably be a stack slot, but could (in theory be the
2454 return register). */
2456 aarch64_final_eh_return_addr (void)
2458 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2459 aarch64_layout_frame ();
2460 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2461 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2462 + crtl->outgoing_args_size);
2463 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2464 STACK_BOUNDARY / BITS_PER_UNIT);
2466 - original_frame_size
2467 - cfun->machine->frame.saved_regs_size;
2469 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2470 return gen_rtx_REG (DImode, LR_REGNUM);
2472 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2473 result in a store to save LR introduced by builtin_eh_return () being
2474 incorrectly deleted because the alias is not detected.
2475 So in the calculation of the address to copy the exception unwinding
2476 return address to, we note 2 cases.
2477 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2478 we return a SP-relative location since all the addresses are SP-relative
2479 in this case. This prevents the store from being optimized away.
2480 If the fp_offset is not 0, then the addresses will be FP-relative and
2481 therefore we return a FP-relative location. */
2483 if (frame_pointer_needed)
2486 return gen_frame_mem (DImode,
2487 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2489 return gen_frame_mem (DImode,
2490 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2493 /* If FP is not needed, we calculate the location of LR, which would be
2494 at the top of the saved registers block. */
2496 return gen_frame_mem (DImode,
2497 plus_constant (Pmode,
2500 + cfun->machine->frame.saved_regs_size
2501 - 2 * UNITS_PER_WORD));
2504 /* Possibly output code to build up a constant in a register. For
2505 the benefit of the costs infrastructure, returns the number of
2506 instructions which would be emitted. GENERATE inhibits or
2507 enables code generation. */
2510 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2514 if (aarch64_bitmask_imm (val, DImode))
2517 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2525 HOST_WIDE_INT valp = val >> 16;
2529 for (i = 16; i < 64; i += 16)
2531 valm = (valp & 0xffff);
2542 /* zcount contains the number of additional MOVK instructions
2543 required if the constant is built up with an initial MOVZ instruction,
2544 while ncount is the number of MOVK instructions required if starting
2545 with a MOVN instruction. Choose the sequence that yields the fewest
2546 number of instructions, preferring MOVZ instructions when they are both
2548 if (ncount < zcount)
2551 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2552 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2559 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2560 GEN_INT (val & 0xffff));
2567 for (i = 16; i < 64; i += 16)
2569 if ((val & 0xffff) != tval)
2572 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2574 GEN_INT (val & 0xffff)));
2584 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2586 HOST_WIDE_INT mdelta = delta;
2587 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2588 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2593 if (mdelta >= 4096 * 4096)
2595 (void) aarch64_build_constant (scratchreg, delta, true);
2596 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2598 else if (mdelta > 0)
2602 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2603 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2605 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2606 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2608 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2609 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2611 if (mdelta % 4096 != 0)
2613 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2614 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2615 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2620 /* Output code to add DELTA to the first argument, and then jump
2621 to FUNCTION. Used for C++ multiple inheritance. */
2623 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2624 HOST_WIDE_INT delta,
2625 HOST_WIDE_INT vcall_offset,
2628 /* The this pointer is always in x0. Note that this differs from
2629 Arm where the this pointer maybe bumped to r1 if r0 is required
2630 to return a pointer to an aggregate. On AArch64 a result value
2631 pointer will be in x8. */
2632 int this_regno = R0_REGNUM;
2633 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2635 reload_completed = 1;
2636 emit_note (NOTE_INSN_PROLOGUE_END);
2638 if (vcall_offset == 0)
2639 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2642 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2644 this_rtx = gen_rtx_REG (Pmode, this_regno);
2645 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2646 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2651 if (delta >= -256 && delta < 256)
2652 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2653 plus_constant (Pmode, this_rtx, delta));
2655 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2658 if (Pmode == ptr_mode)
2659 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2661 aarch64_emit_move (temp0,
2662 gen_rtx_ZERO_EXTEND (Pmode,
2663 gen_rtx_MEM (ptr_mode, addr)));
2665 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2666 addr = plus_constant (Pmode, temp0, vcall_offset);
2669 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2670 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2673 if (Pmode == ptr_mode)
2674 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2676 aarch64_emit_move (temp1,
2677 gen_rtx_SIGN_EXTEND (Pmode,
2678 gen_rtx_MEM (ptr_mode, addr)));
2680 emit_insn (gen_add2_insn (this_rtx, temp1));
2683 /* Generate a tail call to the target function. */
2684 if (!TREE_USED (function))
2686 assemble_external (function);
2687 TREE_USED (function) = 1;
2689 funexp = XEXP (DECL_RTL (function), 0);
2690 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2691 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2692 SIBLING_CALL_P (insn) = 1;
2694 insn = get_insns ();
2695 shorten_branches (insn);
2696 final_start_function (insn, file, 1);
2697 final (insn, file, 1);
2698 final_end_function ();
2700 /* Stop pretending to be a post-reload pass. */
2701 reload_completed = 0;
2705 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2707 if (GET_CODE (*x) == SYMBOL_REF)
2708 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2710 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2711 TLS offsets, not real symbol references. */
2712 if (GET_CODE (*x) == UNSPEC
2713 && XINT (*x, 1) == UNSPEC_TLS)
2720 aarch64_tls_referenced_p (rtx x)
2722 if (!TARGET_HAVE_TLS)
2725 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2730 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2732 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2733 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2744 aarch64_build_bitmask_table (void)
2746 unsigned HOST_WIDE_INT mask, imm;
2747 unsigned int log_e, e, s, r;
2748 unsigned int nimms = 0;
2750 for (log_e = 1; log_e <= 6; log_e++)
2754 mask = ~(HOST_WIDE_INT) 0;
2756 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2757 for (s = 1; s < e; s++)
2759 for (r = 0; r < e; r++)
2761 /* set s consecutive bits to 1 (s < 64) */
2762 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2763 /* rotate right by r */
2765 imm = ((imm >> r) | (imm << (e - r))) & mask;
2766 /* replicate the constant depending on SIMD size */
2768 case 1: imm |= (imm << 2);
2769 case 2: imm |= (imm << 4);
2770 case 3: imm |= (imm << 8);
2771 case 4: imm |= (imm << 16);
2772 case 5: imm |= (imm << 32);
2778 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2779 aarch64_bitmasks[nimms++] = imm;
2784 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2785 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2786 aarch64_bitmasks_cmp);
2790 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2791 a left shift of 0 or 12 bits. */
2793 aarch64_uimm12_shift (HOST_WIDE_INT val)
2795 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2796 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2801 /* Return true if val is an immediate that can be loaded into a
2802 register by a MOVZ instruction. */
2804 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2806 if (GET_MODE_SIZE (mode) > 4)
2808 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2809 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2814 /* Ignore sign extension. */
2815 val &= (HOST_WIDE_INT) 0xffffffff;
2817 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2818 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2822 /* Return true if val is a valid bitmask immediate. */
2824 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2826 if (GET_MODE_SIZE (mode) < 8)
2828 /* Replicate bit pattern. */
2829 val &= (HOST_WIDE_INT) 0xffffffff;
2832 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2833 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2837 /* Return true if val is an immediate that can be loaded into a
2838 register in a single instruction. */
2840 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2842 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2844 return aarch64_bitmask_imm (val, mode);
2848 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2852 if (GET_CODE (x) == HIGH)
2855 split_const (x, &base, &offset);
2856 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2858 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2859 != SYMBOL_FORCE_TO_MEM)
2862 /* Avoid generating a 64-bit relocation in ILP32; leave
2863 to aarch64_expand_mov_immediate to handle it properly. */
2864 return mode != ptr_mode;
2867 return aarch64_tls_referenced_p (x);
2870 /* Return true if register REGNO is a valid index register.
2871 STRICT_P is true if REG_OK_STRICT is in effect. */
2874 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2876 if (!HARD_REGISTER_NUM_P (regno))
2884 regno = reg_renumber[regno];
2886 return GP_REGNUM_P (regno);
2889 /* Return true if register REGNO is a valid base register for mode MODE.
2890 STRICT_P is true if REG_OK_STRICT is in effect. */
2893 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2895 if (!HARD_REGISTER_NUM_P (regno))
2903 regno = reg_renumber[regno];
2906 /* The fake registers will be eliminated to either the stack or
2907 hard frame pointer, both of which are usually valid base registers.
2908 Reload deals with the cases where the eliminated form isn't valid. */
2909 return (GP_REGNUM_P (regno)
2910 || regno == SP_REGNUM
2911 || regno == FRAME_POINTER_REGNUM
2912 || regno == ARG_POINTER_REGNUM);
2915 /* Return true if X is a valid base register for mode MODE.
2916 STRICT_P is true if REG_OK_STRICT is in effect. */
2919 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2921 if (!strict_p && GET_CODE (x) == SUBREG)
2924 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2927 /* Return true if address offset is a valid index. If it is, fill in INFO
2928 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2931 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2932 enum machine_mode mode, bool strict_p)
2934 enum aarch64_address_type type;
2939 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2940 && GET_MODE (x) == Pmode)
2942 type = ADDRESS_REG_REG;
2946 /* (sign_extend:DI (reg:SI)) */
2947 else if ((GET_CODE (x) == SIGN_EXTEND
2948 || GET_CODE (x) == ZERO_EXTEND)
2949 && GET_MODE (x) == DImode
2950 && GET_MODE (XEXP (x, 0)) == SImode)
2952 type = (GET_CODE (x) == SIGN_EXTEND)
2953 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2954 index = XEXP (x, 0);
2957 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2958 else if (GET_CODE (x) == MULT
2959 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2960 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2961 && GET_MODE (XEXP (x, 0)) == DImode
2962 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2963 && CONST_INT_P (XEXP (x, 1)))
2965 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2966 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2967 index = XEXP (XEXP (x, 0), 0);
2968 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2970 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2971 else if (GET_CODE (x) == ASHIFT
2972 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2973 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2974 && GET_MODE (XEXP (x, 0)) == DImode
2975 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2976 && CONST_INT_P (XEXP (x, 1)))
2978 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2979 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2980 index = XEXP (XEXP (x, 0), 0);
2981 shift = INTVAL (XEXP (x, 1));
2983 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2984 else if ((GET_CODE (x) == SIGN_EXTRACT
2985 || GET_CODE (x) == ZERO_EXTRACT)
2986 && GET_MODE (x) == DImode
2987 && GET_CODE (XEXP (x, 0)) == MULT
2988 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2989 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2991 type = (GET_CODE (x) == SIGN_EXTRACT)
2992 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2993 index = XEXP (XEXP (x, 0), 0);
2994 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2995 if (INTVAL (XEXP (x, 1)) != 32 + shift
2996 || INTVAL (XEXP (x, 2)) != 0)
2999 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3000 (const_int 0xffffffff<<shift)) */
3001 else if (GET_CODE (x) == AND
3002 && GET_MODE (x) == DImode
3003 && GET_CODE (XEXP (x, 0)) == MULT
3004 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3005 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3006 && CONST_INT_P (XEXP (x, 1)))
3008 type = ADDRESS_REG_UXTW;
3009 index = XEXP (XEXP (x, 0), 0);
3010 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3011 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3014 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3015 else if ((GET_CODE (x) == SIGN_EXTRACT
3016 || GET_CODE (x) == ZERO_EXTRACT)
3017 && GET_MODE (x) == DImode
3018 && GET_CODE (XEXP (x, 0)) == ASHIFT
3019 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3020 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3022 type = (GET_CODE (x) == SIGN_EXTRACT)
3023 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3024 index = XEXP (XEXP (x, 0), 0);
3025 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3026 if (INTVAL (XEXP (x, 1)) != 32 + shift
3027 || INTVAL (XEXP (x, 2)) != 0)
3030 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3031 (const_int 0xffffffff<<shift)) */
3032 else if (GET_CODE (x) == AND
3033 && GET_MODE (x) == DImode
3034 && GET_CODE (XEXP (x, 0)) == ASHIFT
3035 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3036 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3037 && CONST_INT_P (XEXP (x, 1)))
3039 type = ADDRESS_REG_UXTW;
3040 index = XEXP (XEXP (x, 0), 0);
3041 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3042 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3045 /* (mult:P (reg:P) (const_int scale)) */
3046 else if (GET_CODE (x) == MULT
3047 && GET_MODE (x) == Pmode
3048 && GET_MODE (XEXP (x, 0)) == Pmode
3049 && CONST_INT_P (XEXP (x, 1)))
3051 type = ADDRESS_REG_REG;
3052 index = XEXP (x, 0);
3053 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3055 /* (ashift:P (reg:P) (const_int shift)) */
3056 else if (GET_CODE (x) == ASHIFT
3057 && GET_MODE (x) == Pmode
3058 && GET_MODE (XEXP (x, 0)) == Pmode
3059 && CONST_INT_P (XEXP (x, 1)))
3061 type = ADDRESS_REG_REG;
3062 index = XEXP (x, 0);
3063 shift = INTVAL (XEXP (x, 1));
3068 if (GET_CODE (index) == SUBREG)
3069 index = SUBREG_REG (index);
3072 (shift > 0 && shift <= 3
3073 && (1 << shift) == GET_MODE_SIZE (mode)))
3075 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3078 info->offset = index;
3079 info->shift = shift;
3087 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3089 return (offset >= -64 * GET_MODE_SIZE (mode)
3090 && offset < 64 * GET_MODE_SIZE (mode)
3091 && offset % GET_MODE_SIZE (mode) == 0);
3095 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3096 HOST_WIDE_INT offset)
3098 return offset >= -256 && offset < 256;
3102 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3105 && offset < 4096 * GET_MODE_SIZE (mode)
3106 && offset % GET_MODE_SIZE (mode) == 0);
3109 /* Return true if X is a valid address for machine mode MODE. If it is,
3110 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3111 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3114 aarch64_classify_address (struct aarch64_address_info *info,
3115 rtx x, enum machine_mode mode,
3116 RTX_CODE outer_code, bool strict_p)
3118 enum rtx_code code = GET_CODE (x);
3120 bool allow_reg_index_p =
3121 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3123 /* Don't support anything other than POST_INC or REG addressing for
3125 if (aarch64_vector_mode_p (mode)
3126 && (code != POST_INC && code != REG))
3133 info->type = ADDRESS_REG_IMM;
3135 info->offset = const0_rtx;
3136 return aarch64_base_register_rtx_p (x, strict_p);
3141 if (GET_MODE_SIZE (mode) != 0
3142 && CONST_INT_P (op1)
3143 && aarch64_base_register_rtx_p (op0, strict_p))
3145 HOST_WIDE_INT offset = INTVAL (op1);
3147 info->type = ADDRESS_REG_IMM;
3151 /* TImode and TFmode values are allowed in both pairs of X
3152 registers and individual Q registers. The available
3154 X,X: 7-bit signed scaled offset
3155 Q: 9-bit signed offset
3156 We conservatively require an offset representable in either mode.
3158 if (mode == TImode || mode == TFmode)
3159 return (offset_7bit_signed_scaled_p (mode, offset)
3160 && offset_9bit_signed_unscaled_p (mode, offset));
3162 if (outer_code == PARALLEL)
3163 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3164 && offset_7bit_signed_scaled_p (mode, offset));
3166 return (offset_9bit_signed_unscaled_p (mode, offset)
3167 || offset_12bit_unsigned_scaled_p (mode, offset));
3170 if (allow_reg_index_p)
3172 /* Look for base + (scaled/extended) index register. */
3173 if (aarch64_base_register_rtx_p (op0, strict_p)
3174 && aarch64_classify_index (info, op1, mode, strict_p))
3179 if (aarch64_base_register_rtx_p (op1, strict_p)
3180 && aarch64_classify_index (info, op0, mode, strict_p))
3193 info->type = ADDRESS_REG_WB;
3194 info->base = XEXP (x, 0);
3195 info->offset = NULL_RTX;
3196 return aarch64_base_register_rtx_p (info->base, strict_p);
3200 info->type = ADDRESS_REG_WB;
3201 info->base = XEXP (x, 0);
3202 if (GET_CODE (XEXP (x, 1)) == PLUS
3203 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3204 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3205 && aarch64_base_register_rtx_p (info->base, strict_p))
3207 HOST_WIDE_INT offset;
3208 info->offset = XEXP (XEXP (x, 1), 1);
3209 offset = INTVAL (info->offset);
3211 /* TImode and TFmode values are allowed in both pairs of X
3212 registers and individual Q registers. The available
3214 X,X: 7-bit signed scaled offset
3215 Q: 9-bit signed offset
3216 We conservatively require an offset representable in either mode.
3218 if (mode == TImode || mode == TFmode)
3219 return (offset_7bit_signed_scaled_p (mode, offset)
3220 && offset_9bit_signed_unscaled_p (mode, offset));
3222 if (outer_code == PARALLEL)
3223 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3224 && offset_7bit_signed_scaled_p (mode, offset));
3226 return offset_9bit_signed_unscaled_p (mode, offset);
3233 /* load literal: pc-relative constant pool entry. Only supported
3234 for SI mode or larger. */
3235 info->type = ADDRESS_SYMBOLIC;
3236 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3240 split_const (x, &sym, &addend);
3241 return (GET_CODE (sym) == LABEL_REF
3242 || (GET_CODE (sym) == SYMBOL_REF
3243 && CONSTANT_POOL_ADDRESS_P (sym)));
3248 info->type = ADDRESS_LO_SUM;
3249 info->base = XEXP (x, 0);
3250 info->offset = XEXP (x, 1);
3251 if (allow_reg_index_p
3252 && aarch64_base_register_rtx_p (info->base, strict_p))
3255 split_const (info->offset, &sym, &offs);
3256 if (GET_CODE (sym) == SYMBOL_REF
3257 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3258 == SYMBOL_SMALL_ABSOLUTE))
3260 /* The symbol and offset must be aligned to the access size. */
3262 unsigned int ref_size;
3264 if (CONSTANT_POOL_ADDRESS_P (sym))
3265 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3266 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3268 tree exp = SYMBOL_REF_DECL (sym);
3269 align = TYPE_ALIGN (TREE_TYPE (exp));
3270 align = CONSTANT_ALIGNMENT (exp, align);
3272 else if (SYMBOL_REF_DECL (sym))
3273 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3274 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3275 && SYMBOL_REF_BLOCK (sym) != NULL)
3276 align = SYMBOL_REF_BLOCK (sym)->alignment;
3278 align = BITS_PER_UNIT;
3280 ref_size = GET_MODE_SIZE (mode);
3282 ref_size = GET_MODE_SIZE (DImode);
3284 return ((INTVAL (offs) & (ref_size - 1)) == 0
3285 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3296 aarch64_symbolic_address_p (rtx x)
3300 split_const (x, &x, &offset);
3301 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3304 /* Classify the base of symbolic expression X, given that X appears in
3307 enum aarch64_symbol_type
3308 aarch64_classify_symbolic_expression (rtx x,
3309 enum aarch64_symbol_context context)
3313 split_const (x, &x, &offset);
3314 return aarch64_classify_symbol (x, context);
3318 /* Return TRUE if X is a legitimate address for accessing memory in
3321 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3323 struct aarch64_address_info addr;
3325 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3328 /* Return TRUE if X is a legitimate address for accessing memory in
3329 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3332 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3333 RTX_CODE outer_code, bool strict_p)
3335 struct aarch64_address_info addr;
3337 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3340 /* Return TRUE if rtx X is immediate constant 0.0 */
3342 aarch64_float_const_zero_rtx_p (rtx x)
3346 if (GET_MODE (x) == VOIDmode)
3349 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3350 if (REAL_VALUE_MINUS_ZERO (r))
3351 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3352 return REAL_VALUES_EQUAL (r, dconst0);
3355 /* Return the fixed registers used for condition codes. */
3358 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3361 *p2 = INVALID_REGNUM;
3366 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3368 /* All floating point compares return CCFP if it is an equality
3369 comparison, and CCFPE otherwise. */
3370 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3397 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3399 && (code == EQ || code == NE || code == LT || code == GE)
3400 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3401 || GET_CODE (x) == NEG))
3404 /* A compare with a shifted operand. Because of canonicalization,
3405 the comparison will have to be swapped when we emit the assembly
3407 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3408 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3409 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3410 || GET_CODE (x) == LSHIFTRT
3411 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3414 /* Similarly for a negated operand, but we can only do this for
3416 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3417 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3418 && (code == EQ || code == NE)
3419 && GET_CODE (x) == NEG)
3422 /* A compare of a mode narrower than SI mode against zero can be done
3423 by extending the value in the comparison. */
3424 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3426 /* Only use sign-extension if we really need it. */
3427 return ((code == GT || code == GE || code == LE || code == LT)
3428 ? CC_SESWPmode : CC_ZESWPmode);
3430 /* For everything else, return CCmode. */
3435 aarch64_get_condition_code (rtx x)
3437 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3438 enum rtx_code comp_code = GET_CODE (x);
3440 if (GET_MODE_CLASS (mode) != MODE_CC)
3441 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3449 case GE: return AARCH64_GE;
3450 case GT: return AARCH64_GT;
3451 case LE: return AARCH64_LS;
3452 case LT: return AARCH64_MI;
3453 case NE: return AARCH64_NE;
3454 case EQ: return AARCH64_EQ;
3455 case ORDERED: return AARCH64_VC;
3456 case UNORDERED: return AARCH64_VS;
3457 case UNLT: return AARCH64_LT;
3458 case UNLE: return AARCH64_LE;
3459 case UNGT: return AARCH64_HI;
3460 case UNGE: return AARCH64_PL;
3461 default: gcc_unreachable ();
3468 case NE: return AARCH64_NE;
3469 case EQ: return AARCH64_EQ;
3470 case GE: return AARCH64_GE;
3471 case GT: return AARCH64_GT;
3472 case LE: return AARCH64_LE;
3473 case LT: return AARCH64_LT;
3474 case GEU: return AARCH64_CS;
3475 case GTU: return AARCH64_HI;
3476 case LEU: return AARCH64_LS;
3477 case LTU: return AARCH64_CC;
3478 default: gcc_unreachable ();
3487 case NE: return AARCH64_NE;
3488 case EQ: return AARCH64_EQ;
3489 case GE: return AARCH64_LE;
3490 case GT: return AARCH64_LT;
3491 case LE: return AARCH64_GE;
3492 case LT: return AARCH64_GT;
3493 case GEU: return AARCH64_LS;
3494 case GTU: return AARCH64_CC;
3495 case LEU: return AARCH64_CS;
3496 case LTU: return AARCH64_HI;
3497 default: gcc_unreachable ();
3504 case NE: return AARCH64_NE;
3505 case EQ: return AARCH64_EQ;
3506 case GE: return AARCH64_PL;
3507 case LT: return AARCH64_MI;
3508 default: gcc_unreachable ();
3515 case NE: return AARCH64_NE;
3516 case EQ: return AARCH64_EQ;
3517 default: gcc_unreachable ();
3528 bit_count (unsigned HOST_WIDE_INT value)
3542 aarch64_print_operand (FILE *f, rtx x, char code)
3546 /* An integer or symbol address without a preceding # sign. */
3548 switch (GET_CODE (x))
3551 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3555 output_addr_const (f, x);
3559 if (GET_CODE (XEXP (x, 0)) == PLUS
3560 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3562 output_addr_const (f, x);
3568 output_operand_lossage ("Unsupported operand for code '%c'", code);
3573 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3577 if (GET_CODE (x) != CONST_INT
3578 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3580 output_operand_lossage ("invalid operand for '%%%c'", code);
3596 output_operand_lossage ("invalid operand for '%%%c'", code);
3606 /* Print N such that 2^N == X. */
3607 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3609 output_operand_lossage ("invalid operand for '%%%c'", code);
3613 asm_fprintf (f, "%d", n);
3618 /* Print the number of non-zero bits in X (a const_int). */
3619 if (GET_CODE (x) != CONST_INT)
3621 output_operand_lossage ("invalid operand for '%%%c'", code);
3625 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3629 /* Print the higher numbered register of a pair (TImode) of regs. */
3630 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3632 output_operand_lossage ("invalid operand for '%%%c'", code);
3636 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3640 /* Print a condition (eq, ne, etc). */
3642 /* CONST_TRUE_RTX means always -- that's the default. */
3643 if (x == const_true_rtx)
3646 if (!COMPARISON_P (x))
3648 output_operand_lossage ("invalid operand for '%%%c'", code);
3652 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3656 /* Print the inverse of a condition (eq <-> ne, etc). */
3658 /* CONST_TRUE_RTX means never -- that's the default. */
3659 if (x == const_true_rtx)
3665 if (!COMPARISON_P (x))
3667 output_operand_lossage ("invalid operand for '%%%c'", code);
3671 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3672 (aarch64_get_condition_code (x))], f);
3680 /* Print a scalar FP/SIMD register name. */
3681 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3683 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3686 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3693 /* Print the first FP/SIMD register name in a list. */
3694 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3696 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3699 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3703 /* Print bottom 16 bits of integer constant in hex. */
3704 if (GET_CODE (x) != CONST_INT)
3706 output_operand_lossage ("invalid operand for '%%%c'", code);
3709 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3714 /* Print a general register name or the zero register (32-bit or
3717 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3719 asm_fprintf (f, "%czr", code);
3723 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3725 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3729 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3731 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3738 /* Print a normal operand, if it's a general register, then we
3742 output_operand_lossage ("missing operand");
3746 switch (GET_CODE (x))
3749 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3753 aarch64_memory_reference_mode = GET_MODE (x);
3754 output_address (XEXP (x, 0));
3759 output_addr_const (asm_out_file, x);
3763 asm_fprintf (f, "%wd", INTVAL (x));
3767 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3769 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3771 HOST_WIDE_INT_MAX));
3772 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3774 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3783 /* CONST_DOUBLE can represent a double-width integer.
3784 In this case, the mode of x is VOIDmode. */
3785 if (GET_MODE (x) == VOIDmode)
3787 else if (aarch64_float_const_zero_rtx_p (x))
3792 else if (aarch64_float_const_representable_p (x))
3795 char float_buf[buf_size] = {'\0'};
3797 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3798 real_to_decimal_for_mode (float_buf, &r,
3801 asm_fprintf (asm_out_file, "%s", float_buf);
3805 output_operand_lossage ("invalid constant");
3808 output_operand_lossage ("invalid operand");
3814 if (GET_CODE (x) == HIGH)
3817 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3819 case SYMBOL_SMALL_GOT:
3820 asm_fprintf (asm_out_file, ":got:");
3823 case SYMBOL_SMALL_TLSGD:
3824 asm_fprintf (asm_out_file, ":tlsgd:");
3827 case SYMBOL_SMALL_TLSDESC:
3828 asm_fprintf (asm_out_file, ":tlsdesc:");
3831 case SYMBOL_SMALL_GOTTPREL:
3832 asm_fprintf (asm_out_file, ":gottprel:");
3835 case SYMBOL_SMALL_TPREL:
3836 asm_fprintf (asm_out_file, ":tprel:");
3839 case SYMBOL_TINY_GOT:
3846 output_addr_const (asm_out_file, x);
3850 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3852 case SYMBOL_SMALL_GOT:
3853 asm_fprintf (asm_out_file, ":lo12:");
3856 case SYMBOL_SMALL_TLSGD:
3857 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3860 case SYMBOL_SMALL_TLSDESC:
3861 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3864 case SYMBOL_SMALL_GOTTPREL:
3865 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3868 case SYMBOL_SMALL_TPREL:
3869 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3872 case SYMBOL_TINY_GOT:
3873 asm_fprintf (asm_out_file, ":got:");
3879 output_addr_const (asm_out_file, x);
3884 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3886 case SYMBOL_SMALL_TPREL:
3887 asm_fprintf (asm_out_file, ":tprel_hi12:");
3892 output_addr_const (asm_out_file, x);
3896 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3902 aarch64_print_operand_address (FILE *f, rtx x)
3904 struct aarch64_address_info addr;
3906 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3910 case ADDRESS_REG_IMM:
3911 if (addr.offset == const0_rtx)
3912 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3914 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3915 INTVAL (addr.offset));
3918 case ADDRESS_REG_REG:
3919 if (addr.shift == 0)
3920 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3921 reg_names [REGNO (addr.offset)]);
3923 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3924 reg_names [REGNO (addr.offset)], addr.shift);
3927 case ADDRESS_REG_UXTW:
3928 if (addr.shift == 0)
3929 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3930 REGNO (addr.offset) - R0_REGNUM);
3932 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3933 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3936 case ADDRESS_REG_SXTW:
3937 if (addr.shift == 0)
3938 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3939 REGNO (addr.offset) - R0_REGNUM);
3941 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3942 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3945 case ADDRESS_REG_WB:
3946 switch (GET_CODE (x))
3949 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3950 GET_MODE_SIZE (aarch64_memory_reference_mode));
3953 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3954 GET_MODE_SIZE (aarch64_memory_reference_mode));
3957 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3958 GET_MODE_SIZE (aarch64_memory_reference_mode));
3961 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3962 GET_MODE_SIZE (aarch64_memory_reference_mode));
3965 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3966 INTVAL (addr.offset));
3969 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3970 INTVAL (addr.offset));
3977 case ADDRESS_LO_SUM:
3978 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3979 output_addr_const (f, addr.offset);
3980 asm_fprintf (f, "]");
3983 case ADDRESS_SYMBOLIC:
3987 output_addr_const (f, x);
3991 aarch64_label_mentioned_p (rtx x)
3996 if (GET_CODE (x) == LABEL_REF)
3999 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4000 referencing instruction, but they are constant offsets, not
4002 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4005 fmt = GET_RTX_FORMAT (GET_CODE (x));
4006 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4012 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4013 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4016 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4023 /* Implement REGNO_REG_CLASS. */
4026 aarch64_regno_regclass (unsigned regno)
4028 if (GP_REGNUM_P (regno))
4031 if (regno == SP_REGNUM)
4034 if (regno == FRAME_POINTER_REGNUM
4035 || regno == ARG_POINTER_REGNUM)
4036 return POINTER_REGS;
4038 if (FP_REGNUM_P (regno))
4039 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4044 /* Try a machine-dependent way of reloading an illegitimate address
4045 operand. If we find one, push the reload and return the new rtx. */
4048 aarch64_legitimize_reload_address (rtx *x_p,
4049 enum machine_mode mode,
4050 int opnum, int type,
4051 int ind_levels ATTRIBUTE_UNUSED)
4055 /* Do not allow mem (plus (reg, const)) if vector mode. */
4056 if (aarch64_vector_mode_p (mode)
4057 && GET_CODE (x) == PLUS
4058 && REG_P (XEXP (x, 0))
4059 && CONST_INT_P (XEXP (x, 1)))
4063 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4064 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4065 opnum, (enum reload_type) type);
4069 /* We must recognize output that we have already generated ourselves. */
4070 if (GET_CODE (x) == PLUS
4071 && GET_CODE (XEXP (x, 0)) == PLUS
4072 && REG_P (XEXP (XEXP (x, 0), 0))
4073 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4074 && CONST_INT_P (XEXP (x, 1)))
4076 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4077 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4078 opnum, (enum reload_type) type);
4082 /* We wish to handle large displacements off a base register by splitting
4083 the addend across an add and the mem insn. This can cut the number of
4084 extra insns needed from 3 to 1. It is only useful for load/store of a
4085 single register with 12 bit offset field. */
4086 if (GET_CODE (x) == PLUS
4087 && REG_P (XEXP (x, 0))
4088 && CONST_INT_P (XEXP (x, 1))
4089 && HARD_REGISTER_P (XEXP (x, 0))
4092 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4094 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4095 HOST_WIDE_INT low = val & 0xfff;
4096 HOST_WIDE_INT high = val - low;
4099 enum machine_mode xmode = GET_MODE (x);
4101 /* In ILP32, xmode can be either DImode or SImode. */
4102 gcc_assert (xmode == DImode || xmode == SImode);
4104 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4105 BLKmode alignment. */
4106 if (GET_MODE_SIZE (mode) == 0)
4109 offs = low % GET_MODE_SIZE (mode);
4111 /* Align misaligned offset by adjusting high part to compensate. */
4114 if (aarch64_uimm12_shift (high + offs))
4123 offs = GET_MODE_SIZE (mode) - offs;
4125 high = high + (low & 0x1000) - offs;
4130 /* Check for overflow. */
4131 if (high + low != val)
4134 cst = GEN_INT (high);
4135 if (!aarch64_uimm12_shift (high))
4136 cst = force_const_mem (xmode, cst);
4138 /* Reload high part into base reg, leaving the low part
4139 in the mem instruction.
4140 Note that replacing this gen_rtx_PLUS with plus_constant is
4141 wrong in this case because we rely on the
4142 (plus (plus reg c1) c2) structure being preserved so that
4143 XEXP (*p, 0) in push_reload below uses the correct term. */
4144 x = gen_rtx_PLUS (xmode,
4145 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4148 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4149 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4150 opnum, (enum reload_type) type);
4159 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4161 enum machine_mode mode,
4162 secondary_reload_info *sri)
4164 /* Without the TARGET_SIMD instructions we cannot move a Q register
4165 to a Q register directly. We need a scratch. */
4166 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4167 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4168 && reg_class_subset_p (rclass, FP_REGS))
4171 sri->icode = CODE_FOR_aarch64_reload_movtf;
4172 else if (mode == TImode)
4173 sri->icode = CODE_FOR_aarch64_reload_movti;
4177 /* A TFmode or TImode memory access should be handled via an FP_REGS
4178 because AArch64 has richer addressing modes for LDR/STR instructions
4179 than LDP/STP instructions. */
4180 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4181 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4184 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4191 aarch64_can_eliminate (const int from, const int to)
4193 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4194 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4196 if (frame_pointer_needed)
4198 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4200 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4202 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4203 && !cfun->calls_alloca)
4205 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4215 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4217 HOST_WIDE_INT frame_size;
4218 HOST_WIDE_INT offset;
4220 aarch64_layout_frame ();
4221 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4222 + crtl->outgoing_args_size
4223 + cfun->machine->saved_varargs_size);
4225 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4226 offset = frame_size;
4228 if (to == HARD_FRAME_POINTER_REGNUM)
4230 if (from == ARG_POINTER_REGNUM)
4231 return offset - crtl->outgoing_args_size;
4233 if (from == FRAME_POINTER_REGNUM)
4234 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4237 if (to == STACK_POINTER_REGNUM)
4239 if (from == FRAME_POINTER_REGNUM)
4241 HOST_WIDE_INT elim = crtl->outgoing_args_size
4242 + cfun->machine->frame.saved_regs_size
4244 - cfun->machine->frame.fp_lr_offset;
4245 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4254 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4258 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4262 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4267 aarch64_asm_trampoline_template (FILE *f)
4271 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4272 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4276 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4277 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4279 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4280 assemble_aligned_integer (4, const0_rtx);
4281 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4282 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4286 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4288 rtx fnaddr, mem, a_tramp;
4289 const int tramp_code_sz = 16;
4291 /* Don't need to copy the trailing D-words, we fill those in below. */
4292 emit_block_move (m_tramp, assemble_trampoline_template (),
4293 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4294 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4295 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4296 if (GET_MODE (fnaddr) != ptr_mode)
4297 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4298 emit_move_insn (mem, fnaddr);
4300 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4301 emit_move_insn (mem, chain_value);
4303 /* XXX We should really define a "clear_cache" pattern and use
4304 gen_clear_cache(). */
4305 a_tramp = XEXP (m_tramp, 0);
4306 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4307 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4308 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4312 static unsigned char
4313 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4324 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4325 (GET_MODE_SIZE (mode) + 7) / 8;
4339 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4341 if (regclass == POINTER_REGS)
4342 return GENERAL_REGS;
4344 if (regclass == STACK_REG)
4347 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4353 /* If it's an integer immediate that MOVI can't handle, then
4354 FP_REGS is not an option, so we return NO_REGS instead. */
4355 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4356 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4359 /* Register eliminiation can result in a request for
4360 SP+constant->FP_REGS. We cannot support such operations which
4361 use SP as source and an FP_REG as destination, so reject out
4363 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4365 rtx lhs = XEXP (x, 0);
4367 /* Look through a possible SUBREG introduced by ILP32. */
4368 if (GET_CODE (lhs) == SUBREG)
4369 lhs = SUBREG_REG (lhs);
4371 gcc_assert (REG_P (lhs));
4372 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4381 aarch64_asm_output_labelref (FILE* f, const char *name)
4383 asm_fprintf (f, "%U%s", name);
4387 aarch64_elf_asm_constructor (rtx symbol, int priority)
4389 if (priority == DEFAULT_INIT_PRIORITY)
4390 default_ctor_section_asm_out_constructor (symbol, priority);
4395 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4396 s = get_section (buf, SECTION_WRITE, NULL);
4397 switch_to_section (s);
4398 assemble_align (POINTER_SIZE);
4399 assemble_aligned_integer (POINTER_BYTES, symbol);
4404 aarch64_elf_asm_destructor (rtx symbol, int priority)
4406 if (priority == DEFAULT_INIT_PRIORITY)
4407 default_dtor_section_asm_out_destructor (symbol, priority);
4412 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4413 s = get_section (buf, SECTION_WRITE, NULL);
4414 switch_to_section (s);
4415 assemble_align (POINTER_SIZE);
4416 assemble_aligned_integer (POINTER_BYTES, symbol);
4421 aarch64_output_casesi (rtx *operands)
4425 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4427 static const char *const patterns[4][2] =
4430 "ldrb\t%w3, [%0,%w1,uxtw]",
4431 "add\t%3, %4, %w3, sxtb #2"
4434 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4435 "add\t%3, %4, %w3, sxth #2"
4438 "ldr\t%w3, [%0,%w1,uxtw #2]",
4439 "add\t%3, %4, %w3, sxtw #2"
4441 /* We assume that DImode is only generated when not optimizing and
4442 that we don't really need 64-bit address offsets. That would
4443 imply an object file with 8GB of code in a single function! */
4445 "ldr\t%w3, [%0,%w1,uxtw #2]",
4446 "add\t%3, %4, %w3, sxtw #2"
4450 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4452 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4454 gcc_assert (index >= 0 && index <= 3);
4456 /* Need to implement table size reduction, by chaning the code below. */
4457 output_asm_insn (patterns[index][0], operands);
4458 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4459 snprintf (buf, sizeof (buf),
4460 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4461 output_asm_insn (buf, operands);
4462 output_asm_insn (patterns[index][1], operands);
4463 output_asm_insn ("br\t%3", operands);
4464 assemble_label (asm_out_file, label);
4469 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4470 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4474 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4476 if (shift >= 0 && shift <= 3)
4479 for (size = 8; size <= 32; size *= 2)
4481 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4482 if (mask == bits << shift)
4490 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4491 const_rtx x ATTRIBUTE_UNUSED)
4493 /* We can't use blocks for constants when we're using a per-function
4499 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4500 rtx x ATTRIBUTE_UNUSED,
4501 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4503 /* Force all constant pool entries into the current function section. */
4504 return function_section (current_function_decl);
4510 /* Helper function for rtx cost calculation. Strip a shift expression
4511 from X. Returns the inner operand if successful, or the original
4512 expression on failure. */
4514 aarch64_strip_shift (rtx x)
4518 if ((GET_CODE (op) == ASHIFT
4519 || GET_CODE (op) == ASHIFTRT
4520 || GET_CODE (op) == LSHIFTRT)
4521 && CONST_INT_P (XEXP (op, 1)))
4522 return XEXP (op, 0);
4524 if (GET_CODE (op) == MULT
4525 && CONST_INT_P (XEXP (op, 1))
4526 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4527 return XEXP (op, 0);
4532 /* Helper function for rtx cost calculation. Strip an extend
4533 expression from X. Returns the inner operand if successful, or the
4534 original expression on failure. We deal with a number of possible
4535 canonicalization variations here. */
4537 aarch64_strip_extend (rtx x)
4541 /* Zero and sign extraction of a widened value. */
4542 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4543 && XEXP (op, 2) == const0_rtx
4544 && GET_CODE (XEXP (op, 0)) == MULT
4545 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4547 return XEXP (XEXP (op, 0), 0);
4549 /* It can also be represented (for zero-extend) as an AND with an
4551 if (GET_CODE (op) == AND
4552 && GET_CODE (XEXP (op, 0)) == MULT
4553 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4554 && CONST_INT_P (XEXP (op, 1))
4555 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4556 INTVAL (XEXP (op, 1))) != 0)
4557 return XEXP (XEXP (op, 0), 0);
4559 /* Now handle extended register, as this may also have an optional
4560 left shift by 1..4. */
4561 if (GET_CODE (op) == ASHIFT
4562 && CONST_INT_P (XEXP (op, 1))
4563 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4566 if (GET_CODE (op) == ZERO_EXTEND
4567 || GET_CODE (op) == SIGN_EXTEND)
4576 /* Helper function for rtx cost calculation. Calculate the cost of
4577 a MULT, which may be part of a multiply-accumulate rtx. Return
4578 the calculated cost of the expression, recursing manually in to
4579 operands where needed. */
4582 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4585 const struct cpu_cost_table *extra_cost
4586 = aarch64_tune_params->insn_extra_cost;
4588 bool maybe_fma = (outer == PLUS || outer == MINUS);
4589 enum machine_mode mode = GET_MODE (x);
4591 gcc_checking_assert (code == MULT);
4596 if (VECTOR_MODE_P (mode))
4597 mode = GET_MODE_INNER (mode);
4599 /* Integer multiply/fma. */
4600 if (GET_MODE_CLASS (mode) == MODE_INT)
4602 /* The multiply will be canonicalized as a shift, cost it as such. */
4603 if (CONST_INT_P (op1)
4604 && exact_log2 (INTVAL (op1)) > 0)
4609 /* ADD (shifted register). */
4610 cost += extra_cost->alu.arith_shift;
4612 /* LSL (immediate). */
4613 cost += extra_cost->alu.shift;
4616 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4621 /* Integer multiplies or FMAs have zero/sign extending variants. */
4622 if ((GET_CODE (op0) == ZERO_EXTEND
4623 && GET_CODE (op1) == ZERO_EXTEND)
4624 || (GET_CODE (op0) == SIGN_EXTEND
4625 && GET_CODE (op1) == SIGN_EXTEND))
4627 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4628 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4633 /* MADD/SMADDL/UMADDL. */
4634 cost += extra_cost->mult[0].extend_add;
4636 /* MUL/SMULL/UMULL. */
4637 cost += extra_cost->mult[0].extend;
4643 /* This is either an integer multiply or an FMA. In both cases
4644 we want to recurse and cost the operands. */
4645 cost += rtx_cost (op0, MULT, 0, speed)
4646 + rtx_cost (op1, MULT, 1, speed);
4652 cost += extra_cost->mult[mode == DImode].add;
4655 cost += extra_cost->mult[mode == DImode].simple;
4664 /* Floating-point FMA can also support negations of the
4666 if (GET_CODE (op0) == NEG)
4669 op0 = XEXP (op0, 0);
4671 if (GET_CODE (op1) == NEG)
4674 op1 = XEXP (op1, 0);
4678 /* FMADD/FNMADD/FNMSUB/FMSUB. */
4679 cost += extra_cost->fp[mode == DFmode].fma;
4682 cost += extra_cost->fp[mode == DFmode].mult;
4685 cost += rtx_cost (op0, MULT, 0, speed)
4686 + rtx_cost (op1, MULT, 1, speed);
4692 aarch64_address_cost (rtx x,
4693 enum machine_mode mode,
4694 addr_space_t as ATTRIBUTE_UNUSED,
4697 enum rtx_code c = GET_CODE (x);
4698 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4699 struct aarch64_address_info info;
4703 if (!aarch64_classify_address (&info, x, mode, c, false))
4705 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4707 /* This is a CONST or SYMBOL ref which will be split
4708 in a different way depending on the code model in use.
4709 Cost it through the generic infrastructure. */
4710 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4711 /* Divide through by the cost of one instruction to
4712 bring it to the same units as the address costs. */
4713 cost_symbol_ref /= COSTS_N_INSNS (1);
4714 /* The cost is then the cost of preparing the address,
4715 followed by an immediate (possibly 0) offset. */
4716 return cost_symbol_ref + addr_cost->imm_offset;
4720 /* This is most likely a jump table from a case
4722 return addr_cost->register_offset;
4728 case ADDRESS_LO_SUM:
4729 case ADDRESS_SYMBOLIC:
4730 case ADDRESS_REG_IMM:
4731 cost += addr_cost->imm_offset;
4734 case ADDRESS_REG_WB:
4735 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4736 cost += addr_cost->pre_modify;
4737 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4738 cost += addr_cost->post_modify;
4744 case ADDRESS_REG_REG:
4745 cost += addr_cost->register_offset;
4748 case ADDRESS_REG_UXTW:
4749 case ADDRESS_REG_SXTW:
4750 cost += addr_cost->register_extend;
4760 /* For the sake of calculating the cost of the shifted register
4761 component, we can treat same sized modes in the same way. */
4762 switch (GET_MODE_BITSIZE (mode))
4765 cost += addr_cost->addr_scale_costs.hi;
4769 cost += addr_cost->addr_scale_costs.si;
4773 cost += addr_cost->addr_scale_costs.di;
4776 /* We can't tell, or this is a 128-bit vector. */
4778 cost += addr_cost->addr_scale_costs.ti;
4786 /* Calculate the cost of calculating X, storing it in *COST. Result
4787 is true if the total cost of the operation has now been calculated. */
4789 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4790 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4793 const struct cpu_cost_table *extra_cost
4794 = aarch64_tune_params->insn_extra_cost;
4795 enum machine_mode mode = GET_MODE (x);
4797 /* By default, assume that everything has equivalent cost to the
4798 cheapest instruction. Any additional costs are applied as a delta
4799 above this default. */
4800 *cost = COSTS_N_INSNS (1);
4802 /* TODO: The cost infrastructure currently does not handle
4803 vector operations. Assume that all vector operations
4804 are equally expensive. */
4805 if (VECTOR_MODE_P (mode))
4808 *cost += extra_cost->vect.alu;
4815 /* The cost depends entirely on the operands to SET. */
4820 switch (GET_CODE (op0))
4825 rtx address = XEXP (op0, 0);
4826 if (GET_MODE_CLASS (mode) == MODE_INT)
4827 *cost += extra_cost->ldst.store;
4828 else if (mode == SFmode)
4829 *cost += extra_cost->ldst.storef;
4830 else if (mode == DFmode)
4831 *cost += extra_cost->ldst.stored;
4834 COSTS_N_INSNS (aarch64_address_cost (address, mode,
4838 *cost += rtx_cost (op1, SET, 1, speed);
4842 if (! REG_P (SUBREG_REG (op0)))
4843 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4847 /* const0_rtx is in general free, but we will use an
4848 instruction to set a register to 0. */
4849 if (REG_P (op1) || op1 == const0_rtx)
4851 /* The cost is 1 per register copied. */
4852 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
4854 *cost = COSTS_N_INSNS (n_minus_1 + 1);
4857 /* Cost is just the cost of the RHS of the set. */
4858 *cost += rtx_cost (op1, SET, 1, speed);
4863 /* Bit-field insertion. Strip any redundant widening of
4864 the RHS to meet the width of the target. */
4865 if (GET_CODE (op1) == SUBREG)
4866 op1 = SUBREG_REG (op1);
4867 if ((GET_CODE (op1) == ZERO_EXTEND
4868 || GET_CODE (op1) == SIGN_EXTEND)
4869 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4870 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4871 >= INTVAL (XEXP (op0, 1))))
4872 op1 = XEXP (op1, 0);
4874 if (CONST_INT_P (op1))
4876 /* MOV immediate is assumed to always be cheap. */
4877 *cost = COSTS_N_INSNS (1);
4883 *cost += extra_cost->alu.bfi;
4884 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
4890 /* We can't make sense of this, assume default cost. */
4891 *cost = COSTS_N_INSNS (1);
4897 /* If an instruction can incorporate a constant within the
4898 instruction, the instruction's expression avoids calling
4899 rtx_cost() on the constant. If rtx_cost() is called on a
4900 constant, then it is usually because the constant must be
4901 moved into a register by one or more instructions.
4903 The exception is constant 0, which can be expressed
4904 as XZR/WZR and is therefore free. The exception to this is
4905 if we have (set (reg) (const0_rtx)) in which case we must cost
4906 the move. However, we can catch that when we cost the SET, so
4907 we don't need to consider that here. */
4908 if (x == const0_rtx)
4912 /* To an approximation, building any other constant is
4913 proportionally expensive to the number of instructions
4914 required to build that constant. This is true whether we
4915 are compiling for SPEED or otherwise. */
4916 *cost = COSTS_N_INSNS (aarch64_build_constant (0,
4925 /* mov[df,sf]_aarch64. */
4926 if (aarch64_float_const_representable_p (x))
4927 /* FMOV (scalar immediate). */
4928 *cost += extra_cost->fp[mode == DFmode].fpconst;
4929 else if (!aarch64_float_const_zero_rtx_p (x))
4931 /* This will be a load from memory. */
4933 *cost += extra_cost->ldst.loadd;
4935 *cost += extra_cost->ldst.loadf;
4938 /* Otherwise this is +0.0. We get this using MOVI d0, #0
4939 or MOV v0.s[0], wzr - neither of which are modeled by the
4940 cost tables. Just use the default cost. */
4950 /* For loads we want the base cost of a load, plus an
4951 approximation for the additional cost of the addressing
4953 rtx address = XEXP (x, 0);
4954 if (GET_MODE_CLASS (mode) == MODE_INT)
4955 *cost += extra_cost->ldst.load;
4956 else if (mode == SFmode)
4957 *cost += extra_cost->ldst.loadf;
4958 else if (mode == DFmode)
4959 *cost += extra_cost->ldst.loadd;
4962 COSTS_N_INSNS (aarch64_address_cost (address, mode,
4971 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4973 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
4974 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
4977 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
4981 /* Cost this as SUB wzr, X. */
4982 op0 = CONST0_RTX (GET_MODE (x));
4987 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
4989 /* Support (neg(fma...)) as a single instruction only if
4990 sign of zeros is unimportant. This matches the decision
4991 making in aarch64.md. */
4992 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
4995 *cost = rtx_cost (op0, NEG, 0, speed);
5000 *cost += extra_cost->fp[mode == DFmode].neg;
5010 if (op1 == const0_rtx
5011 && GET_CODE (op0) == AND)
5017 /* Comparisons can work if the order is swapped.
5018 Canonicalization puts the more complex operation first, but
5019 we want it in op1. */
5021 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5034 /* Detect valid immediates. */
5035 if ((GET_MODE_CLASS (mode) == MODE_INT
5036 || (GET_MODE_CLASS (mode) == MODE_CC
5037 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5038 && CONST_INT_P (op1)
5039 && aarch64_uimm12_shift (INTVAL (op1)))
5041 *cost += rtx_cost (op0, MINUS, 0, speed);
5044 /* SUB(S) (immediate). */
5045 *cost += extra_cost->alu.arith;
5050 rtx new_op1 = aarch64_strip_extend (op1);
5052 /* Cost this as an FMA-alike operation. */
5053 if ((GET_CODE (new_op1) == MULT
5054 || GET_CODE (new_op1) == ASHIFT)
5057 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5058 (enum rtx_code) code,
5060 *cost += rtx_cost (op0, MINUS, 0, speed);
5064 *cost += rtx_cost (new_op1, MINUS, 1, speed);
5068 if (GET_MODE_CLASS (mode) == MODE_INT)
5070 *cost += extra_cost->alu.arith;
5071 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5073 *cost += extra_cost->fp[mode == DFmode].addsub;
5085 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5086 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5089 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5090 *cost += rtx_cost (op1, PLUS, 1, speed);
5094 if (GET_MODE_CLASS (mode) == MODE_INT
5095 && CONST_INT_P (op1)
5096 && aarch64_uimm12_shift (INTVAL (op1)))
5098 *cost += rtx_cost (op0, PLUS, 0, speed);
5101 /* ADD (immediate). */
5102 *cost += extra_cost->alu.arith;
5106 /* Strip any extend, leave shifts behind as we will
5107 cost them through mult_cost. */
5108 new_op0 = aarch64_strip_extend (op0);
5110 if (GET_CODE (new_op0) == MULT
5111 || GET_CODE (new_op0) == ASHIFT)
5113 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5115 *cost += rtx_cost (op1, PLUS, 1, speed);
5119 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5120 + rtx_cost (op1, PLUS, 1, speed));
5124 if (GET_MODE_CLASS (mode) == MODE_INT)
5126 *cost += extra_cost->alu.arith;
5127 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5129 *cost += extra_cost->fp[mode == DFmode].addsub;
5142 && GET_CODE (op0) == MULT
5143 && CONST_INT_P (XEXP (op0, 1))
5144 && CONST_INT_P (op1)
5145 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5148 /* This is a UBFM/SBFM. */
5149 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5151 *cost += extra_cost->alu.bfx;
5155 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5157 /* We possibly get the immediate for free, this is not
5159 if (CONST_INT_P (op1)
5160 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5162 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5165 *cost += extra_cost->alu.logical;
5173 /* Handle ORN, EON, or BIC. */
5174 if (GET_CODE (op0) == NOT)
5175 op0 = XEXP (op0, 0);
5177 new_op0 = aarch64_strip_shift (op0);
5179 /* If we had a shift on op0 then this is a logical-shift-
5180 by-register/immediate operation. Otherwise, this is just
5181 a logical operation. */
5186 /* Shift by immediate. */
5187 if (CONST_INT_P (XEXP (op0, 1)))
5188 *cost += extra_cost->alu.log_shift;
5190 *cost += extra_cost->alu.log_shift_reg;
5193 *cost += extra_cost->alu.logical;
5196 /* In both cases we want to cost both operands. */
5197 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5198 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5208 *cost += extra_cost->alu.logical;
5210 /* The logical instruction could have the shifted register form,
5211 but the cost is the same if the shift is processed as a separate
5212 instruction, so we don't bother with it here. */
5218 /* If a value is written in SI mode, then zero extended to DI
5219 mode, the operation will in general be free as a write to
5220 a 'w' register implicitly zeroes the upper bits of an 'x'
5221 register. However, if this is
5223 (set (reg) (zero_extend (reg)))
5225 we must cost the explicit register move. */
5227 && GET_MODE (op0) == SImode
5230 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5232 if (!op_cost && speed)
5234 *cost += extra_cost->alu.extend;
5236 /* Free, the cost is that of the SI mode operation. */
5241 else if (MEM_P (XEXP (x, 0)))
5243 /* All loads can zero extend to any size for free. */
5244 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
5250 *cost += extra_cost->alu.extend;
5255 if (MEM_P (XEXP (x, 0)))
5260 rtx address = XEXP (XEXP (x, 0), 0);
5261 *cost += extra_cost->ldst.load_sign_extend;
5264 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5271 *cost += extra_cost->alu.extend;
5275 if (!CONST_INT_P (XEXP (x, 1)))
5276 *cost += COSTS_N_INSNS (2);
5283 /* Shifting by a register often takes an extra cycle. */
5284 if (speed && !CONST_INT_P (XEXP (x, 1)))
5285 *cost += extra_cost->alu.arith_shift_reg;
5287 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
5291 if (!CONSTANT_P (XEXP (x, 0)))
5292 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
5296 if (!CONSTANT_P (XEXP (x, 1)))
5297 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
5298 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
5303 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
5307 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5308 /* aarch64_rtx_mult_cost always handles recursion to its
5314 *cost = COSTS_N_INSNS (2);
5317 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5318 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5319 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
5320 else if (GET_MODE (x) == DFmode)
5321 *cost += (extra_cost->fp[1].mult
5322 + extra_cost->fp[1].div);
5323 else if (GET_MODE (x) == SFmode)
5324 *cost += (extra_cost->fp[0].mult
5325 + extra_cost->fp[0].div);
5327 return false; /* All arguments need to be in registers. */
5331 *cost = COSTS_N_INSNS (1);
5334 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5335 *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
5336 else if (GET_MODE (x) == DFmode)
5337 *cost += extra_cost->fp[1].div;
5338 else if (GET_MODE (x) == SFmode)
5339 *cost += extra_cost->fp[0].div;
5341 return false; /* All arguments need to be in registers. */
5349 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
5350 calculated for X. This cost is stored in *COST. Returns true
5351 if the total cost of X was calculated. */
5353 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
5354 int param, int *cost, bool speed)
5356 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
5358 if (dump_file && (dump_flags & TDF_DETAILS))
5360 print_rtl_single (dump_file, x);
5361 fprintf (dump_file, "\n%s cost: %d (%s)\n",
5362 speed ? "Hot" : "Cold",
5363 *cost, result ? "final" : "partial");
5370 aarch64_register_move_cost (enum machine_mode mode,
5371 reg_class_t from_i, reg_class_t to_i)
5373 enum reg_class from = (enum reg_class) from_i;
5374 enum reg_class to = (enum reg_class) to_i;
5375 const struct cpu_regmove_cost *regmove_cost
5376 = aarch64_tune_params->regmove_cost;
5378 /* Moving between GPR and stack cost is the same as GP2GP. */
5379 if ((from == GENERAL_REGS && to == STACK_REG)
5380 || (to == GENERAL_REGS && from == STACK_REG))
5381 return regmove_cost->GP2GP;
5383 /* To/From the stack register, we move via the gprs. */
5384 if (to == STACK_REG || from == STACK_REG)
5385 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
5386 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
5388 if (from == GENERAL_REGS && to == GENERAL_REGS)
5389 return regmove_cost->GP2GP;
5390 else if (from == GENERAL_REGS)
5391 return regmove_cost->GP2FP;
5392 else if (to == GENERAL_REGS)
5393 return regmove_cost->FP2GP;
5395 /* When AdvSIMD instructions are disabled it is not possible to move
5396 a 128-bit value directly between Q registers. This is handled in
5397 secondary reload. A general register is used as a scratch to move
5398 the upper DI value and the lower DI value is moved directly,
5399 hence the cost is the sum of three moves. */
5400 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 128)
5401 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
5403 return regmove_cost->FP2FP;
5407 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5408 reg_class_t rclass ATTRIBUTE_UNUSED,
5409 bool in ATTRIBUTE_UNUSED)
5411 return aarch64_tune_params->memmov_cost;
5414 /* Return the number of instructions that can be issued per cycle. */
5416 aarch64_sched_issue_rate (void)
5418 return aarch64_tune_params->issue_rate;
5421 /* Vectorizer cost model target hooks. */
5423 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5425 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5427 int misalign ATTRIBUTE_UNUSED)
5431 switch (type_of_cost)
5434 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
5437 return aarch64_tune_params->vec_costs->scalar_load_cost;
5440 return aarch64_tune_params->vec_costs->scalar_store_cost;
5443 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5446 return aarch64_tune_params->vec_costs->vec_align_load_cost;
5449 return aarch64_tune_params->vec_costs->vec_store_cost;
5452 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
5455 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
5457 case unaligned_load:
5458 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
5460 case unaligned_store:
5461 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
5463 case cond_branch_taken:
5464 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
5466 case cond_branch_not_taken:
5467 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
5470 case vec_promote_demote:
5471 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5474 elements = TYPE_VECTOR_SUBPARTS (vectype);
5475 return elements / 2 + 1;
5482 /* Implement targetm.vectorize.add_stmt_cost. */
5484 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5485 struct _stmt_vec_info *stmt_info, int misalign,
5486 enum vect_cost_model_location where)
5488 unsigned *cost = (unsigned *) data;
5489 unsigned retval = 0;
5491 if (flag_vect_cost_model)
5493 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5495 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
5497 /* Statements in an inner loop relative to the loop being
5498 vectorized are weighted more heavily. The value here is
5499 a function (linear for now) of the loop nest level. */
5500 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5502 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5503 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
5504 unsigned nest_level = loop_depth (loop);
5506 count *= nest_level;
5509 retval = (unsigned) (count * stmt_cost);
5510 cost[where] += retval;
5516 static void initialize_aarch64_code_model (void);
5518 /* Parse the architecture extension string. */
5521 aarch64_parse_extension (char *str)
5523 /* The extension string is parsed left to right. */
5524 const struct aarch64_option_extension *opt = NULL;
5526 /* Flag to say whether we are adding or removing an extension. */
5527 int adding_ext = -1;
5529 while (str != NULL && *str != 0)
5535 ext = strchr (str, '+');
5542 if (len >= 2 && strncmp (str, "no", 2) == 0)
5553 error ("missing feature modifier after %qs", "+no");
5557 /* Scan over the extensions table trying to find an exact match. */
5558 for (opt = all_extensions; opt->name != NULL; opt++)
5560 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5562 /* Add or remove the extension. */
5564 aarch64_isa_flags |= opt->flags_on;
5566 aarch64_isa_flags &= ~(opt->flags_off);
5571 if (opt->name == NULL)
5573 /* Extension not found in list. */
5574 error ("unknown feature modifier %qs", str);
5584 /* Parse the ARCH string. */
5587 aarch64_parse_arch (void)
5590 const struct processor *arch;
5591 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5594 strcpy (str, aarch64_arch_string);
5596 ext = strchr (str, '+');
5605 error ("missing arch name in -march=%qs", str);
5609 /* Loop through the list of supported ARCHs to find a match. */
5610 for (arch = all_architectures; arch->name != NULL; arch++)
5612 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5614 selected_arch = arch;
5615 aarch64_isa_flags = selected_arch->flags;
5618 selected_cpu = &all_cores[selected_arch->core];
5622 /* ARCH string contains at least one extension. */
5623 aarch64_parse_extension (ext);
5626 if (strcmp (selected_arch->arch, selected_cpu->arch))
5628 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
5629 selected_cpu->name, selected_arch->name);
5636 /* ARCH name not found in list. */
5637 error ("unknown value %qs for -march", str);
5641 /* Parse the CPU string. */
5644 aarch64_parse_cpu (void)
5647 const struct processor *cpu;
5648 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5651 strcpy (str, aarch64_cpu_string);
5653 ext = strchr (str, '+');
5662 error ("missing cpu name in -mcpu=%qs", str);
5666 /* Loop through the list of supported CPUs to find a match. */
5667 for (cpu = all_cores; cpu->name != NULL; cpu++)
5669 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5672 selected_tune = cpu;
5673 aarch64_isa_flags = selected_cpu->flags;
5677 /* CPU string contains at least one extension. */
5678 aarch64_parse_extension (ext);
5685 /* CPU name not found in list. */
5686 error ("unknown value %qs for -mcpu", str);
5690 /* Parse the TUNE string. */
5693 aarch64_parse_tune (void)
5695 const struct processor *cpu;
5696 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5697 strcpy (str, aarch64_tune_string);
5699 /* Loop through the list of supported CPUs to find a match. */
5700 for (cpu = all_cores; cpu->name != NULL; cpu++)
5702 if (strcmp (cpu->name, str) == 0)
5704 selected_tune = cpu;
5709 /* CPU name not found in list. */
5710 error ("unknown value %qs for -mtune", str);
5715 /* Implement TARGET_OPTION_OVERRIDE. */
5718 aarch64_override_options (void)
5720 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
5721 If either of -march or -mtune is given, they override their
5722 respective component of -mcpu.
5724 So, first parse AARCH64_CPU_STRING, then the others, be careful
5725 with -march as, if -mcpu is not present on the command line, march
5726 must set a sensible default CPU. */
5727 if (aarch64_cpu_string)
5729 aarch64_parse_cpu ();
5732 if (aarch64_arch_string)
5734 aarch64_parse_arch ();
5737 if (aarch64_tune_string)
5739 aarch64_parse_tune ();
5742 #ifndef HAVE_AS_MABI_OPTION
5743 /* The compiler may have been configured with 2.23.* binutils, which does
5744 not have support for ILP32. */
5746 error ("Assembler does not support -mabi=ilp32");
5749 initialize_aarch64_code_model ();
5751 aarch64_build_bitmask_table ();
5753 /* This target defaults to strict volatile bitfields. */
5754 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5755 flag_strict_volatile_bitfields = 1;
5757 /* If the user did not specify a processor, choose the default
5758 one for them. This will be the CPU set during configuration using
5759 --with-cpu, otherwise it is "generic". */
5762 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5763 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5766 gcc_assert (selected_cpu);
5768 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
5770 selected_tune = &all_cores[selected_cpu->core];
5772 aarch64_tune_flags = selected_tune->flags;
5773 aarch64_tune = selected_tune->core;
5774 aarch64_tune_params = selected_tune->tune;
5776 if (aarch64_fix_a53_err835769 == 2)
5778 #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
5779 aarch64_fix_a53_err835769 = 1;
5781 aarch64_fix_a53_err835769 = 0;
5785 aarch64_override_options_after_change ();
5788 /* Implement targetm.override_options_after_change. */
5791 aarch64_override_options_after_change (void)
5793 if (flag_omit_frame_pointer)
5794 flag_omit_leaf_frame_pointer = false;
5795 else if (flag_omit_leaf_frame_pointer)
5796 flag_omit_frame_pointer = true;
5799 static struct machine_function *
5800 aarch64_init_machine_status (void)
5802 struct machine_function *machine;
5803 machine = ggc_alloc_cleared_machine_function ();
5808 aarch64_init_expanders (void)
5810 init_machine_status = aarch64_init_machine_status;
5813 /* A checking mechanism for the implementation of the various code models. */
5815 initialize_aarch64_code_model (void)
5819 switch (aarch64_cmodel_var)
5821 case AARCH64_CMODEL_TINY:
5822 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5824 case AARCH64_CMODEL_SMALL:
5825 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5827 case AARCH64_CMODEL_LARGE:
5828 sorry ("code model %qs with -f%s", "large",
5829 flag_pic > 1 ? "PIC" : "pic");
5835 aarch64_cmodel = aarch64_cmodel_var;
5838 /* Return true if SYMBOL_REF X binds locally. */
5841 aarch64_symbol_binds_local_p (const_rtx x)
5843 return (SYMBOL_REF_DECL (x)
5844 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5845 : SYMBOL_REF_LOCAL_P (x));
5848 /* Return true if SYMBOL_REF X is thread local */
5850 aarch64_tls_symbol_p (rtx x)
5852 if (! TARGET_HAVE_TLS)
5855 if (GET_CODE (x) != SYMBOL_REF)
5858 return SYMBOL_REF_TLS_MODEL (x) != 0;
5861 /* Classify a TLS symbol into one of the TLS kinds. */
5862 enum aarch64_symbol_type
5863 aarch64_classify_tls_symbol (rtx x)
5865 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5869 case TLS_MODEL_GLOBAL_DYNAMIC:
5870 case TLS_MODEL_LOCAL_DYNAMIC:
5871 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5873 case TLS_MODEL_INITIAL_EXEC:
5874 return SYMBOL_SMALL_GOTTPREL;
5876 case TLS_MODEL_LOCAL_EXEC:
5877 return SYMBOL_SMALL_TPREL;
5879 case TLS_MODEL_EMULATED:
5880 case TLS_MODEL_NONE:
5881 return SYMBOL_FORCE_TO_MEM;
5888 /* Return the method that should be used to access SYMBOL_REF or
5889 LABEL_REF X in context CONTEXT. */
5891 enum aarch64_symbol_type
5892 aarch64_classify_symbol (rtx x,
5893 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5895 if (GET_CODE (x) == LABEL_REF)
5897 switch (aarch64_cmodel)
5899 case AARCH64_CMODEL_LARGE:
5900 return SYMBOL_FORCE_TO_MEM;
5902 case AARCH64_CMODEL_TINY_PIC:
5903 case AARCH64_CMODEL_TINY:
5904 return SYMBOL_TINY_ABSOLUTE;
5906 case AARCH64_CMODEL_SMALL_PIC:
5907 case AARCH64_CMODEL_SMALL:
5908 return SYMBOL_SMALL_ABSOLUTE;
5915 if (GET_CODE (x) == SYMBOL_REF)
5917 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5918 return SYMBOL_FORCE_TO_MEM;
5920 if (aarch64_tls_symbol_p (x))
5921 return aarch64_classify_tls_symbol (x);
5923 switch (aarch64_cmodel)
5925 case AARCH64_CMODEL_TINY:
5926 if (SYMBOL_REF_WEAK (x))
5927 return SYMBOL_FORCE_TO_MEM;
5928 return SYMBOL_TINY_ABSOLUTE;
5930 case AARCH64_CMODEL_SMALL:
5931 if (SYMBOL_REF_WEAK (x))
5932 return SYMBOL_FORCE_TO_MEM;
5933 return SYMBOL_SMALL_ABSOLUTE;
5935 case AARCH64_CMODEL_TINY_PIC:
5936 if (!aarch64_symbol_binds_local_p (x))
5937 return SYMBOL_TINY_GOT;
5938 return SYMBOL_TINY_ABSOLUTE;
5940 case AARCH64_CMODEL_SMALL_PIC:
5941 if (!aarch64_symbol_binds_local_p (x))
5942 return SYMBOL_SMALL_GOT;
5943 return SYMBOL_SMALL_ABSOLUTE;
5950 /* By default push everything into the constant pool. */
5951 return SYMBOL_FORCE_TO_MEM;
5955 aarch64_constant_address_p (rtx x)
5957 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5961 aarch64_legitimate_pic_operand_p (rtx x)
5963 if (GET_CODE (x) == SYMBOL_REF
5964 || (GET_CODE (x) == CONST
5965 && GET_CODE (XEXP (x, 0)) == PLUS
5966 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5972 /* Return true if X holds either a quarter-precision or
5973 floating-point +0.0 constant. */
5975 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5977 if (!CONST_DOUBLE_P (x))
5980 /* TODO: We could handle moving 0.0 to a TFmode register,
5981 but first we would like to refactor the movtf_aarch64
5982 to be more amicable to split moves properly and
5983 correctly gate on TARGET_SIMD. For now - reject all
5984 constants which are not to SFmode or DFmode registers. */
5985 if (!(mode == SFmode || mode == DFmode))
5988 if (aarch64_float_const_zero_rtx_p (x))
5990 return aarch64_float_const_representable_p (x);
5994 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5996 /* Do not allow vector struct mode constants. We could support
5997 0 and -1 easily, but they need support in aarch64-simd.md. */
5998 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
6001 /* This could probably go away because
6002 we now decompose CONST_INTs according to expand_mov_immediate. */
6003 if ((GET_CODE (x) == CONST_VECTOR
6004 && aarch64_simd_valid_immediate (x, mode, false, NULL))
6005 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
6006 return !targetm.cannot_force_const_mem (mode, x);
6008 if (GET_CODE (x) == HIGH
6009 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6012 return aarch64_constant_address_p (x);
6016 aarch64_load_tp (rtx target)
6019 || GET_MODE (target) != Pmode
6020 || !register_operand (target, Pmode))
6021 target = gen_reg_rtx (Pmode);
6023 /* Can return in any reg. */
6024 emit_insn (gen_aarch64_load_tp_hard (target));
6028 /* On AAPCS systems, this is the "struct __va_list". */
6029 static GTY(()) tree va_list_type;
6031 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6032 Return the type to use as __builtin_va_list.
6034 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6046 aarch64_build_builtin_va_list (void)
6049 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6051 /* Create the type. */
6052 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6053 /* Give it the required name. */
6054 va_list_name = build_decl (BUILTINS_LOCATION,
6056 get_identifier ("__va_list"),
6058 DECL_ARTIFICIAL (va_list_name) = 1;
6059 TYPE_NAME (va_list_type) = va_list_name;
6060 TYPE_STUB_DECL (va_list_type) = va_list_name;
6062 /* Create the fields. */
6063 f_stack = build_decl (BUILTINS_LOCATION,
6064 FIELD_DECL, get_identifier ("__stack"),
6066 f_grtop = build_decl (BUILTINS_LOCATION,
6067 FIELD_DECL, get_identifier ("__gr_top"),
6069 f_vrtop = build_decl (BUILTINS_LOCATION,
6070 FIELD_DECL, get_identifier ("__vr_top"),
6072 f_groff = build_decl (BUILTINS_LOCATION,
6073 FIELD_DECL, get_identifier ("__gr_offs"),
6075 f_vroff = build_decl (BUILTINS_LOCATION,
6076 FIELD_DECL, get_identifier ("__vr_offs"),
6079 DECL_ARTIFICIAL (f_stack) = 1;
6080 DECL_ARTIFICIAL (f_grtop) = 1;
6081 DECL_ARTIFICIAL (f_vrtop) = 1;
6082 DECL_ARTIFICIAL (f_groff) = 1;
6083 DECL_ARTIFICIAL (f_vroff) = 1;
6085 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6086 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6087 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6088 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6089 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6091 TYPE_FIELDS (va_list_type) = f_stack;
6092 DECL_CHAIN (f_stack) = f_grtop;
6093 DECL_CHAIN (f_grtop) = f_vrtop;
6094 DECL_CHAIN (f_vrtop) = f_groff;
6095 DECL_CHAIN (f_groff) = f_vroff;
6097 /* Compute its layout. */
6098 layout_type (va_list_type);
6100 return va_list_type;
6103 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6105 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6107 const CUMULATIVE_ARGS *cum;
6108 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6109 tree stack, grtop, vrtop, groff, vroff;
6111 int gr_save_area_size;
6112 int vr_save_area_size;
6115 cum = &crtl->args.info;
6117 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6119 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6121 if (TARGET_GENERAL_REGS_ONLY)
6123 if (cum->aapcs_nvrn > 0)
6124 sorry ("%qs and floating point or vector arguments",
6125 "-mgeneral-regs-only");
6126 vr_save_area_size = 0;
6129 f_stack = TYPE_FIELDS (va_list_type_node);
6130 f_grtop = DECL_CHAIN (f_stack);
6131 f_vrtop = DECL_CHAIN (f_grtop);
6132 f_groff = DECL_CHAIN (f_vrtop);
6133 f_vroff = DECL_CHAIN (f_groff);
6135 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6137 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6139 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6141 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6143 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6146 /* Emit code to initialize STACK, which points to the next varargs stack
6147 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6148 by named arguments. STACK is 8-byte aligned. */
6149 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6150 if (cum->aapcs_stack_size > 0)
6151 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6152 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6153 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6155 /* Emit code to initialize GRTOP, the top of the GR save area.
6156 virtual_incoming_args_rtx should have been 16 byte aligned. */
6157 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6158 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6159 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6161 /* Emit code to initialize VRTOP, the top of the VR save area.
6162 This address is gr_save_area_bytes below GRTOP, rounded
6163 down to the next 16-byte boundary. */
6164 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6165 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6166 STACK_BOUNDARY / BITS_PER_UNIT);
6169 t = fold_build_pointer_plus_hwi (t, -vr_offset);
6170 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6171 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6173 /* Emit code to initialize GROFF, the offset from GRTOP of the
6174 next GPR argument. */
6175 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6176 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6177 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6179 /* Likewise emit code to initialize VROFF, the offset from FTOP
6180 of the next VR argument. */
6181 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6182 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6183 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6186 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6189 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6190 gimple_seq *post_p ATTRIBUTE_UNUSED)
6194 bool is_ha; /* is HFA or HVA. */
6195 bool dw_align; /* double-word align. */
6196 enum machine_mode ag_mode = VOIDmode;
6198 enum machine_mode mode;
6200 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6201 tree stack, f_top, f_off, off, arg, roundup, on_stack;
6202 HOST_WIDE_INT size, rsize, adjust, align;
6203 tree t, u, cond1, cond2;
6205 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6207 type = build_pointer_type (type);
6209 mode = TYPE_MODE (type);
6211 f_stack = TYPE_FIELDS (va_list_type_node);
6212 f_grtop = DECL_CHAIN (f_stack);
6213 f_vrtop = DECL_CHAIN (f_grtop);
6214 f_groff = DECL_CHAIN (f_vrtop);
6215 f_vroff = DECL_CHAIN (f_groff);
6217 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6218 f_stack, NULL_TREE);
6219 size = int_size_in_bytes (type);
6220 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6224 if (aarch64_vfp_is_call_or_return_candidate (mode,
6230 /* TYPE passed in fp/simd registers. */
6231 if (TARGET_GENERAL_REGS_ONLY)
6232 sorry ("%qs and floating point or vector arguments",
6233 "-mgeneral-regs-only");
6235 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6236 unshare_expr (valist), f_vrtop, NULL_TREE);
6237 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6238 unshare_expr (valist), f_vroff, NULL_TREE);
6240 rsize = nregs * UNITS_PER_VREG;
6244 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6245 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6247 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6248 && size < UNITS_PER_VREG)
6250 adjust = UNITS_PER_VREG - size;
6255 /* TYPE passed in general registers. */
6256 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6257 unshare_expr (valist), f_grtop, NULL_TREE);
6258 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6259 unshare_expr (valist), f_groff, NULL_TREE);
6260 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6261 nregs = rsize / UNITS_PER_WORD;
6266 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6267 && size < UNITS_PER_WORD)
6269 adjust = UNITS_PER_WORD - size;
6273 /* Get a local temporary for the field value. */
6274 off = get_initialized_tmp_var (f_off, pre_p, NULL);
6276 /* Emit code to branch if off >= 0. */
6277 t = build2 (GE_EXPR, boolean_type_node, off,
6278 build_int_cst (TREE_TYPE (off), 0));
6279 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6283 /* Emit: offs = (offs + 15) & -16. */
6284 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6285 build_int_cst (TREE_TYPE (off), 15));
6286 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6287 build_int_cst (TREE_TYPE (off), -16));
6288 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6293 /* Update ap.__[g|v]r_offs */
6294 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6295 build_int_cst (TREE_TYPE (off), rsize));
6296 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6300 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6302 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6303 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6304 build_int_cst (TREE_TYPE (f_off), 0));
6305 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6307 /* String up: make sure the assignment happens before the use. */
6308 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6309 COND_EXPR_ELSE (cond1) = t;
6311 /* Prepare the trees handling the argument that is passed on the stack;
6312 the top level node will store in ON_STACK. */
6313 arg = get_initialized_tmp_var (stack, pre_p, NULL);
6316 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
6317 t = fold_convert (intDI_type_node, arg);
6318 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6319 build_int_cst (TREE_TYPE (t), 15));
6320 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6321 build_int_cst (TREE_TYPE (t), -16));
6322 t = fold_convert (TREE_TYPE (arg), t);
6323 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
6327 /* Advance ap.__stack */
6328 t = fold_convert (intDI_type_node, arg);
6329 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6330 build_int_cst (TREE_TYPE (t), size + 7));
6331 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6332 build_int_cst (TREE_TYPE (t), -8));
6333 t = fold_convert (TREE_TYPE (arg), t);
6334 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
6335 /* String up roundup and advance. */
6337 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6338 /* String up with arg */
6339 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
6340 /* Big-endianness related address adjustment. */
6341 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6342 && size < UNITS_PER_WORD)
6344 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
6345 size_int (UNITS_PER_WORD - size));
6346 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
6349 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
6350 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
6352 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
6355 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
6356 build_int_cst (TREE_TYPE (off), adjust));
6358 t = fold_convert (sizetype, t);
6359 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
6363 /* type ha; // treat as "struct {ftype field[n];}"
6364 ... [computing offs]
6365 for (i = 0; i <nregs; ++i, offs += 16)
6366 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6369 tree tmp_ha, field_t, field_ptr_t;
6371 /* Declare a local variable. */
6372 tmp_ha = create_tmp_var_raw (type, "ha");
6373 gimple_add_tmp_var (tmp_ha);
6375 /* Establish the base type. */
6379 field_t = float_type_node;
6380 field_ptr_t = float_ptr_type_node;
6383 field_t = double_type_node;
6384 field_ptr_t = double_ptr_type_node;
6387 field_t = long_double_type_node;
6388 field_ptr_t = long_double_ptr_type_node;
6390 /* The half precision and quad precision are not fully supported yet. Enable
6391 the following code after the support is complete. Need to find the correct
6392 type node for __fp16 *. */
6395 field_t = float_type_node;
6396 field_ptr_t = float_ptr_type_node;
6402 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
6403 field_t = build_vector_type_for_mode (innertype, ag_mode);
6404 field_ptr_t = build_pointer_type (field_t);
6411 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
6412 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
6414 t = fold_convert (field_ptr_t, addr);
6415 t = build2 (MODIFY_EXPR, field_t,
6416 build1 (INDIRECT_REF, field_t, tmp_ha),
6417 build1 (INDIRECT_REF, field_t, t));
6419 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
6420 for (i = 1; i < nregs; ++i)
6422 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
6423 u = fold_convert (field_ptr_t, addr);
6424 u = build2 (MODIFY_EXPR, field_t,
6425 build2 (MEM_REF, field_t, tmp_ha,
6426 build_int_cst (field_ptr_t,
6428 int_size_in_bytes (field_t)))),
6429 build1 (INDIRECT_REF, field_t, u));
6430 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
6433 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
6434 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
6437 COND_EXPR_ELSE (cond2) = t;
6438 addr = fold_convert (build_pointer_type (type), cond1);
6439 addr = build_va_arg_indirect_ref (addr);
6442 addr = build_va_arg_indirect_ref (addr);
6447 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
6450 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
6451 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6454 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6455 CUMULATIVE_ARGS local_cum;
6456 int gr_saved, vr_saved;
6458 /* The caller has advanced CUM up to, but not beyond, the last named
6459 argument. Advance a local copy of CUM past the last "real" named
6460 argument, to find out how many registers are left over. */
6462 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
6464 /* Found out how many registers we need to save. */
6465 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
6466 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
6468 if (TARGET_GENERAL_REGS_ONLY)
6470 if (local_cum.aapcs_nvrn > 0)
6471 sorry ("%qs and floating point or vector arguments",
6472 "-mgeneral-regs-only");
6482 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
6483 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
6484 - gr_saved * UNITS_PER_WORD);
6485 mem = gen_frame_mem (BLKmode, ptr);
6486 set_mem_alias_set (mem, get_varargs_alias_set ());
6488 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
6493 /* We can't use move_block_from_reg, because it will use
6494 the wrong mode, storing D regs only. */
6495 enum machine_mode mode = TImode;
6498 /* Set OFF to the offset from virtual_incoming_args_rtx of
6499 the first vector register. The VR save area lies below
6500 the GR one, and is aligned to 16 bytes. */
6501 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6502 STACK_BOUNDARY / BITS_PER_UNIT);
6503 off -= vr_saved * UNITS_PER_VREG;
6505 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
6509 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
6510 mem = gen_frame_mem (mode, ptr);
6511 set_mem_alias_set (mem, get_varargs_alias_set ());
6512 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
6513 off += UNITS_PER_VREG;
6518 /* We don't save the size into *PRETEND_SIZE because we want to avoid
6519 any complication of having crtl->args.pretend_args_size changed. */
6520 cfun->machine->saved_varargs_size
6521 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6522 STACK_BOUNDARY / BITS_PER_UNIT)
6523 + vr_saved * UNITS_PER_VREG);
6527 aarch64_conditional_register_usage (void)
6532 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
6535 call_used_regs[i] = 1;
6540 /* Walk down the type tree of TYPE counting consecutive base elements.
6541 If *MODEP is VOIDmode, then set it to the first valid floating point
6542 type. If a non-floating point type is found, or if a floating point
6543 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6544 otherwise return the count in the sub-tree. */
6546 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6548 enum machine_mode mode;
6551 switch (TREE_CODE (type))
6554 mode = TYPE_MODE (type);
6555 if (mode != DFmode && mode != SFmode && mode != TFmode)
6558 if (*modep == VOIDmode)
6567 mode = TYPE_MODE (TREE_TYPE (type));
6568 if (mode != DFmode && mode != SFmode && mode != TFmode)
6571 if (*modep == VOIDmode)
6580 /* Use V2SImode and V4SImode as representatives of all 64-bit
6581 and 128-bit vector types. */
6582 size = int_size_in_bytes (type);
6595 if (*modep == VOIDmode)
6598 /* Vector modes are considered to be opaque: two vectors are
6599 equivalent for the purposes of being homogeneous aggregates
6600 if they are the same size. */
6609 tree index = TYPE_DOMAIN (type);
6611 /* Can't handle incomplete types. */
6612 if (!COMPLETE_TYPE_P (type))
6615 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6618 || !TYPE_MAX_VALUE (index)
6619 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6620 || !TYPE_MIN_VALUE (index)
6621 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6625 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6626 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6628 /* There must be no padding. */
6629 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6630 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6631 != count * GET_MODE_BITSIZE (*modep)))
6643 /* Can't handle incomplete types. */
6644 if (!COMPLETE_TYPE_P (type))
6647 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6649 if (TREE_CODE (field) != FIELD_DECL)
6652 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6658 /* There must be no padding. */
6659 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6660 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6661 != count * GET_MODE_BITSIZE (*modep)))
6668 case QUAL_UNION_TYPE:
6670 /* These aren't very interesting except in a degenerate case. */
6675 /* Can't handle incomplete types. */
6676 if (!COMPLETE_TYPE_P (type))
6679 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6681 if (TREE_CODE (field) != FIELD_DECL)
6684 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6687 count = count > sub_count ? count : sub_count;
6690 /* There must be no padding. */
6691 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6692 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6693 != count * GET_MODE_BITSIZE (*modep)))
6706 /* Return true if we use LRA instead of reload pass. */
6708 aarch64_lra_p (void)
6710 return aarch64_lra_flag;
6713 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6714 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6715 array types. The C99 floating-point complex types are also considered
6716 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6717 types, which are GCC extensions and out of the scope of AAPCS64, are
6718 treated as composite types here as well.
6720 Note that MODE itself is not sufficient in determining whether a type
6721 is such a composite type or not. This is because
6722 stor-layout.c:compute_record_mode may have already changed the MODE
6723 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6724 structure with only one field may have its MODE set to the mode of the
6725 field. Also an integer mode whose size matches the size of the
6726 RECORD_TYPE type may be used to substitute the original mode
6727 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6728 solely relied on. */
6731 aarch64_composite_type_p (const_tree type,
6732 enum machine_mode mode)
6734 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6738 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6739 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6745 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6746 type as described in AAPCS64 \S 4.1.2.
6748 See the comment above aarch64_composite_type_p for the notes on MODE. */
6751 aarch64_short_vector_p (const_tree type,
6752 enum machine_mode mode)
6754 HOST_WIDE_INT size = -1;
6756 if (type && TREE_CODE (type) == VECTOR_TYPE)
6757 size = int_size_in_bytes (type);
6758 else if (!aarch64_composite_type_p (type, mode)
6759 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6760 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6761 size = GET_MODE_SIZE (mode);
6763 return (size == 8 || size == 16) ? true : false;
6766 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6767 shall be passed or returned in simd/fp register(s) (providing these
6768 parameter passing registers are available).
6770 Upon successful return, *COUNT returns the number of needed registers,
6771 *BASE_MODE returns the mode of the individual register and when IS_HAF
6772 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6773 floating-point aggregate or a homogeneous short-vector aggregate. */
6776 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6778 enum machine_mode *base_mode,
6782 enum machine_mode new_mode = VOIDmode;
6783 bool composite_p = aarch64_composite_type_p (type, mode);
6785 if (is_ha != NULL) *is_ha = false;
6787 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6788 || aarch64_short_vector_p (type, mode))
6793 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6795 if (is_ha != NULL) *is_ha = true;
6797 new_mode = GET_MODE_INNER (mode);
6799 else if (type && composite_p)
6801 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6803 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6805 if (is_ha != NULL) *is_ha = true;
6814 *base_mode = new_mode;
6818 /* Implement TARGET_STRUCT_VALUE_RTX. */
6821 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6822 int incoming ATTRIBUTE_UNUSED)
6824 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6827 /* Implements target hook vector_mode_supported_p. */
6829 aarch64_vector_mode_supported_p (enum machine_mode mode)
6832 && (mode == V4SImode || mode == V8HImode
6833 || mode == V16QImode || mode == V2DImode
6834 || mode == V2SImode || mode == V4HImode
6835 || mode == V8QImode || mode == V2SFmode
6836 || mode == V4SFmode || mode == V2DFmode
6837 || mode == V1DFmode))
6843 /* Return appropriate SIMD container
6844 for MODE within a vector of WIDTH bits. */
6845 static enum machine_mode
6846 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6848 gcc_assert (width == 64 || width == 128);
6887 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6888 static enum machine_mode
6889 aarch64_preferred_simd_mode (enum machine_mode mode)
6891 return aarch64_simd_container_mode (mode, 128);
6894 /* Return the bitmask of possible vector sizes for the vectorizer
6897 aarch64_autovectorize_vector_sizes (void)
6902 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6903 vector types in order to conform to the AAPCS64 (see "Procedure
6904 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6905 qualify for emission with the mangled names defined in that document,
6906 a vector type must not only be of the correct mode but also be
6907 composed of AdvSIMD vector element types (e.g.
6908 _builtin_aarch64_simd_qi); these types are registered by
6909 aarch64_init_simd_builtins (). In other words, vector types defined
6910 in other ways e.g. via vector_size attribute will get default
6914 enum machine_mode mode;
6915 const char *element_type_name;
6916 const char *mangled_name;
6917 } aarch64_simd_mangle_map_entry;
6919 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6920 /* 64-bit containerized types. */
6921 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6922 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6923 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6924 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6925 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6926 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6927 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6928 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6929 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6930 /* 128-bit containerized types. */
6931 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6932 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6933 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6934 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6935 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6936 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6937 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6938 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6939 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6940 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6941 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6942 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6943 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
6944 { VOIDmode, NULL, NULL }
6947 /* Implement TARGET_MANGLE_TYPE. */
6950 aarch64_mangle_type (const_tree type)
6952 /* The AArch64 ABI documents say that "__va_list" has to be
6953 managled as if it is in the "std" namespace. */
6954 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6955 return "St9__va_list";
6957 /* Check the mode of the vector type, and the name of the vector
6958 element type, against the table. */
6959 if (TREE_CODE (type) == VECTOR_TYPE)
6961 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6963 while (pos->mode != VOIDmode)
6965 tree elt_type = TREE_TYPE (type);
6967 if (pos->mode == TYPE_MODE (type)
6968 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6969 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6970 pos->element_type_name))
6971 return pos->mangled_name;
6977 /* Use the default mangling. */
6982 is_mem_p (rtx *x, void *data ATTRIBUTE_UNUSED)
6988 is_memory_op (rtx mem_insn)
6990 rtx pattern = PATTERN (mem_insn);
6991 return for_each_rtx (&pattern, is_mem_p, NULL);
6994 /* Find the first rtx before insn that will generate an assembly
6998 aarch64_prev_real_insn (rtx insn)
7005 insn = prev_real_insn (insn);
7007 while (insn && recog_memoized (insn) < 0);
7013 is_madd_op (enum attr_type t1)
7016 /* A number of these may be AArch32 only. */
7017 enum attr_type mlatypes[] = {
7018 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
7019 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
7020 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS,TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
7023 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
7025 if (t1 == mlatypes[i])
7032 /* Check if there is a register dependency between a load and the insn
7033 for which we hold recog_data. */
7036 dep_between_memop_and_curr (rtx memop)
7044 if (!REG_P (SET_DEST (memop)))
7047 load_reg = SET_DEST (memop);
7048 for (opno = 0; opno < recog_data.n_operands; opno++)
7050 rtx operand = recog_data.operand[opno];
7052 && reg_overlap_mentioned_p (load_reg, operand))
7060 aarch64_madd_needs_nop (rtx insn)
7062 enum attr_type attr_type;
7066 if (!aarch64_fix_a53_err835769)
7069 if (recog_memoized (insn) < 0)
7072 attr_type = get_attr_type (insn);
7073 if (!is_madd_op (attr_type))
7076 prev = aarch64_prev_real_insn (insn);
7077 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
7078 Restore recog state to INSN to avoid state corruption. */
7079 extract_constrain_insn_cached (insn);
7084 body = single_set (prev);
7086 /* If the previous insn is a memory op and there is no dependency between
7087 it and the madd, emit a nop between them. If we know the previous insn is
7088 a memory op but body is NULL, emit the nop to be safe, it's probably a
7089 load/store pair insn. */
7090 if (is_memory_op (prev)
7091 && GET_MODE (recog_data.operand[0]) == DImode
7092 && (!dep_between_memop_and_curr (body)))
7100 aarch64_final_prescan_insn (rtx insn)
7102 if (aarch64_madd_needs_nop (insn))
7103 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
7107 /* Return the equivalent letter for size. */
7109 sizetochar (int size)
7113 case 64: return 'd';
7114 case 32: return 's';
7115 case 16: return 'h';
7116 case 8 : return 'b';
7117 default: gcc_unreachable ();
7121 /* Return true iff x is a uniform vector of floating-point
7122 constants, and the constant can be represented in
7123 quarter-precision form. Note, as aarch64_float_const_representable
7124 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
7126 aarch64_vect_float_const_representable_p (rtx x)
7129 REAL_VALUE_TYPE r0, ri;
7132 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7135 x0 = CONST_VECTOR_ELT (x, 0);
7136 if (!CONST_DOUBLE_P (x0))
7139 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7141 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7143 xi = CONST_VECTOR_ELT (x, i);
7144 if (!CONST_DOUBLE_P (xi))
7147 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7148 if (!REAL_VALUES_EQUAL (r0, ri))
7152 return aarch64_float_const_representable_p (x0);
7155 /* Return true for valid and false for invalid. */
7157 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
7158 struct simd_immediate_info *info)
7160 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
7162 for (i = 0; i < idx; i += (STRIDE)) \
7167 immtype = (CLASS); \
7168 elsize = (ELSIZE); \
7174 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7175 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7176 unsigned char bytes[16];
7177 int immtype = -1, matches;
7178 unsigned int invmask = inverse ? 0xff : 0;
7181 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7183 if (! (aarch64_simd_imm_zero_p (op, mode)
7184 || aarch64_vect_float_const_representable_p (op)))
7189 info->value = CONST_VECTOR_ELT (op, 0);
7190 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
7198 /* Splat vector constant out into a byte vector. */
7199 for (i = 0; i < n_elts; i++)
7201 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
7202 it must be laid out in the vector register in reverse order. */
7203 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
7204 unsigned HOST_WIDE_INT elpart;
7205 unsigned int part, parts;
7207 if (GET_CODE (el) == CONST_INT)
7209 elpart = INTVAL (el);
7212 else if (GET_CODE (el) == CONST_DOUBLE)
7214 elpart = CONST_DOUBLE_LOW (el);
7220 for (part = 0; part < parts; part++)
7223 for (byte = 0; byte < innersize; byte++)
7225 bytes[idx++] = (elpart & 0xff) ^ invmask;
7226 elpart >>= BITS_PER_UNIT;
7228 if (GET_CODE (el) == CONST_DOUBLE)
7229 elpart = CONST_DOUBLE_HIGH (el);
7234 gcc_assert (idx == GET_MODE_SIZE (mode));
7238 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7239 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7241 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7242 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7244 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7245 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7247 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7248 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7250 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7252 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7254 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7255 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7257 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7258 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7260 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7261 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7263 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7264 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7266 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7268 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7270 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7271 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7273 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7274 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7276 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7277 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7279 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7280 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7282 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7284 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7285 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7294 info->element_width = elsize;
7295 info->mvn = emvn != 0;
7296 info->shift = eshift;
7298 unsigned HOST_WIDE_INT imm = 0;
7300 if (immtype >= 12 && immtype <= 15)
7303 /* Un-invert bytes of recognized vector, if necessary. */
7305 for (i = 0; i < idx; i++)
7306 bytes[i] ^= invmask;
7310 /* FIXME: Broken on 32-bit H_W_I hosts. */
7311 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7313 for (i = 0; i < 8; i++)
7314 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7315 << (i * BITS_PER_UNIT);
7318 info->value = GEN_INT (imm);
7322 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7323 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7325 /* Construct 'abcdefgh' because the assembler cannot handle
7326 generic constants. */
7329 imm = (imm >> info->shift) & 0xff;
7330 info->value = GEN_INT (imm);
7339 aarch64_const_vec_all_same_int_p (rtx x,
7340 HOST_WIDE_INT minval,
7341 HOST_WIDE_INT maxval)
7343 HOST_WIDE_INT firstval;
7346 if (GET_CODE (x) != CONST_VECTOR
7347 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
7350 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
7351 if (firstval < minval || firstval > maxval)
7354 count = CONST_VECTOR_NUNITS (x);
7355 for (i = 1; i < count; i++)
7356 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
7362 /* Check of immediate shift constants are within range. */
7364 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
7366 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
7368 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
7370 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
7373 /* Return true if X is a uniform vector where all elements
7374 are either the floating-point constant 0.0 or the
7375 integer constant 0. */
7377 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
7379 return x == CONST0_RTX (mode);
7383 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
7385 HOST_WIDE_INT imm = INTVAL (x);
7388 for (i = 0; i < 8; i++)
7390 unsigned int byte = imm & 0xff;
7391 if (byte != 0xff && byte != 0)
7400 aarch64_mov_operand_p (rtx x,
7401 enum aarch64_symbol_context context,
7402 enum machine_mode mode)
7404 if (GET_CODE (x) == HIGH
7405 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7408 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
7411 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
7414 return aarch64_classify_symbolic_expression (x, context)
7415 == SYMBOL_TINY_ABSOLUTE;
7418 /* Return a const_int vector of VAL. */
7420 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
7422 int nunits = GET_MODE_NUNITS (mode);
7423 rtvec v = rtvec_alloc (nunits);
7426 for (i=0; i < nunits; i++)
7427 RTVEC_ELT (v, i) = GEN_INT (val);
7429 return gen_rtx_CONST_VECTOR (mode, v);
7432 /* Check OP is a legal scalar immediate for the MOVI instruction. */
7435 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
7437 enum machine_mode vmode;
7439 gcc_assert (!VECTOR_MODE_P (mode));
7440 vmode = aarch64_preferred_simd_mode (mode);
7441 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
7442 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
7445 /* Construct and return a PARALLEL RTX vector. */
7447 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
7449 int nunits = GET_MODE_NUNITS (mode);
7450 rtvec v = rtvec_alloc (nunits / 2);
7451 int base = high ? nunits / 2 : 0;
7455 for (i=0; i < nunits / 2; i++)
7456 RTVEC_ELT (v, i) = GEN_INT (base + i);
7458 t1 = gen_rtx_PARALLEL (mode, v);
7462 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
7463 HIGH (exclusive). */
7465 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7468 gcc_assert (GET_CODE (operand) == CONST_INT);
7469 lane = INTVAL (operand);
7471 if (lane < low || lane >= high)
7472 error ("lane out of range");
7476 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7478 gcc_assert (GET_CODE (operand) == CONST_INT);
7479 HOST_WIDE_INT lane = INTVAL (operand);
7481 if (lane < low || lane >= high)
7482 error ("constant out of range");
7485 /* Emit code to reinterpret one AdvSIMD type as another,
7486 without altering bits. */
7488 aarch64_simd_reinterpret (rtx dest, rtx src)
7490 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
7493 /* Emit code to place a AdvSIMD pair result in memory locations (with equal
7496 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
7497 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
7500 rtx mem = gen_rtx_MEM (mode, destaddr);
7501 rtx tmp1 = gen_reg_rtx (mode);
7502 rtx tmp2 = gen_reg_rtx (mode);
7504 emit_insn (intfn (tmp1, op1, tmp2));
7506 emit_move_insn (mem, tmp1);
7507 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
7508 emit_move_insn (mem, tmp2);
7511 /* Return TRUE if OP is a valid vector addressing mode. */
7513 aarch64_simd_mem_operand_p (rtx op)
7515 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
7516 || GET_CODE (XEXP (op, 0)) == REG);
7519 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
7520 not to early-clobber SRC registers in the process.
7522 We assume that the operands described by SRC and DEST represent a
7523 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
7524 number of components into which the copy has been decomposed. */
7526 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
7527 rtx *src, unsigned int count)
7531 if (!reg_overlap_mentioned_p (operands[0], operands[1])
7532 || REGNO (operands[0]) < REGNO (operands[1]))
7534 for (i = 0; i < count; i++)
7536 operands[2 * i] = dest[i];
7537 operands[2 * i + 1] = src[i];
7542 for (i = 0; i < count; i++)
7544 operands[2 * i] = dest[count - i - 1];
7545 operands[2 * i + 1] = src[count - i - 1];
7550 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
7551 one of VSTRUCT modes: OI, CI or XI. */
7553 aarch64_simd_attr_length_move (rtx insn)
7555 enum machine_mode mode;
7557 extract_insn_cached (insn);
7559 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
7561 mode = GET_MODE (recog_data.operand[0]);
7577 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
7578 alignment of a vector to 128 bits. */
7579 static HOST_WIDE_INT
7580 aarch64_simd_vector_alignment (const_tree type)
7582 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
7583 return MIN (align, 128);
7586 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
7588 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
7593 /* We guarantee alignment for vectors up to 128-bits. */
7594 if (tree_int_cst_compare (TYPE_SIZE (type),
7595 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
7598 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
7602 /* If VALS is a vector constant that can be loaded into a register
7603 using DUP, generate instructions to do so and return an RTX to
7604 assign to the register. Otherwise return NULL_RTX. */
7606 aarch64_simd_dup_constant (rtx vals)
7608 enum machine_mode mode = GET_MODE (vals);
7609 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7610 int n_elts = GET_MODE_NUNITS (mode);
7611 bool all_same = true;
7615 if (GET_CODE (vals) != CONST_VECTOR)
7618 for (i = 1; i < n_elts; ++i)
7620 x = CONST_VECTOR_ELT (vals, i);
7621 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
7628 /* We can load this constant by using DUP and a constant in a
7629 single ARM register. This will be cheaper than a vector
7631 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
7632 return gen_rtx_VEC_DUPLICATE (mode, x);
7636 /* Generate code to load VALS, which is a PARALLEL containing only
7637 constants (for vec_init) or CONST_VECTOR, efficiently into a
7638 register. Returns an RTX to copy into the register, or NULL_RTX
7639 for a PARALLEL that can not be converted into a CONST_VECTOR. */
7641 aarch64_simd_make_constant (rtx vals)
7643 enum machine_mode mode = GET_MODE (vals);
7645 rtx const_vec = NULL_RTX;
7646 int n_elts = GET_MODE_NUNITS (mode);
7650 if (GET_CODE (vals) == CONST_VECTOR)
7652 else if (GET_CODE (vals) == PARALLEL)
7654 /* A CONST_VECTOR must contain only CONST_INTs and
7655 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
7656 Only store valid constants in a CONST_VECTOR. */
7657 for (i = 0; i < n_elts; ++i)
7659 rtx x = XVECEXP (vals, 0, i);
7660 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
7663 if (n_const == n_elts)
7664 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7669 if (const_vec != NULL_RTX
7670 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
7671 /* Load using MOVI/MVNI. */
7673 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
7674 /* Loaded using DUP. */
7676 else if (const_vec != NULL_RTX)
7677 /* Load from constant pool. We can not take advantage of single-cycle
7678 LD1 because we need a PC-relative addressing mode. */
7681 /* A PARALLEL containing something not valid inside CONST_VECTOR.
7682 We can not construct an initializer. */
7687 aarch64_expand_vector_init (rtx target, rtx vals)
7689 enum machine_mode mode = GET_MODE (target);
7690 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7691 int n_elts = GET_MODE_NUNITS (mode);
7692 int n_var = 0, one_var = -1;
7693 bool all_same = true;
7697 x = XVECEXP (vals, 0, 0);
7698 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7699 n_var = 1, one_var = 0;
7701 for (i = 1; i < n_elts; ++i)
7703 x = XVECEXP (vals, 0, i);
7704 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7705 ++n_var, one_var = i;
7707 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7713 rtx constant = aarch64_simd_make_constant (vals);
7714 if (constant != NULL_RTX)
7716 emit_move_insn (target, constant);
7721 /* Splat a single non-constant element if we can. */
7724 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7725 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7729 /* One field is non-constant. Load constant then overwrite varying
7730 field. This is more efficient than using the stack. */
7733 rtx copy = copy_rtx (vals);
7734 rtx index = GEN_INT (one_var);
7735 enum insn_code icode;
7737 /* Load constant part of vector, substitute neighboring value for
7739 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7740 aarch64_expand_vector_init (target, copy);
7742 /* Insert variable. */
7743 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7744 icode = optab_handler (vec_set_optab, mode);
7745 gcc_assert (icode != CODE_FOR_nothing);
7746 emit_insn (GEN_FCN (icode) (target, x, index));
7750 /* Construct the vector in memory one field at a time
7751 and load the whole vector. */
7752 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7753 for (i = 0; i < n_elts; i++)
7754 emit_move_insn (adjust_address_nv (mem, inner_mode,
7755 i * GET_MODE_SIZE (inner_mode)),
7756 XVECEXP (vals, 0, i));
7757 emit_move_insn (target, mem);
7761 static unsigned HOST_WIDE_INT
7762 aarch64_shift_truncation_mask (enum machine_mode mode)
7765 (aarch64_vector_mode_supported_p (mode)
7766 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7769 #ifndef TLS_SECTION_ASM_FLAG
7770 #define TLS_SECTION_ASM_FLAG 'T'
7774 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7775 tree decl ATTRIBUTE_UNUSED)
7777 char flagchars[10], *f = flagchars;
7779 /* If we have already declared this section, we can use an
7780 abbreviated form to switch back to it -- unless this section is
7781 part of a COMDAT groups, in which case GAS requires the full
7782 declaration every time. */
7783 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7784 && (flags & SECTION_DECLARED))
7786 fprintf (asm_out_file, "\t.section\t%s\n", name);
7790 if (!(flags & SECTION_DEBUG))
7792 if (flags & SECTION_WRITE)
7794 if (flags & SECTION_CODE)
7796 if (flags & SECTION_SMALL)
7798 if (flags & SECTION_MERGE)
7800 if (flags & SECTION_STRINGS)
7802 if (flags & SECTION_TLS)
7803 *f++ = TLS_SECTION_ASM_FLAG;
7804 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7808 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7810 if (!(flags & SECTION_NOTYPE))
7815 if (flags & SECTION_BSS)
7820 #ifdef TYPE_OPERAND_FMT
7821 format = "," TYPE_OPERAND_FMT;
7826 fprintf (asm_out_file, format, type);
7828 if (flags & SECTION_ENTSIZE)
7829 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7830 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7832 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7833 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7835 fprintf (asm_out_file, ",%s,comdat",
7836 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7840 putc ('\n', asm_out_file);
7843 /* Select a format to encode pointers in exception handling data. */
7845 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7848 switch (aarch64_cmodel)
7850 case AARCH64_CMODEL_TINY:
7851 case AARCH64_CMODEL_TINY_PIC:
7852 case AARCH64_CMODEL_SMALL:
7853 case AARCH64_CMODEL_SMALL_PIC:
7854 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7856 type = DW_EH_PE_sdata4;
7859 /* No assumptions here. 8-byte relocs required. */
7860 type = DW_EH_PE_sdata8;
7863 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7866 /* Emit load exclusive. */
7869 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7870 rtx mem, rtx model_rtx)
7872 rtx (*gen) (rtx, rtx, rtx);
7876 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7877 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7878 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7879 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7884 emit_insn (gen (rval, mem, model_rtx));
7887 /* Emit store exclusive. */
7890 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7891 rtx rval, rtx mem, rtx model_rtx)
7893 rtx (*gen) (rtx, rtx, rtx, rtx);
7897 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7898 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7899 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7900 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7905 emit_insn (gen (bval, rval, mem, model_rtx));
7908 /* Mark the previous jump instruction as unlikely. */
7911 aarch64_emit_unlikely_jump (rtx insn)
7913 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
7915 insn = emit_jump_insn (insn);
7916 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
7919 /* Expand a compare and swap pattern. */
7922 aarch64_expand_compare_and_swap (rtx operands[])
7924 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7925 enum machine_mode mode, cmp_mode;
7926 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7931 oldval = operands[3];
7932 newval = operands[4];
7933 is_weak = operands[5];
7934 mod_s = operands[6];
7935 mod_f = operands[7];
7936 mode = GET_MODE (mem);
7939 /* Normally the succ memory model must be stronger than fail, but in the
7940 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7941 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7943 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7944 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7945 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7951 /* For short modes, we're going to perform the comparison in SImode,
7952 so do the zero-extension now. */
7954 rval = gen_reg_rtx (SImode);
7955 oldval = convert_modes (SImode, mode, oldval, true);
7960 /* Force the value into a register if needed. */
7961 if (!aarch64_plus_operand (oldval, mode))
7962 oldval = force_reg (cmp_mode, oldval);
7971 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7972 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7973 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7974 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7979 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7981 if (mode == QImode || mode == HImode)
7982 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7984 x = gen_rtx_REG (CCmode, CC_REGNUM);
7985 x = gen_rtx_EQ (SImode, x, const0_rtx);
7986 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7989 /* Split a compare and swap pattern. */
7992 aarch64_split_compare_and_swap (rtx operands[])
7994 rtx rval, mem, oldval, newval, scratch;
7995 enum machine_mode mode;
7997 rtx label1, label2, x, cond;
8001 oldval = operands[2];
8002 newval = operands[3];
8003 is_weak = (operands[4] != const0_rtx);
8004 scratch = operands[7];
8005 mode = GET_MODE (mem);
8010 label1 = gen_label_rtx ();
8011 emit_label (label1);
8013 label2 = gen_label_rtx ();
8015 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
8017 cond = aarch64_gen_compare_reg (NE, rval, oldval);
8018 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8019 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8020 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
8021 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8023 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
8027 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
8028 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8029 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
8030 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8034 cond = gen_rtx_REG (CCmode, CC_REGNUM);
8035 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
8036 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
8039 emit_label (label2);
8042 /* Split an atomic operation. */
8045 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
8046 rtx value, rtx model_rtx, rtx cond)
8048 enum machine_mode mode = GET_MODE (mem);
8049 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
8052 label = gen_label_rtx ();
8056 new_out = gen_lowpart (wmode, new_out);
8058 old_out = gen_lowpart (wmode, old_out);
8061 value = simplify_gen_subreg (wmode, value, mode, 0);
8063 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
8072 x = gen_rtx_AND (wmode, old_out, value);
8073 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8074 x = gen_rtx_NOT (wmode, new_out);
8075 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8079 if (CONST_INT_P (value))
8081 value = GEN_INT (-INTVAL (value));
8087 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8088 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8092 aarch64_emit_store_exclusive (mode, cond, mem,
8093 gen_lowpart (mode, new_out), model_rtx);
8095 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8096 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8097 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8098 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8102 aarch64_print_extension (void)
8104 const struct aarch64_option_extension *opt = NULL;
8106 for (opt = all_extensions; opt->name != NULL; opt++)
8107 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8108 asm_fprintf (asm_out_file, "+%s", opt->name);
8110 asm_fprintf (asm_out_file, "\n");
8114 aarch64_start_file (void)
8118 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8119 aarch64_print_extension ();
8121 else if (selected_cpu)
8123 const char *truncated_name
8124 = aarch64_rewrite_selected_cpu (selected_cpu->name);
8125 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
8126 aarch64_print_extension ();
8128 default_file_start();
8131 /* Target hook for c_mode_for_suffix. */
8132 static enum machine_mode
8133 aarch64_c_mode_for_suffix (char suffix)
8141 /* We can only represent floating point constants which will fit in
8142 "quarter-precision" values. These values are characterised by
8143 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
8146 (-1)^s * (n/16) * 2^r
8149 's' is the sign bit.
8150 'n' is an integer in the range 16 <= n <= 31.
8151 'r' is an integer in the range -3 <= r <= 4. */
8153 /* Return true iff X can be represented by a quarter-precision
8154 floating point immediate operand X. Note, we cannot represent 0.0. */
8156 aarch64_float_const_representable_p (rtx x)
8158 /* This represents our current view of how many bits
8159 make up the mantissa. */
8160 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8162 unsigned HOST_WIDE_INT mantissa, mask;
8163 HOST_WIDE_INT m1, m2;
8164 REAL_VALUE_TYPE r, m;
8166 if (!CONST_DOUBLE_P (x))
8169 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8171 /* We cannot represent infinities, NaNs or +/-zero. We won't
8172 know if we have +zero until we analyse the mantissa, but we
8173 can reject the other invalid values. */
8174 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8175 || REAL_VALUE_MINUS_ZERO (r))
8178 /* Extract exponent. */
8179 r = real_value_abs (&r);
8180 exponent = REAL_EXP (&r);
8182 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8183 highest (sign) bit, with a fixed binary point at bit point_pos.
8184 m1 holds the low part of the mantissa, m2 the high part.
8185 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8186 bits for the mantissa, this can fail (low bits will be lost). */
8187 real_ldexp (&m, &r, point_pos - exponent);
8188 REAL_VALUE_TO_INT (&m1, &m2, m);
8190 /* If the low part of the mantissa has bits set we cannot represent
8194 /* We have rejected the lower HOST_WIDE_INT, so update our
8195 understanding of how many bits lie in the mantissa and
8196 look only at the high HOST_WIDE_INT. */
8198 point_pos -= HOST_BITS_PER_WIDE_INT;
8200 /* We can only represent values with a mantissa of the form 1.xxxx. */
8201 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8202 if ((mantissa & mask) != 0)
8205 /* Having filtered unrepresentable values, we may now remove all
8206 but the highest 5 bits. */
8207 mantissa >>= point_pos - 5;
8209 /* We cannot represent the value 0.0, so reject it. This is handled
8214 /* Then, as bit 4 is always set, we can mask it off, leaving
8215 the mantissa in the range [0, 15]. */
8216 mantissa &= ~(1 << 4);
8217 gcc_assert (mantissa <= 15);
8219 /* GCC internally does not use IEEE754-like encoding (where normalized
8220 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
8221 Our mantissa values are shifted 4 places to the left relative to
8222 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8223 by 5 places to correct for GCC's representation. */
8224 exponent = 5 - exponent;
8226 return (exponent >= 0 && exponent <= 7);
8230 aarch64_output_simd_mov_immediate (rtx const_vector,
8231 enum machine_mode mode,
8235 static char templ[40];
8236 const char *mnemonic;
8237 const char *shift_op;
8238 unsigned int lane_count = 0;
8241 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
8243 /* This will return true to show const_vector is legal for use as either
8244 a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
8245 also update INFO to show how the immediate should be generated. */
8246 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
8247 gcc_assert (is_valid);
8249 element_char = sizetochar (info.element_width);
8250 lane_count = width / info.element_width;
8252 mode = GET_MODE_INNER (mode);
8253 if (mode == SFmode || mode == DFmode)
8255 gcc_assert (info.shift == 0 && ! info.mvn);
8256 if (aarch64_float_const_zero_rtx_p (info.value))
8257 info.value = GEN_INT (0);
8262 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8263 char float_buf[buf_size] = {'\0'};
8264 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8267 if (lane_count == 1)
8268 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8270 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
8271 lane_count, element_char, float_buf);
8276 mnemonic = info.mvn ? "mvni" : "movi";
8277 shift_op = info.msl ? "msl" : "lsl";
8279 if (lane_count == 1)
8280 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8281 mnemonic, UINTVAL (info.value));
8282 else if (info.shift)
8283 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8284 ", %s %d", mnemonic, lane_count, element_char,
8285 UINTVAL (info.value), shift_op, info.shift);
8287 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
8288 mnemonic, lane_count, element_char, UINTVAL (info.value));
8293 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
8294 enum machine_mode mode)
8296 enum machine_mode vmode;
8298 gcc_assert (!VECTOR_MODE_P (mode));
8299 vmode = aarch64_simd_container_mode (mode, 64);
8300 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
8301 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
8304 /* Split operands into moves from op[1] + op[2] into op[0]. */
8307 aarch64_split_combinev16qi (rtx operands[3])
8309 unsigned int dest = REGNO (operands[0]);
8310 unsigned int src1 = REGNO (operands[1]);
8311 unsigned int src2 = REGNO (operands[2]);
8312 enum machine_mode halfmode = GET_MODE (operands[1]);
8313 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
8316 gcc_assert (halfmode == V16QImode);
8318 if (src1 == dest && src2 == dest + halfregs)
8320 /* No-op move. Can't split to nothing; emit something. */
8321 emit_note (NOTE_INSN_DELETED);
8325 /* Preserve register attributes for variable tracking. */
8326 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
8327 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
8328 GET_MODE_SIZE (halfmode));
8330 /* Special case of reversed high/low parts. */
8331 if (reg_overlap_mentioned_p (operands[2], destlo)
8332 && reg_overlap_mentioned_p (operands[1], desthi))
8334 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8335 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
8336 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8338 else if (!reg_overlap_mentioned_p (operands[2], destlo))
8340 /* Try to avoid unnecessary moves if part of the result
8341 is in the right place already. */
8343 emit_move_insn (destlo, operands[1]);
8344 if (src2 != dest + halfregs)
8345 emit_move_insn (desthi, operands[2]);
8349 if (src2 != dest + halfregs)
8350 emit_move_insn (desthi, operands[2]);
8352 emit_move_insn (destlo, operands[1]);
8356 /* vec_perm support. */
8358 #define MAX_VECT_LEN 16
8360 struct expand_vec_perm_d
8362 rtx target, op0, op1;
8363 unsigned char perm[MAX_VECT_LEN];
8364 enum machine_mode vmode;
8370 /* Generate a variable permutation. */
8373 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
8375 enum machine_mode vmode = GET_MODE (target);
8376 bool one_vector_p = rtx_equal_p (op0, op1);
8378 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
8379 gcc_checking_assert (GET_MODE (op0) == vmode);
8380 gcc_checking_assert (GET_MODE (op1) == vmode);
8381 gcc_checking_assert (GET_MODE (sel) == vmode);
8382 gcc_checking_assert (TARGET_SIMD);
8386 if (vmode == V8QImode)
8388 /* Expand the argument to a V16QI mode by duplicating it. */
8389 rtx pair = gen_reg_rtx (V16QImode);
8390 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
8391 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8395 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
8402 if (vmode == V8QImode)
8404 pair = gen_reg_rtx (V16QImode);
8405 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
8406 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8410 pair = gen_reg_rtx (OImode);
8411 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
8412 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
8418 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
8420 enum machine_mode vmode = GET_MODE (target);
8421 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
8422 bool one_vector_p = rtx_equal_p (op0, op1);
8423 rtx rmask[MAX_VECT_LEN], mask;
8425 gcc_checking_assert (!BYTES_BIG_ENDIAN);
8427 /* The TBL instruction does not use a modulo index, so we must take care
8428 of that ourselves. */
8429 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
8430 for (i = 0; i < nelt; ++i)
8432 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
8433 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
8435 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
8438 /* Recognize patterns suitable for the TRN instructions. */
8440 aarch64_evpc_trn (struct expand_vec_perm_d *d)
8442 unsigned int i, odd, mask, nelt = d->nelt;
8443 rtx out, in0, in1, x;
8444 rtx (*gen) (rtx, rtx, rtx);
8445 enum machine_mode vmode = d->vmode;
8447 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8450 /* Note that these are little-endian tests.
8451 We correct for big-endian later. */
8452 if (d->perm[0] == 0)
8454 else if (d->perm[0] == 1)
8458 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8460 for (i = 0; i < nelt; i += 2)
8462 if (d->perm[i] != i + odd)
8464 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
8474 if (BYTES_BIG_ENDIAN)
8476 x = in0, in0 = in1, in1 = x;
8485 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
8486 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
8487 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
8488 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
8489 case V4SImode: gen = gen_aarch64_trn2v4si; break;
8490 case V2SImode: gen = gen_aarch64_trn2v2si; break;
8491 case V2DImode: gen = gen_aarch64_trn2v2di; break;
8492 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
8493 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
8494 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
8503 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
8504 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
8505 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
8506 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
8507 case V4SImode: gen = gen_aarch64_trn1v4si; break;
8508 case V2SImode: gen = gen_aarch64_trn1v2si; break;
8509 case V2DImode: gen = gen_aarch64_trn1v2di; break;
8510 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
8511 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
8512 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
8518 emit_insn (gen (out, in0, in1));
8522 /* Recognize patterns suitable for the UZP instructions. */
8524 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
8526 unsigned int i, odd, mask, nelt = d->nelt;
8527 rtx out, in0, in1, x;
8528 rtx (*gen) (rtx, rtx, rtx);
8529 enum machine_mode vmode = d->vmode;
8531 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8534 /* Note that these are little-endian tests.
8535 We correct for big-endian later. */
8536 if (d->perm[0] == 0)
8538 else if (d->perm[0] == 1)
8542 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8544 for (i = 0; i < nelt; i++)
8546 unsigned elt = (i * 2 + odd) & mask;
8547 if (d->perm[i] != elt)
8557 if (BYTES_BIG_ENDIAN)
8559 x = in0, in0 = in1, in1 = x;
8568 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
8569 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
8570 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
8571 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
8572 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
8573 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
8574 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
8575 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
8576 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
8577 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
8586 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
8587 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
8588 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
8589 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
8590 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
8591 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
8592 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
8593 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
8594 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
8595 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
8601 emit_insn (gen (out, in0, in1));
8605 /* Recognize patterns suitable for the ZIP instructions. */
8607 aarch64_evpc_zip (struct expand_vec_perm_d *d)
8609 unsigned int i, high, mask, nelt = d->nelt;
8610 rtx out, in0, in1, x;
8611 rtx (*gen) (rtx, rtx, rtx);
8612 enum machine_mode vmode = d->vmode;
8614 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8617 /* Note that these are little-endian tests.
8618 We correct for big-endian later. */
8620 if (d->perm[0] == high)
8623 else if (d->perm[0] == 0)
8627 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8629 for (i = 0; i < nelt / 2; i++)
8631 unsigned elt = (i + high) & mask;
8632 if (d->perm[i * 2] != elt)
8634 elt = (elt + nelt) & mask;
8635 if (d->perm[i * 2 + 1] != elt)
8645 if (BYTES_BIG_ENDIAN)
8647 x = in0, in0 = in1, in1 = x;
8656 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
8657 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
8658 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
8659 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
8660 case V4SImode: gen = gen_aarch64_zip2v4si; break;
8661 case V2SImode: gen = gen_aarch64_zip2v2si; break;
8662 case V2DImode: gen = gen_aarch64_zip2v2di; break;
8663 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
8664 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
8665 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
8674 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
8675 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
8676 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
8677 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
8678 case V4SImode: gen = gen_aarch64_zip1v4si; break;
8679 case V2SImode: gen = gen_aarch64_zip1v2si; break;
8680 case V2DImode: gen = gen_aarch64_zip1v2di; break;
8681 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
8682 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
8683 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
8689 emit_insn (gen (out, in0, in1));
8694 aarch64_evpc_dup (struct expand_vec_perm_d *d)
8696 rtx (*gen) (rtx, rtx, rtx);
8697 rtx out = d->target;
8699 enum machine_mode vmode = d->vmode;
8700 unsigned int i, elt, nelt = d->nelt;
8703 /* TODO: This may not be big-endian safe. */
8704 if (BYTES_BIG_ENDIAN)
8708 for (i = 1; i < nelt; i++)
8710 if (elt != d->perm[i])
8714 /* The generic preparation in aarch64_expand_vec_perm_const_1
8715 swaps the operand order and the permute indices if it finds
8716 d->perm[0] to be in the second operand. Thus, we can always
8717 use d->op0 and need not do any extra arithmetic to get the
8718 correct lane number. */
8720 lane = GEN_INT (elt);
8724 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8725 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8726 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8727 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8728 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8729 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8730 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8731 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8732 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8733 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8738 emit_insn (gen (out, in0, lane));
8743 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8745 rtx rperm[MAX_VECT_LEN], sel;
8746 enum machine_mode vmode = d->vmode;
8747 unsigned int i, nelt = d->nelt;
8749 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
8750 numbering of elements for big-endian, we must reverse the order. */
8751 if (BYTES_BIG_ENDIAN)
8757 /* Generic code will try constant permutation twice. Once with the
8758 original mode and again with the elements lowered to QImode.
8759 So wait and don't do the selector expansion ourselves. */
8760 if (vmode != V8QImode && vmode != V16QImode)
8763 for (i = 0; i < nelt; ++i)
8764 rperm[i] = GEN_INT (d->perm[i]);
8765 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8766 sel = force_reg (vmode, sel);
8768 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8773 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8775 /* The pattern matching functions above are written to look for a small
8776 number to begin the sequence (0, 1, N/2). If we begin with an index
8777 from the second operand, we can swap the operands. */
8778 if (d->perm[0] >= d->nelt)
8780 unsigned i, nelt = d->nelt;
8783 for (i = 0; i < nelt; ++i)
8784 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
8793 if (aarch64_evpc_zip (d))
8795 else if (aarch64_evpc_uzp (d))
8797 else if (aarch64_evpc_trn (d))
8799 else if (aarch64_evpc_dup (d))
8801 return aarch64_evpc_tbl (d);
8806 /* Expand a vec_perm_const pattern. */
8809 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8811 struct expand_vec_perm_d d;
8818 d.vmode = GET_MODE (target);
8819 gcc_assert (VECTOR_MODE_P (d.vmode));
8820 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8821 d.testing_p = false;
8823 for (i = which = 0; i < nelt; ++i)
8825 rtx e = XVECEXP (sel, 0, i);
8826 int ei = INTVAL (e) & (2 * nelt - 1);
8827 which |= (ei < nelt ? 1 : 2);
8837 d.one_vector_p = false;
8838 if (!rtx_equal_p (op0, op1))
8841 /* The elements of PERM do not suggest that only the first operand
8842 is used, but both operands are identical. Allow easier matching
8843 of the permutation by folding the permutation into the single
8847 for (i = 0; i < nelt; ++i)
8848 d.perm[i] &= nelt - 1;
8850 d.one_vector_p = true;
8855 d.one_vector_p = true;
8859 return aarch64_expand_vec_perm_const_1 (&d);
8863 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8864 const unsigned char *sel)
8866 struct expand_vec_perm_d d;
8867 unsigned int i, nelt, which;
8871 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8873 memcpy (d.perm, sel, nelt);
8875 /* Calculate whether all elements are in one vector. */
8876 for (i = which = 0; i < nelt; ++i)
8878 unsigned char e = d.perm[i];
8879 gcc_assert (e < 2 * nelt);
8880 which |= (e < nelt ? 1 : 2);
8883 /* If all elements are from the second vector, reindex as if from the
8886 for (i = 0; i < nelt; ++i)
8889 /* Check whether the mask can be applied to a single vector. */
8890 d.one_vector_p = (which != 3);
8892 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8893 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8894 if (!d.one_vector_p)
8895 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8898 ret = aarch64_expand_vec_perm_const_1 (&d);
8904 /* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
8906 aarch64_cannot_change_mode_class (enum machine_mode from,
8907 enum machine_mode to,
8908 enum reg_class rclass)
8910 /* Full-reg subregs are allowed on general regs or any class if they are
8912 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
8913 || !reg_classes_intersect_p (FP_REGS, rclass))
8916 /* Limited combinations of subregs are safe on FPREGs. Particularly,
8917 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
8918 2. Scalar to Scalar for integer modes or same size float modes.
8919 3. Vector to Vector modes. */
8920 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
8922 if (aarch64_vector_mode_supported_p (from)
8923 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
8926 if (GET_MODE_NUNITS (from) == 1
8927 && GET_MODE_NUNITS (to) == 1
8928 && (GET_MODE_CLASS (from) == MODE_INT
8932 if (aarch64_vector_mode_supported_p (from)
8933 && aarch64_vector_mode_supported_p (to))
8940 #undef TARGET_ADDRESS_COST
8941 #define TARGET_ADDRESS_COST aarch64_address_cost
8943 /* This hook will determines whether unnamed bitfields affect the alignment
8944 of the containing structure. The hook returns true if the structure
8945 should inherit the alignment requirements of an unnamed bitfield's
8947 #undef TARGET_ALIGN_ANON_BITFIELD
8948 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8950 #undef TARGET_ASM_ALIGNED_DI_OP
8951 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8953 #undef TARGET_ASM_ALIGNED_HI_OP
8954 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8956 #undef TARGET_ASM_ALIGNED_SI_OP
8957 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8959 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8960 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8961 hook_bool_const_tree_hwi_hwi_const_tree_true
8963 #undef TARGET_ASM_FILE_START
8964 #define TARGET_ASM_FILE_START aarch64_start_file
8966 #undef TARGET_ASM_OUTPUT_MI_THUNK
8967 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8969 #undef TARGET_ASM_SELECT_RTX_SECTION
8970 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8972 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8973 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8975 #undef TARGET_BUILD_BUILTIN_VA_LIST
8976 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8978 #undef TARGET_CALLEE_COPIES
8979 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8981 #undef TARGET_CAN_ELIMINATE
8982 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8984 #undef TARGET_CANNOT_FORCE_CONST_MEM
8985 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8987 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8988 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8990 /* Only the least significant bit is used for initialization guard
8992 #undef TARGET_CXX_GUARD_MASK_BIT
8993 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
8995 #undef TARGET_C_MODE_FOR_SUFFIX
8996 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8998 #ifdef TARGET_BIG_ENDIAN_DEFAULT
8999 #undef TARGET_DEFAULT_TARGET_FLAGS
9000 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
9003 #undef TARGET_CLASS_MAX_NREGS
9004 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
9006 #undef TARGET_BUILTIN_DECL
9007 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
9009 #undef TARGET_EXPAND_BUILTIN
9010 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
9012 #undef TARGET_EXPAND_BUILTIN_VA_START
9013 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
9015 #undef TARGET_FOLD_BUILTIN
9016 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
9018 #undef TARGET_FUNCTION_ARG
9019 #define TARGET_FUNCTION_ARG aarch64_function_arg
9021 #undef TARGET_FUNCTION_ARG_ADVANCE
9022 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
9024 #undef TARGET_FUNCTION_ARG_BOUNDARY
9025 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
9027 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
9028 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
9030 #undef TARGET_FUNCTION_VALUE
9031 #define TARGET_FUNCTION_VALUE aarch64_function_value
9033 #undef TARGET_FUNCTION_VALUE_REGNO_P
9034 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
9036 #undef TARGET_FRAME_POINTER_REQUIRED
9037 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
9039 #undef TARGET_GIMPLE_FOLD_BUILTIN
9040 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
9042 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
9043 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
9045 #undef TARGET_INIT_BUILTINS
9046 #define TARGET_INIT_BUILTINS aarch64_init_builtins
9048 #undef TARGET_LEGITIMATE_ADDRESS_P
9049 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
9051 #undef TARGET_LEGITIMATE_CONSTANT_P
9052 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
9054 #undef TARGET_LIBGCC_CMP_RETURN_MODE
9055 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
9058 #define TARGET_LRA_P aarch64_lra_p
9060 #undef TARGET_MANGLE_TYPE
9061 #define TARGET_MANGLE_TYPE aarch64_mangle_type
9063 #undef TARGET_MEMORY_MOVE_COST
9064 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
9066 #undef TARGET_MUST_PASS_IN_STACK
9067 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
9069 /* This target hook should return true if accesses to volatile bitfields
9070 should use the narrowest mode possible. It should return false if these
9071 accesses should use the bitfield container type. */
9072 #undef TARGET_NARROW_VOLATILE_BITFIELD
9073 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
9075 #undef TARGET_OPTION_OVERRIDE
9076 #define TARGET_OPTION_OVERRIDE aarch64_override_options
9078 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
9079 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
9080 aarch64_override_options_after_change
9082 #undef TARGET_PASS_BY_REFERENCE
9083 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
9085 #undef TARGET_PREFERRED_RELOAD_CLASS
9086 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
9088 #undef TARGET_SECONDARY_RELOAD
9089 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
9091 #undef TARGET_SHIFT_TRUNCATION_MASK
9092 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
9094 #undef TARGET_SETUP_INCOMING_VARARGS
9095 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
9097 #undef TARGET_STRUCT_VALUE_RTX
9098 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
9100 #undef TARGET_REGISTER_MOVE_COST
9101 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
9103 #undef TARGET_RETURN_IN_MEMORY
9104 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
9106 #undef TARGET_RETURN_IN_MSB
9107 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
9109 #undef TARGET_RTX_COSTS
9110 #define TARGET_RTX_COSTS aarch64_rtx_costs
9112 #undef TARGET_SCHED_ISSUE_RATE
9113 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
9115 #undef TARGET_TRAMPOLINE_INIT
9116 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
9118 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9119 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
9121 #undef TARGET_VECTOR_MODE_SUPPORTED_P
9122 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
9124 #undef TARGET_ARRAY_MODE_SUPPORTED_P
9125 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
9127 #undef TARGET_VECTORIZE_ADD_STMT_COST
9128 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
9130 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
9131 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
9132 aarch64_builtin_vectorization_cost
9134 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
9135 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
9137 #undef TARGET_VECTORIZE_BUILTINS
9138 #define TARGET_VECTORIZE_BUILTINS
9140 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
9141 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
9142 aarch64_builtin_vectorized_function
9144 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
9145 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
9146 aarch64_autovectorize_vector_sizes
9148 /* Section anchor support. */
9150 #undef TARGET_MIN_ANCHOR_OFFSET
9151 #define TARGET_MIN_ANCHOR_OFFSET -256
9153 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
9154 byte offset; we can do much more for larger data types, but have no way
9155 to determine the size of the access. We assume accesses are aligned. */
9156 #undef TARGET_MAX_ANCHOR_OFFSET
9157 #define TARGET_MAX_ANCHOR_OFFSET 4095
9159 #undef TARGET_VECTOR_ALIGNMENT
9160 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
9162 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
9163 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
9164 aarch64_simd_vector_alignment_reachable
9166 /* vec_perm support. */
9168 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
9169 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
9170 aarch64_vectorize_vec_perm_const_ok
9173 #undef TARGET_FIXED_CONDITION_CODE_REGS
9174 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
9176 #undef TARGET_FLAGS_REGNUM
9177 #define TARGET_FLAGS_REGNUM CC_REGNUM
9179 struct gcc_target targetm = TARGET_INITIALIZER;
9181 #include "gt-aarch64.h"