1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
25 #include "insn-codes.h"
27 #include "insn-attr.h"
31 #include "hard-reg-set.h"
37 #include "target-def.h"
38 #include "targhooks.h"
43 #include "langhooks.h"
44 #include "diagnostic-core.h"
49 /* Classifies an address.
52 A simple base register plus immediate offset.
55 A base register indexed by immediate offset with writeback.
58 A base register indexed by (optionally scaled) register.
61 A base register indexed by (optionally scaled) zero-extended register.
64 A base register indexed by (optionally scaled) sign-extended register.
67 A LO_SUM rtx with a base register and "LO12" symbol relocation.
70 A constant symbolic address, in pc-relative literal pool. */
72 enum aarch64_address_type {
82 struct aarch64_address_info {
83 enum aarch64_address_type type;
87 enum aarch64_symbol_type symbol_type;
90 /* The current code model. */
91 enum aarch64_code_model aarch64_cmodel;
94 #undef TARGET_HAVE_TLS
95 #define TARGET_HAVE_TLS 1
98 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
99 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
101 enum machine_mode *, int *,
103 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
104 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
105 static void aarch64_override_options_after_change (void);
106 static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *,
107 int *, unsigned char *, int *, int *);
108 static bool aarch64_vector_mode_supported_p (enum machine_mode);
109 static unsigned bit_count (unsigned HOST_WIDE_INT);
110 static bool aarch64_const_vec_all_same_int_p (rtx,
111 HOST_WIDE_INT, HOST_WIDE_INT);
113 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
114 const unsigned char *sel);
116 /* The processor for which instructions should be scheduled. */
117 enum aarch64_processor aarch64_tune = generic;
119 /* The current tuning set. */
120 const struct tune_params *aarch64_tune_params;
122 /* Mask to specify which instructions we are allowed to generate. */
123 unsigned long aarch64_isa_flags = 0;
125 /* Mask to specify which instruction scheduling options should be used. */
126 unsigned long aarch64_tune_flags = 0;
128 /* Tuning parameters. */
130 #if HAVE_DESIGNATED_INITIALIZERS
131 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
133 #define NAMED_PARAM(NAME, VAL) (VAL)
136 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
139 static const struct cpu_rtx_cost_table generic_rtx_cost_table =
141 NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
142 NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
143 NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
144 NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
145 NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
146 NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
147 NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
148 NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
149 NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
150 NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
151 NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
152 NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
155 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
158 static const struct cpu_addrcost_table generic_addrcost_table =
160 NAMED_PARAM (pre_modify, 0),
161 NAMED_PARAM (post_modify, 0),
162 NAMED_PARAM (register_offset, 0),
163 NAMED_PARAM (register_extend, 0),
164 NAMED_PARAM (imm_offset, 0)
167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
170 static const struct cpu_regmove_cost generic_regmove_cost =
172 NAMED_PARAM (GP2GP, 1),
173 NAMED_PARAM (GP2FP, 2),
174 NAMED_PARAM (FP2GP, 2),
175 /* We currently do not provide direct support for TFmode Q->Q move.
176 Therefore we need to raise the cost above 2 in order to have
177 reload handle the situation. */
178 NAMED_PARAM (FP2FP, 4)
181 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
184 static const struct tune_params generic_tunings =
186 &generic_rtx_cost_table,
187 &generic_addrcost_table,
188 &generic_regmove_cost,
189 NAMED_PARAM (memmov_cost, 4)
192 /* A processor implementing AArch64. */
195 const char *const name;
196 enum aarch64_processor core;
198 const unsigned long flags;
199 const struct tune_params *const tune;
202 /* Processor cores implementing AArch64. */
203 static const struct processor all_cores[] =
205 #define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
206 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
207 #include "aarch64-cores.def"
209 {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
210 {NULL, aarch64_none, NULL, 0, NULL}
213 /* Architectures implementing AArch64. */
214 static const struct processor all_architectures[] =
216 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
217 {NAME, CORE, #ARCH, FLAGS, NULL},
218 #include "aarch64-arches.def"
220 {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
221 {NULL, aarch64_none, NULL, 0, NULL}
224 /* Target specification. These are populated as command-line arguments
225 are processed, or NULL if not specified. */
226 static const struct processor *selected_arch;
227 static const struct processor *selected_cpu;
228 static const struct processor *selected_tune;
230 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
232 /* An ISA extension in the co-processor and main instruction set space. */
233 struct aarch64_option_extension
235 const char *const name;
236 const unsigned long flags_on;
237 const unsigned long flags_off;
240 /* ISA extensions in AArch64. */
241 static const struct aarch64_option_extension all_extensions[] =
243 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
244 {NAME, FLAGS_ON, FLAGS_OFF},
245 #include "aarch64-option-extensions.def"
246 #undef AARCH64_OPT_EXTENSION
250 /* Used to track the size of an address when generating a pre/post
251 increment address. */
252 static enum machine_mode aarch64_memory_reference_mode;
254 /* Used to force GTY into this file. */
255 static GTY(()) int gty_dummy;
257 /* A table of valid AArch64 "bitmask immediate" values for
258 logical instructions. */
260 #define AARCH64_NUM_BITMASKS 5334
261 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
263 /* Did we set flag_omit_frame_pointer just so
264 aarch64_frame_pointer_required would be called? */
265 static bool faked_omit_frame_pointer;
267 typedef enum aarch64_cond_code
269 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
270 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
271 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
275 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
277 /* The condition codes of the processor, and the inverse function. */
278 static const char * const aarch64_condition_codes[] =
280 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
281 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
284 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
286 aarch64_dbx_register_number (unsigned regno)
288 if (GP_REGNUM_P (regno))
289 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
290 else if (regno == SP_REGNUM)
291 return AARCH64_DWARF_SP;
292 else if (FP_REGNUM_P (regno))
293 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
295 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
296 equivalent DWARF register. */
297 return DWARF_FRAME_REGISTERS;
300 /* Return TRUE if MODE is any of the large INT modes. */
302 aarch64_vect_struct_mode_p (enum machine_mode mode)
304 return mode == OImode || mode == CImode || mode == XImode;
307 /* Return TRUE if MODE is any of the vector modes. */
309 aarch64_vector_mode_p (enum machine_mode mode)
311 return aarch64_vector_mode_supported_p (mode)
312 || aarch64_vect_struct_mode_p (mode);
315 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
317 aarch64_array_mode_supported_p (enum machine_mode mode,
318 unsigned HOST_WIDE_INT nelems)
321 && AARCH64_VALID_SIMD_QREG_MODE (mode)
322 && (nelems >= 2 && nelems <= 4))
328 /* Implement HARD_REGNO_NREGS. */
331 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
333 switch (aarch64_regno_regclass (regno))
337 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
339 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
344 /* Implement HARD_REGNO_MODE_OK. */
347 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
349 if (GET_MODE_CLASS (mode) == MODE_CC)
350 return regno == CC_REGNUM;
352 if (regno == SP_REGNUM || regno == FRAME_POINTER_REGNUM
353 || regno == ARG_POINTER_REGNUM)
354 return mode == Pmode;
356 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
359 if (FP_REGNUM_P (regno))
361 if (aarch64_vect_struct_mode_p (mode))
363 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
371 /* Return true if calls to DECL should be treated as
372 long-calls (ie called via a register). */
374 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
379 /* Return true if calls to symbol-ref SYM should be treated as
380 long-calls (ie called via a register). */
382 aarch64_is_long_call_p (rtx sym)
384 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
387 /* Return true if the offsets to a zero/sign-extract operation
388 represent an expression that matches an extend operation. The
389 operands represent the parameters from
391 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
393 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
396 HOST_WIDE_INT mult_val, extract_val;
398 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
401 mult_val = INTVAL (mult_imm);
402 extract_val = INTVAL (extract_imm);
405 && extract_val < GET_MODE_BITSIZE (mode)
406 && exact_log2 (extract_val & ~7) > 0
407 && (extract_val & 7) <= 4
408 && mult_val == (1 << (extract_val & 7)))
414 /* Emit an insn that's a simple single-set. Both the operands must be
415 known to be valid. */
417 emit_set_insn (rtx x, rtx y)
419 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
422 /* X and Y are two things to compare using CODE. Emit the compare insn and
423 return the rtx for register 0 in the proper mode. */
425 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
427 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
428 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
430 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
434 /* Build the SYMBOL_REF for __tls_get_addr. */
436 static GTY(()) rtx tls_get_addr_libfunc;
439 aarch64_tls_get_addr (void)
441 if (!tls_get_addr_libfunc)
442 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
443 return tls_get_addr_libfunc;
446 /* Return the TLS model to use for ADDR. */
448 static enum tls_model
449 tls_symbolic_operand_type (rtx addr)
451 enum tls_model tls_kind = TLS_MODEL_NONE;
454 if (GET_CODE (addr) == CONST)
456 split_const (addr, &sym, &addend);
457 if (GET_CODE (sym) == SYMBOL_REF)
458 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
460 else if (GET_CODE (addr) == SYMBOL_REF)
461 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
466 /* We'll allow lo_sum's in addresses in our legitimate addresses
467 so that combine would take care of combining addresses where
468 necessary, but for generation purposes, we'll generate the address
471 tmp = hi (symbol_ref); adrp x1, foo
472 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo12:foo
476 adrp x1, :got:foo adrp tmp, :tlsgd:foo
477 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
481 Load TLS symbol, depending on TLS mechanism and TLS access model.
483 Global Dynamic - Traditional TLS:
485 add dest, tmp, #:tlsgd_lo12:imm
488 Global Dynamic - TLS Descriptors:
489 adrp dest, :tlsdesc:imm
490 ldr tmp, [dest, #:tlsdesc_lo12:imm]
491 add dest, dest, #:tlsdesc_lo12:imm
498 adrp tmp, :gottprel:imm
499 ldr dest, [tmp, #:gottprel_lo12:imm]
504 add t0, tp, #:tprel_hi12:imm
505 add t0, t0, #:tprel_lo12_nc:imm
509 aarch64_load_symref_appropriately (rtx dest, rtx imm,
510 enum aarch64_symbol_type type)
514 case SYMBOL_SMALL_ABSOLUTE:
517 if (can_create_pseudo_p ())
519 tmp_reg = gen_reg_rtx (Pmode);
522 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
523 emit_insn (gen_add_losym (dest, tmp_reg, imm));
527 case SYMBOL_SMALL_GOT:
530 if (can_create_pseudo_p ())
532 tmp_reg = gen_reg_rtx (Pmode);
534 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
535 emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
539 case SYMBOL_SMALL_TLSGD:
542 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
545 emit_call_insn (gen_tlsgd_small (result, imm));
546 insns = get_insns ();
549 RTL_CONST_CALL_P (insns) = 1;
550 emit_libcall_block (insns, dest, result, imm);
554 case SYMBOL_SMALL_TLSDESC:
556 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
559 emit_insn (gen_tlsdesc_small (imm));
560 tp = aarch64_load_tp (NULL);
561 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
562 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
566 case SYMBOL_SMALL_GOTTPREL:
568 rtx tmp_reg = gen_reg_rtx (Pmode);
569 rtx tp = aarch64_load_tp (NULL);
570 emit_insn (gen_tlsie_small (tmp_reg, imm));
571 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
572 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
576 case SYMBOL_SMALL_TPREL:
578 rtx tp = aarch64_load_tp (NULL);
579 emit_insn (gen_tlsle_small (dest, tp, imm));
580 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
589 /* Emit a move from SRC to DEST. Assume that the move expanders can
590 handle all moves if !can_create_pseudo_p (). The distinction is
591 important because, unlike emit_move_insn, the move expanders know
592 how to force Pmode objects into the constant pool even when the
593 constant pool address is not itself legitimate. */
595 aarch64_emit_move (rtx dest, rtx src)
597 return (can_create_pseudo_p ()
598 ? emit_move_insn (dest, src)
599 : emit_move_insn_1 (dest, src));
/* Emit the insns for a TImode move from SRC to DST, one word_mode half
   at a time.  When both operands are registers and one is a general
   register while the other is an FP/SIMD register, the dedicated
   movtilow/movtihigh (r -> w) or movdi_tilow/movdi_tihigh (w -> r)
   patterns are emitted; the remaining cases move the low and high
   halves with aarch64_emit_move.  DST must be in TImode.  */
603 aarch64_split_128bit_move (rtx dst, rtx src)
607 gcc_assert (GET_MODE (dst) == TImode);
609 if (REG_P (dst) && REG_P (src))
611 int src_regno = REGNO (src);
612 int dst_regno = REGNO (dst);
614 gcc_assert (GET_MODE (src) == TImode);
616 /* Handle r -> w, w -> r. */
617 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
619 emit_insn (gen_aarch64_movtilow_di (dst,
620 gen_lowpart (word_mode, src)));
621 emit_insn (gen_aarch64_movtihigh_di (dst,
622 gen_highpart (word_mode, src)));
625 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
627 emit_insn (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst),
629 emit_insn (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst),
633 /* Fall through to r -> r cases. */
636 low_dst = gen_lowpart (word_mode, dst);
638 && reg_overlap_mentioned_p (low_dst, src))
/* The low half of DST overlaps SRC: move the high half first,
   presumably so that SRC is not clobbered before its high half has
   been read.  */
640 aarch64_emit_move (gen_highpart (word_mode, dst),
641 gen_highpart_mode (word_mode, TImode, src));
642 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
/* No overlap: move the low half first, then the high half.  */
646 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
647 aarch64_emit_move (gen_highpart (word_mode, dst),
648 gen_highpart_mode (word_mode, TImode, src));
/* Return true unless SRC is a register and both DST and SRC are FP/SIMD
   registers (as tested by FP_REGNUM_P).
   NOTE(review): REGNO (dst) is evaluated whenever SRC is a register, so
   DST is presumably expected to be a register in that case -- confirm
   against callers.  */
653 aarch64_split_128bit_move_p (rtx dst, rtx src)
655 return (! REG_P (src)
656 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
/* Put VALUE into a register.  If new pseudos can be created, return the
   result of force_reg in Pmode; otherwise move VALUE into X with
   aarch64_emit_move.  */
660 aarch64_force_temporary (rtx x, rtx value)
662 if (can_create_pseudo_p ())
663 return force_reg (Pmode, value);
666 x = aarch64_emit_move (x, value);
/* Return an rtx, built with plus_constant in MODE, for REG plus OFFSET.
   When OFFSET is not accepted by aarch64_plus_immediate, it is first
   loaded into a register via aarch64_force_temporary (using TEMP) and
   added to REG there.  */
673 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
675 if (!aarch64_plus_immediate (GEN_INT (offset), DImode))
678 /* Load the full offset into a register. This
679 might be improvable in the future. */
680 high = GEN_INT (offset);
682 high = aarch64_force_temporary (temp, high);
683 reg = aarch64_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
685 return plus_constant (mode, reg, offset);
689 aarch64_expand_mov_immediate (rtx dest, rtx imm)
691 enum machine_mode mode = GET_MODE (dest);
692 unsigned HOST_WIDE_INT mask;
695 unsigned HOST_WIDE_INT val;
698 int one_match, zero_match;
700 gcc_assert (mode == SImode || mode == DImode);
702 /* Check on what type of symbol it is. */
703 if (GET_CODE (imm) == SYMBOL_REF
704 || GET_CODE (imm) == LABEL_REF
705 || GET_CODE (imm) == CONST)
707 rtx mem, base, offset;
708 enum aarch64_symbol_type sty;
710 /* If we have (const (plus symbol offset)), separate out the offset
711 before we start classifying the symbol. */
712 split_const (imm, &base, &offset);
714 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
717 case SYMBOL_FORCE_TO_MEM:
718 if (offset != const0_rtx
719 && targetm.cannot_force_const_mem (mode, imm))
721 gcc_assert(can_create_pseudo_p ());
722 base = aarch64_force_temporary (dest, base);
723 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
724 aarch64_emit_move (dest, base);
727 mem = force_const_mem (mode, imm);
729 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
732 case SYMBOL_SMALL_TLSGD:
733 case SYMBOL_SMALL_TLSDESC:
734 case SYMBOL_SMALL_GOTTPREL:
735 case SYMBOL_SMALL_GOT:
736 if (offset != const0_rtx)
738 gcc_assert(can_create_pseudo_p ());
739 base = aarch64_force_temporary (dest, base);
740 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
741 aarch64_emit_move (dest, base);
746 case SYMBOL_SMALL_TPREL:
747 case SYMBOL_SMALL_ABSOLUTE:
748 aarch64_load_symref_appropriately (dest, imm, sty);
756 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
758 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
762 if (!CONST_INT_P (imm))
764 if (GET_CODE (imm) == HIGH)
765 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
768 rtx mem = force_const_mem (mode, imm);
770 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
778 /* We know we can't do this in 1 insn, and we must be able to do it
779 in two; so don't mess around looking for sequences that don't buy
781 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
782 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
783 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
787 /* Remaining cases are all for DImode. */
790 subtargets = optimize && can_create_pseudo_p ();
796 for (i = 0; i < 64; i += 16, mask <<= 16)
798 if ((val & mask) == 0)
800 else if ((val & mask) == mask)
807 for (i = 0; i < 64; i += 16, mask <<= 16)
809 if ((val & mask) != mask)
811 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
812 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
813 GEN_INT ((val >> i) & 0xffff)));
821 goto simple_sequence;
823 mask = 0x0ffff0000UL;
824 for (i = 16; i < 64; i += 16, mask <<= 16)
826 HOST_WIDE_INT comp = mask & ~(mask - 1);
828 if (aarch64_uimm12_shift (val - (val & mask)))
830 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
832 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
833 emit_insn (gen_adddi3 (dest, subtarget,
834 GEN_INT (val - (val & mask))));
837 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
839 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
841 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
842 GEN_INT ((val + comp) & mask)));
843 emit_insn (gen_adddi3 (dest, subtarget,
844 GEN_INT (val - ((val + comp) & mask))));
847 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
849 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
851 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
852 GEN_INT ((val - comp) | ~mask)));
853 emit_insn (gen_adddi3 (dest, subtarget,
854 GEN_INT (val - ((val - comp) | ~mask))));
857 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
859 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
861 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
862 GEN_INT (val | ~mask)));
863 emit_insn (gen_adddi3 (dest, subtarget,
864 GEN_INT (val - (val | ~mask))));
869 /* See if we can do it by arithmetically combining two
871 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
876 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
877 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
879 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
880 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
881 GEN_INT (aarch64_bitmasks[i])));
882 emit_insn (gen_adddi3 (dest, subtarget,
883 GEN_INT (val - aarch64_bitmasks[i])));
887 for (j = 0; j < 64; j += 16, mask <<= 16)
889 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
891 emit_insn (gen_rtx_SET (VOIDmode, dest,
892 GEN_INT (aarch64_bitmasks[i])));
893 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
894 GEN_INT ((val >> j) & 0xffff)));
900 /* See if we can do it by logically combining two immediates. */
901 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
903 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
907 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
908 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
910 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
911 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
912 GEN_INT (aarch64_bitmasks[i])));
913 emit_insn (gen_iordi3 (dest, subtarget,
914 GEN_INT (aarch64_bitmasks[j])));
918 else if ((val & aarch64_bitmasks[i]) == val)
922 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
923 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
926 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
927 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
928 GEN_INT (aarch64_bitmasks[j])));
929 emit_insn (gen_anddi3 (dest, subtarget,
930 GEN_INT (aarch64_bitmasks[i])));
939 for (i = 0; i < 64; i += 16, mask <<= 16)
941 if ((val & mask) != 0)
945 emit_insn (gen_rtx_SET (VOIDmode, dest,
946 GEN_INT (val & mask)));
950 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
951 GEN_INT ((val >> i) & 0xffff)));
/* Decide whether a call to DECL may be emitted as a sibling call.  EXP is
   unused.  */
957 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
959 /* Indirect calls are not currently supported. */
963 /* Cannot tail-call to long-calls, since these are outside of the
964 range of a branch instruction (we could handle this if we added
965 support for indirect tail-calls). */
966 if (aarch64_decl_is_long_call_p (decl))
972 /* Implement TARGET_PASS_BY_REFERENCE. */
975 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
976 enum machine_mode mode,
978 bool named ATTRIBUTE_UNUSED)
981 enum machine_mode dummymode;
984 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
985 size = (mode == BLKmode && type)
986 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
990 /* Arrays always passed by reference. */
991 if (TREE_CODE (type) == ARRAY_TYPE)
993 /* Other aggregates based on their size. */
994 if (AGGREGATE_TYPE_P (type))
995 size = int_size_in_bytes (type);
998 /* Variable sized arguments are always passed by reference. */
1002 /* Can this be a candidate to be passed in fp/simd register(s)? */
1003 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1008 /* Arguments which are variable sized or larger than 2 registers are
1009 passed by reference unless they are a homogeneous floating point
1011 return size > 2 * UNITS_PER_WORD;
1014 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1016 aarch64_return_in_msb (const_tree valtype)
1018 enum machine_mode dummy_mode;
1021 /* Never happens in little-endian mode. */
1022 if (!BYTES_BIG_ENDIAN)
1025 /* Only composite types smaller than or equal to 16 bytes can
1026 be potentially returned in registers. */
1027 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1028 || int_size_in_bytes (valtype) <= 0
1029 || int_size_in_bytes (valtype) > 16)
1032 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1033 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1034 is always passed/returned in the least significant bits of fp/simd
1036 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1037 &dummy_mode, &dummy_int, NULL))
1043 /* Implement TARGET_FUNCTION_VALUE.
1044 Define how to find the value returned by a function. */
1047 aarch64_function_value (const_tree type, const_tree func,
1048 bool outgoing ATTRIBUTE_UNUSED)
1050 enum machine_mode mode;
1053 enum machine_mode ag_mode;
1055 mode = TYPE_MODE (type);
1056 if (INTEGRAL_TYPE_P (type))
1057 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1059 if (aarch64_return_in_msb (type))
1061 HOST_WIDE_INT size = int_size_in_bytes (type);
1063 if (size % UNITS_PER_WORD != 0)
1065 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1066 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1070 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1071 &ag_mode, &count, NULL))
1073 if (!aarch64_composite_type_p (type, mode))
1075 gcc_assert (count == 1 && mode == ag_mode);
1076 return gen_rtx_REG (mode, V0_REGNUM);
1083 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1084 for (i = 0; i < count; i++)
1086 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1087 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1088 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1089 XVECEXP (par, 0, i) = tmp;
1095 return gen_rtx_REG (mode, R0_REGNUM);
1098 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1099 Return true if REGNO is the number of a hard register in which the values
1100 of a called function may come back. */
1103 aarch64_function_value_regno_p (const unsigned int regno)
1105 /* Maximum of 16 bytes can be returned in the general registers. Examples
1106 of 16-byte return values are: 128-bit integers and 16-byte small
1107 structures (excluding homogeneous floating-point aggregates). */
1108 if (regno == R0_REGNUM || regno == R1_REGNUM)
1111 /* Up to four fp/simd registers can return a function value, e.g. a
1112 homogeneous floating-point aggregate having four members. */
1113 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1114 return !TARGET_GENERAL_REGS_ONLY;
1119 /* Implement TARGET_RETURN_IN_MEMORY.
1121 If the type T of the result of a function is such that
1123 would require that arg be passed as a value in a register (or set of
1124 registers) according to the parameter passing rules, then the result
1125 is returned in the same registers as would be used for such an
1129 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1132 enum machine_mode ag_mode;
1135 if (!AGGREGATE_TYPE_P (type)
1136 && TREE_CODE (type) != COMPLEX_TYPE
1137 && TREE_CODE (type) != VECTOR_TYPE)
1138 /* Simple scalar types always returned in registers. */
1141 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1148 /* Types larger than 2 registers returned in memory. */
1149 size = int_size_in_bytes (type);
1150 return (size < 0 || size > 2 * UNITS_PER_WORD);
/* Wrapper around aarch64_vfp_is_call_or_return_candidate for an argument
   of MODE and TYPE.  The machine-mode output of that query is stored in
   the aapcs_vfp_rmode field of the CUMULATIVE_ARGS behind PCUM_V.  */
1154 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1155 const_tree type, int *nregs)
1157 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1158 return aarch64_vfp_is_call_or_return_candidate (mode,
1160 &pcum->aapcs_vfp_rmode,
1165 /* Given MODE and TYPE of a function argument, return the alignment in
1166 bits. The idea is to suppress any stronger alignment requested by
1167 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1168 This is a helper function for local use only. */
1171 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1173 unsigned int alignment;
1177 if (!integer_zerop (TYPE_SIZE (type)))
1179 if (TYPE_MODE (type) == mode)
1180 alignment = TYPE_ALIGN (type);
1182 alignment = GET_MODE_ALIGNMENT (mode);
1188 alignment = GET_MODE_ALIGNMENT (mode);
1193 /* Layout a function argument according to the AAPCS64 rules. The rule
1194 numbers refer to the rule numbers in the AAPCS64. */
1197 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1199 bool named ATTRIBUTE_UNUSED)
1201 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1202 int ncrn, nvrn, nregs;
1203 bool allocate_ncrn, allocate_nvrn;
1205 /* We need to do this once per argument. */
1206 if (pcum->aapcs_arg_processed)
1209 pcum->aapcs_arg_processed = true;
1211 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1212 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1217 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1218 The following code thus handles passing by SIMD/FP registers first. */
1220 nvrn = pcum->aapcs_nvrn;
1222 /* C1 - C5 for floating point, homogeneous floating point aggregates (HFA)
1223 and homogeneous short-vector aggregates (HVA). */
1226 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1228 pcum->aapcs_nextnvrn = nvrn + nregs;
1229 if (!aarch64_composite_type_p (type, mode))
1231 gcc_assert (nregs == 1);
1232 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1238 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1239 for (i = 0; i < nregs; i++)
1241 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1242 V0_REGNUM + nvrn + i);
1243 tmp = gen_rtx_EXPR_LIST
1245 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1246 XVECEXP (par, 0, i) = tmp;
1248 pcum->aapcs_reg = par;
1254 /* C.3 NSRN is set to 8. */
1255 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1260 ncrn = pcum->aapcs_ncrn;
1261 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1262 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1265 /* C6 - C9, though the sign and zero extension semantics are
1266 handled elsewhere. This is the case where the argument fits
1267 entirely in general registers. */
1268 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1270 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1272 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1274 /* C.8 if the argument has an alignment of 16 then the NGRN is
1275 rounded up to the next even number. */
1276 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1279 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1281 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1282 A reg is still generated for it, but the caller should be smart
1283 enough not to use it. */
1284 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1286 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1293 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1294 for (i = 0; i < nregs; i++)
1296 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1297 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1298 GEN_INT (i * UNITS_PER_WORD));
1299 XVECEXP (par, 0, i) = tmp;
1301 pcum->aapcs_reg = par;
1304 pcum->aapcs_nextncrn = ncrn + nregs;
1309 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1311 /* The argument is passed on stack; record the needed number of words for
1312 this argument (we can re-use NREGS) and align the total size if
1315 pcum->aapcs_stack_words = nregs;
1316 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1317 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1318 16 / UNITS_PER_WORD) + 1;
1322 /* Implement TARGET_FUNCTION_ARG. */
1325 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1326 const_tree type, bool named)
1328 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1329 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1331 if (mode == VOIDmode)
1334 aarch64_layout_arg (pcum_v, mode, type, named);
1335 return pcum->aapcs_reg;
/* Reset PCUM before scanning a new argument list: zero the current and
   next general/vector register counters and the stack word/size
   counters, clear the pending argument register and the "processed"
   flag, and select the ARM_PCS_AAPCS64 variant.  FNTYPE, LIBNAME, FNDECL
   and N_NAMED are unused.  */
1339 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1340 const_tree fntype ATTRIBUTE_UNUSED,
1341 rtx libname ATTRIBUTE_UNUSED,
1342 const_tree fndecl ATTRIBUTE_UNUSED,
1343 unsigned n_named ATTRIBUTE_UNUSED)
1345 pcum->aapcs_ncrn = 0;
1346 pcum->aapcs_nvrn = 0;
1347 pcum->aapcs_nextncrn = 0;
1348 pcum->aapcs_nextnvrn = 0;
1349 pcum->pcs_variant = ARM_PCS_AAPCS64;
1350 pcum->aapcs_reg = NULL_RTX;
1351 pcum->aapcs_arg_processed = false;
1352 pcum->aapcs_stack_words = 0;
1353 pcum->aapcs_stack_size = 0;
/* Step the CUMULATIVE_ARGS state wrapped by PCUM_V past one argument.
   The PCS variant is checked against ARM_PCS_AAPCS64 and the argument is
   laid out with aarch64_layout_arg.  The assertion requires that exactly
   one of "a register was assigned" and "stack words were allocated"
   holds.  The next-register counters are then committed to
   aapcs_ncrn/aapcs_nvrn, the stack words are added to aapcs_stack_size,
   and the per-argument fields (aapcs_arg_processed, aapcs_stack_words,
   aapcs_reg) are cleared.
   NOTE(review): presumably the TARGET_FUNCTION_ARG_ADVANCE hook --
   confirm against the hook table.  */
1359 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1360 enum machine_mode mode,
1364 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1365 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1367 aarch64_layout_arg (pcum_v, mode, type, named);
1368 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1369 != (pcum->aapcs_stack_words != 0));
1370 pcum->aapcs_arg_processed = false;
1371 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1372 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1373 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1374 pcum->aapcs_stack_words = 0;
1375 pcum->aapcs_reg = NULL_RTX;
/* Return true if REGNO is a general register numbered below
   R0_REGNUM + NUM_ARG_REGS, or an FP register numbered below
   V0_REGNUM + NUM_FP_ARG_REGS.  */
1380 aarch64_function_arg_regno_p (unsigned regno)
1382 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1383 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
/* Summary: the alignment reported by aarch64_function_arg_alignment for
   MODE and TYPE is raised to PARM_BOUNDARY if smaller and lowered to
   STACK_BOUNDARY if larger.  */
1386 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1387 PARM_BOUNDARY bits of alignment, but will be given anything up
1388 to STACK_BOUNDARY bits if the type requires it. This makes sure
1389 that both before and after the layout of each argument, the Next
1390 Stacked Argument Address (NSAA) will have a minimum alignment of
1394 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1396 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1398 if (alignment < PARM_BOUNDARY)
1399 alignment = PARM_BOUNDARY;
1400 if (alignment > STACK_BOUNDARY)
1401 alignment = STACK_BOUNDARY;
1405 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1407 Return true if an argument passed on the stack should be padded upwards,
1408 i.e. if the least-significant byte of the stack slot has useful data.
1410 Small aggregate types are placed in the lowest memory address.
1412 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
/* Endianness is tested first (BYTES_BIG_ENDIAN).  Scalar integer and
   floating-point arguments are then recognised either from TYPE
   (INTEGRAL_TYPE_P / SCALAR_FLOAT_TYPE_P) or from MODE
   (SCALAR_INT_MODE_P / SCALAR_FLOAT_MODE_P).  */
1415 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1417 /* On little-endian targets, the least significant byte of every stack
1418 argument is passed at the lowest byte address of the stack slot. */
1419 if (!BYTES_BIG_ENDIAN)
1422 /* Otherwise, integral types and floating point types are padded downward:
1423 the least significant byte of a stack argument is passed at the highest
1424 byte address of the stack slot. */
1426 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type))
1427 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1430 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1434 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1436 It specifies padding for the last (may also be the only)
1437 element of a block move between registers and memory. If
1438 assuming the block is in the memory, padding upward means that
1439 the last element is padded after its highest significant byte,
1440 while in downward padding, the last element is padded at
1441 its least significant byte side.
1443 Small aggregates and small complex types are always padded upward.
1446 We don't need to worry about homogeneous floating-point or
1447 short-vector aggregates; their move is not affected by the
1448 padding direction determined here. Regardless of endianness,
1449 each element of such an aggregate is put in the least
1450 significant bits of a fp/simd register.
1452 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1453 register has useful data, and return the opposite if the most
1454 significant byte does. */
/* FIRST is unused.  The special case is only considered on big-endian
   targets, for values that aarch64_composite_type_p accepts and whose
   size (from TYPE when available, otherwise from MODE) is below
   2 * UNITS_PER_WORD bytes.  */
1457 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1458 bool first ATTRIBUTE_UNUSED)
1461 /* Small composite types are always padded upward. */
1462 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1464 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1465 : GET_MODE_SIZE (mode));
1466 if (size < 2 * UNITS_PER_WORD)
1470 /* Otherwise, use the default padding. */
1471 return !BYTES_BIG_ENDIAN;
/* Takes no arguments and yields a machine mode.
   NOTE(review): presumably the TARGET_LIBGCC_CMP_RETURN_MODE hook, i.e.
   the mode of libgcc comparison results -- confirm against the hook
   table.  */
1474 static enum machine_mode
1475 aarch64_libgcc_cmp_return_mode (void)
/* Decide whether the current function needs a frame pointer.  The
   inputs are cfun->calls_alloca, flag_omit_frame_pointer together with
   faked_omit_frame_pointer, and flag_omit_leaf_frame_pointer; under the
   last of these the answer is !crtl->is_leaf, i.e. only non-leaf
   functions need one.
   NOTE(review): presumably the TARGET_FRAME_POINTER_REQUIRED hook --
   confirm.  */
1481 aarch64_frame_pointer_required (void)
1483 /* If the function contains dynamic stack allocations, we need to
1484 use the frame pointer to access the static parts of the frame. */
1485 if (cfun->calls_alloca)
1488 /* We may have turned flag_omit_frame_pointer on in order to have this
1489 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1490 and we'll check it here.
1491 If we really did set flag_omit_frame_pointer normally, then we return false
1492 (no frame pointer required) in all cases. */
1494 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1496 else if (flag_omit_leaf_frame_pointer)
1497 return !crtl->is_leaf;
1501 /* Mark the registers that need to be saved by the callee and calculate
1502 the size of the callee-saved registers area and frame record (both FP
1503 and LR may be omitted). */
/* Results are written to cfun->machine->frame:
   - reg_offset[]: -1 for a register that is not saved, otherwise the
     offset of its save slot;
   - fp_lr_offset, hardfp_offset, padding0, saved_regs_size, laid_out.
   Slots are handed out in UNITS_PER_WORD steps, first to R0..R28, then
   to V0..V31, then to R29 (when frame_pointer_needed) and finally to
   R30 if it is marked.  The total is rounded up to
   STACK_BOUNDARY / BITS_PER_UNIT bytes and the amount of rounding is
   recorded in padding0.
   NOTE(review): the reload_completed && laid_out test presumably skips
   the recomputation once a layout exists -- confirm.  */
1505 aarch64_layout_frame (void)
1507 HOST_WIDE_INT offset = 0;
1510 if (reload_completed && cfun->machine->frame.laid_out)
1513 cfun->machine->frame.fp_lr_offset = 0;
1515 /* First mark all the registers that really need to be saved... */
1516 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1517 cfun->machine->frame.reg_offset[regno] = -1;
1519 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1520 cfun->machine->frame.reg_offset[regno] = -1;
1522 /* ... that includes the eh data registers (if needed)... */
1523 if (crtl->calls_eh_return)
1524 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1525 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1527 /* ... and any callee saved register that dataflow says is live. */
1528 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1529 if (df_regs_ever_live_p (regno)
1530 && !call_used_regs[regno])
1531 cfun->machine->frame.reg_offset[regno] = 0;
1533 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1534 if (df_regs_ever_live_p (regno)
1535 && !call_used_regs[regno])
1536 cfun->machine->frame.reg_offset[regno] = 0;
1538 if (frame_pointer_needed)
1540 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1541 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1542 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1545 /* Now assign stack slots for them. */
1546 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1547 if (cfun->machine->frame.reg_offset[regno] != -1)
1549 cfun->machine->frame.reg_offset[regno] = offset;
1550 offset += UNITS_PER_WORD;
1553 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1554 if (cfun->machine->frame.reg_offset[regno] != -1)
1556 cfun->machine->frame.reg_offset[regno] = offset;
1557 offset += UNITS_PER_WORD;
1560 if (frame_pointer_needed)
1562 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1563 offset += UNITS_PER_WORD;
1564 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1567 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1569 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1570 offset += UNITS_PER_WORD;
1571 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1574 cfun->machine->frame.padding0 =
1575 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1576 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1578 cfun->machine->frame.saved_regs_size = offset;
1579 cfun->machine->frame.laid_out = true;
1582 /* Make the last instruction frame-related and note that it performs
1583 the operation described by FRAME_PATTERN. */
/* The insn is the one returned by get_last_insn.  Both it and
   FRAME_PATTERN get RTX_FRAME_RELATED_P set, and a
   REG_FRAME_RELATED_EXPR entry is put at the head of the insn's
   REG_NOTES.  */
1586 aarch64_set_frame_expr (rtx frame_pattern)
1590 insn = get_last_insn ();
1591 RTX_FRAME_RELATED_P (insn) = 1;
1592 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1593 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
/* Return true if REGNO has a save slot in the current frame layout,
   i.e. its cfun->machine->frame.reg_offset entry is not the -1 marker
   used for registers that are not saved.  */
1599 aarch64_register_saved_on_entry (int regno)
1601 return cfun->machine->frame.reg_offset[regno] != -1;
/* Save (RESTORE false) or restore (RESTORE true) every register in
   V0..V31 for which aarch64_register_saved_on_entry holds.  Slots are
   addressed from START_OFFSET and advance by INCREMENT per register.
   Memory references are built with gen_frame_mem when
   frame_pointer_needed and with gen_rtx_MEM otherwise.

   Each register is accessed in DFmode.  When another saved register
   follows the current one, the two are handled together with a
   store-pair or load-pair and the offset advances by 2 * INCREMENT;
   otherwise a single move is emitted.  Restores are annotated with
   REG_CFA_RESTORE notes, and every emitted insn is marked
   RTX_FRAME_RELATED_P.
   NOTE(review): BASE_RTX is presumably the base register of the slot
   addresses -- confirm at the plus_constant calls.  */
1606 aarch64_save_or_restore_fprs (int start_offset, int increment,
1607 bool restore, rtx base_rtx)
1613 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1616 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1618 if (aarch64_register_saved_on_entry (regno))
1621 mem = gen_mem_ref (DFmode,
1622 plus_constant (Pmode,
1626 for (regno2 = regno + 1;
1627 regno2 <= V31_REGNUM
1628 && !aarch64_register_saved_on_entry (regno2);
1633 if (regno2 <= V31_REGNUM &&
1634 aarch64_register_saved_on_entry (regno2))
1637 /* Next highest register to be saved. */
1638 mem2 = gen_mem_ref (DFmode,
1642 start_offset + increment));
1643 if (restore == false)
1646 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1647 mem2, gen_rtx_REG (DFmode, regno2)));
1653 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1654 gen_rtx_REG (DFmode, regno2), mem2));
1656 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
1657 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
1660 /* The first part of a frame-related parallel insn
1661 is always assumed to be relevant to the frame
1662 calculations; subsequent parts, are only
1663 frame-related if explicitly marked. */
1664 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1667 start_offset += increment * 2;
1671 if (restore == false)
1672 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1675 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
/* NOTE(review): this note names the register in DImode although the
   move above and the paired path both use DFmode -- confirm that the
   mismatch is intended.  */
1676 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1678 start_offset += increment;
1680 RTX_FRAME_RELATED_P (insn) = 1;
1687 /* OFFSET is the offset from the stack pointer of where the saves and
1688 restores have to happen. */
/* Addresses are based on stack_pointer_rtx and slots advance by
   UNITS_PER_WORD.  General registers from R0 up to LIMIT are processed
   in DImode, where LIMIT is R28 when frame_pointer_needed and R30
   otherwise.  A register that is followed by another saved register is
   handled together with it through a store-pair or load-pair;
   otherwise a single move is used.  Restores receive REG_CFA_RESTORE
   notes and every emitted insn is marked RTX_FRAME_RELATED_P.  The FP
   registers are then handled by aarch64_save_or_restore_fprs, starting
   at the offset reached here.  */
1690 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1694 rtx base_rtx = stack_pointer_rtx;
1695 HOST_WIDE_INT start_offset = offset;
1696 HOST_WIDE_INT increment = UNITS_PER_WORD;
1697 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1698 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1702 for (regno = R0_REGNUM; regno <= limit; regno++)
1704 if (aarch64_register_saved_on_entry (regno))
1707 mem = gen_mem_ref (Pmode,
1708 plus_constant (Pmode,
1712 for (regno2 = regno + 1;
1714 && !aarch64_register_saved_on_entry (regno2);
1719 if (regno2 <= limit &&
1720 aarch64_register_saved_on_entry (regno2))
1723 /* Next highest register to be saved. */
1724 mem2 = gen_mem_ref (Pmode,
1728 start_offset + increment));
1729 if (restore == false)
1732 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1733 mem2, gen_rtx_REG (DImode, regno2)));
1739 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1740 gen_rtx_REG (DImode, regno2), mem2));
1742 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1743 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1746 /* The first part of a frame-related parallel insn
1747 is always assumed to be relevant to the frame
1748 calculations; subsequent parts, are only
1749 frame-related if explicitly marked. */
1750 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1753 start_offset += increment * 2;
1757 if (restore == false)
1758 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1761 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1762 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1764 start_offset += increment;
1766 RTX_FRAME_RELATED_P (insn) = 1;
1770 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1774 /* AArch64 stack frames generated by this compiler look like:
1776 +-------------------------------+
1778 | incoming stack arguments |
1780 +-------------------------------+ <-- arg_pointer_rtx
1782 | callee-allocated save area |
1783 | for register varargs |
1785 +-------------------------------+
1789 +-------------------------------+ <-- frame_pointer_rtx
1791 | callee-saved registers |
1793 +-------------------------------+
1795 +-------------------------------+
1797 P +-------------------------------+ <-- hard_frame_pointer_rtx
1798 | dynamic allocation |
1799 +-------------------------------+
1801 | outgoing stack arguments |
1803 +-------------------------------+ <-- stack_pointer_rtx
1805 Dynamic stack allocations such as alloca insert data at point P.
1806 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
1807 hard_frame_pointer_rtx unchanged. */
1809 /* Generate the prologue instructions for entry into a function.
1810 Establish the stack frame by decreasing the stack pointer with a
1811 properly calculated size and, if necessary, create a frame record
1812 filled with the values of LR and previous frame pointer. The
1813 current FP is also set up if it is in use. */
/* The frame is laid out first (aarch64_layout_frame).  The total frame
   size is the sum of get_frame_size (), the saved varargs area, the
   callee-saved register area and crtl->outgoing_args_size, rounded up
   to STACK_BOUNDARY / BITS_PER_UNIT bytes; it is also recorded in
   current_function_static_stack_size when flag_stack_usage_info is set.
   Stack adjustments of 0x1000000 bytes or more go through IP0; smaller
   ones are split into a part above the low 12 bits and a low 12-bit
   part.  Callee-saved registers are stored through
   aarch64_save_or_restore_callee_save_registers with its second
   argument 0.  */
1816 aarch64_expand_prologue (void)
1818 /* sub sp, sp, #<frame_size>
1819 stp {fp, lr}, [sp, #<frame_size> - 16]
1820 add fp, sp, #<frame_size> - hardfp_offset
1821 stp {cs_reg}, [fp, #-16] etc.
1823 sub sp, sp, <final_adjustment_if_any>
1825 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
1826 HOST_WIDE_INT frame_size, offset;
1827 HOST_WIDE_INT fp_offset; /* FP offset from SP */
1830 aarch64_layout_frame ();
1831 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
/* NOTE(review): both halves of this assertion express the same
   implication (a non-zero saved_varargs_size requires cfun->stdarg), so
   one of them is redundant.  */
1832 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
1833 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
1834 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1835 + crtl->outgoing_args_size);
1836 offset = frame_size = AARCH64_ROUND_UP (frame_size,
1837 STACK_BOUNDARY / BITS_PER_UNIT);
1839 if (flag_stack_usage_info)
1840 current_function_static_stack_size = frame_size;
1843 - original_frame_size
1844 - cfun->machine->frame.saved_regs_size);
1846 /* Store pairs and load pairs have a range only -512 to 504. */
1849 /* When the frame has a large size, an initial decrease is done on
1850 the stack pointer to jump over the callee-allocated save area for
1851 register varargs, the local variable area and/or the callee-saved
1852 register area. This will allow the pre-index write-back
1853 store pair instructions to be used for setting up the stack frame
1855 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
1857 offset = cfun->machine->frame.saved_regs_size;
1859 frame_size -= (offset + crtl->outgoing_args_size);
1862 if (frame_size >= 0x1000000)
1864 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
1865 emit_move_insn (op0, GEN_INT (-frame_size));
1866 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
1867 aarch64_set_frame_expr (gen_rtx_SET
1868 (Pmode, stack_pointer_rtx,
1869 gen_rtx_PLUS (Pmode,
1871 GEN_INT (-frame_size))));
1873 else if (frame_size > 0)
1875 if ((frame_size & 0xfff) != frame_size)
1877 insn = emit_insn (gen_add2_insn
1879 GEN_INT (-(frame_size
1880 & ~(HOST_WIDE_INT)0xfff))));
1881 RTX_FRAME_RELATED_P (insn) = 1;
1883 if ((frame_size & 0xfff) != 0)
1885 insn = emit_insn (gen_add2_insn
1887 GEN_INT (-(frame_size
1888 & (HOST_WIDE_INT)0xfff))));
1889 RTX_FRAME_RELATED_P (insn) = 1;
1898 /* Save the frame pointer and lr if the frame pointer is needed
1899 first. Make the frame pointer point to the location of the
1900 old frame pointer on the stack. */
1901 if (frame_pointer_needed)
1907 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1908 GEN_INT (-offset)));
1909 RTX_FRAME_RELATED_P (insn) = 1;
1910 aarch64_set_frame_expr (gen_rtx_SET
1911 (Pmode, stack_pointer_rtx,
1912 gen_rtx_MINUS (Pmode,
1914 GEN_INT (offset))));
1915 mem_fp = gen_frame_mem (DImode,
1916 plus_constant (Pmode,
1919 mem_lr = gen_frame_mem (DImode,
1920 plus_constant (Pmode,
1924 insn = emit_insn (gen_store_pairdi (mem_fp,
1925 hard_frame_pointer_rtx,
1927 gen_rtx_REG (DImode,
1932 insn = emit_insn (gen_storewb_pairdi_di
1933 (stack_pointer_rtx, stack_pointer_rtx,
1934 hard_frame_pointer_rtx,
1935 gen_rtx_REG (DImode, LR_REGNUM),
1937 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
1938 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
1941 /* The first part of a frame-related parallel insn is always
1942 assumed to be relevant to the frame calculations;
1943 subsequent parts, are only frame-related if explicitly
1945 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1946 RTX_FRAME_RELATED_P (insn) = 1;
1948 /* Set up frame pointer to point to the location of the
1949 previous frame pointer on the stack. */
1950 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
1952 GEN_INT (fp_offset)));
1953 aarch64_set_frame_expr (gen_rtx_SET
1954 (Pmode, hard_frame_pointer_rtx,
1955 gen_rtx_PLUS (Pmode,
1957 GEN_INT (fp_offset))));
1958 RTX_FRAME_RELATED_P (insn) = 1;
1959 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
1960 hard_frame_pointer_rtx));
1964 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1965 GEN_INT (-offset)));
1966 RTX_FRAME_RELATED_P (insn) = 1;
1969 aarch64_save_or_restore_callee_save_registers
1970 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
1973 /* when offset >= 512,
1974 sub sp, sp, #<outgoing_args_size> */
1975 if (frame_size > -1)
1977 if (crtl->outgoing_args_size > 0)
1979 insn = emit_insn (gen_add2_insn
1981 GEN_INT (- crtl->outgoing_args_size)));
1982 RTX_FRAME_RELATED_P (insn) = 1;
1987 /* Generate the epilogue instructions for returning from a function. */
/* The frame is laid out (aarch64_layout_frame) and the frame size is
   computed as get_frame_size () plus the saved varargs area, the
   callee-saved register area and crtl->outgoing_args_size, rounded up
   to STACK_BOUNDARY / BITS_PER_UNIT bytes.  Callee-saved registers are
   reloaded through aarch64_save_or_restore_callee_save_registers with
   its second argument 1.  When crtl->calls_eh_return is set, SP is
   first copied to IP0 and the CFA is described in terms of IP0 before
   EH_RETURN_STACKADJ_RTX is added to SP.  The sequence ends with a use
   of LR and a jump to ret_rtx.
   NOTE(review): FOR_SIBCALL presumably suppresses the final return
   jump -- confirm.  */
1989 aarch64_expand_epilogue (bool for_sibcall)
1991 HOST_WIDE_INT original_frame_size, frame_size, offset;
1992 HOST_WIDE_INT fp_offset;
1996 aarch64_layout_frame ();
1997 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1998 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1999 + crtl->outgoing_args_size);
2000 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2001 STACK_BOUNDARY / BITS_PER_UNIT);
2004 - original_frame_size
2005 - cfun->machine->frame.saved_regs_size);
2007 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2009 /* Store pairs and load pairs have a range only -512 to 504. */
2012 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2014 offset = cfun->machine->frame.saved_regs_size;
2016 frame_size -= (offset + crtl->outgoing_args_size);
2018 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2020 insn = emit_insn (gen_add2_insn
2022 GEN_INT (crtl->outgoing_args_size)));
2023 RTX_FRAME_RELATED_P (insn) = 1;
2029 /* If there were outgoing arguments or we've done dynamic stack
2030 allocation, then restore the stack pointer from the frame
2031 pointer. This is at most one insn and more efficient than using
2032 GCC's internal mechanism. */
2033 if (frame_pointer_needed
2034 && (crtl->outgoing_args_size || cfun->calls_alloca))
2036 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2037 hard_frame_pointer_rtx,
2038 GEN_INT (- fp_offset)));
2039 RTX_FRAME_RELATED_P (insn) = 1;
2040 /* As SP is set to (FP - fp_offset), according to the rules in
2041 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2042 from the value of SP from now on. */
2043 cfa_reg = stack_pointer_rtx;
2046 aarch64_save_or_restore_callee_save_registers
2047 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2049 /* Restore the frame pointer and lr if the frame pointer is needed. */
2052 if (frame_pointer_needed)
2058 mem_fp = gen_frame_mem (DImode,
2059 plus_constant (Pmode,
2062 mem_lr = gen_frame_mem (DImode,
2063 plus_constant (Pmode,
2067 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2069 gen_rtx_REG (DImode,
2075 insn = emit_insn (gen_loadwb_pairdi_di
2078 hard_frame_pointer_rtx,
2079 gen_rtx_REG (DImode, LR_REGNUM),
2081 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2082 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2083 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2084 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2085 plus_constant (Pmode, cfa_reg,
2089 /* The first part of a frame-related parallel insn
2090 is always assumed to be relevant to the frame
2091 calculations; subsequent parts, are only
2092 frame-related if explicitly marked. */
2093 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2094 RTX_FRAME_RELATED_P (insn) = 1;
2095 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2096 add_reg_note (insn, REG_CFA_RESTORE,
2097 gen_rtx_REG (DImode, LR_REGNUM));
2101 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2103 RTX_FRAME_RELATED_P (insn) = 1;
2108 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2110 RTX_FRAME_RELATED_P (insn) = 1;
2114 /* Stack adjustment for exception handler. */
2115 if (crtl->calls_eh_return)
2117 /* We need to unwind the stack by the offset computed by
2118 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2119 based on SP. Ideally we would update the SP and define the
2120 CFA along the lines of:
2122 SP = SP + EH_RETURN_STACKADJ_RTX
2123 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2125 However the dwarf emitter only understands a constant
2128 The solution chosen here is to use the otherwise unused IP0
2129 as a temporary register to hold the current SP value. The
2130 CFA is described using IP0 then SP is modified. */
2132 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2134 insn = emit_move_insn (ip0, stack_pointer_rtx);
2135 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2136 RTX_FRAME_RELATED_P (insn) = 1;
2138 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2140 /* Ensure the assignment to IP0 does not get optimized away. */
2144 if (frame_size > -1)
2146 if (frame_size >= 0x1000000)
2148 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2149 emit_move_insn (op0, GEN_INT (frame_size));
2150 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2151 aarch64_set_frame_expr (gen_rtx_SET
2152 (Pmode, stack_pointer_rtx,
2153 gen_rtx_PLUS (Pmode,
2155 GEN_INT (frame_size))));
2157 else if (frame_size > 0)
2159 if ((frame_size & 0xfff) != 0)
2161 insn = emit_insn (gen_add2_insn
2163 GEN_INT ((frame_size
2164 & (HOST_WIDE_INT) 0xfff))));
2165 RTX_FRAME_RELATED_P (insn) = 1;
2167 if ((frame_size & 0xfff) != frame_size)
2169 insn = emit_insn (gen_add2_insn
2171 GEN_INT ((frame_size
2172 & ~ (HOST_WIDE_INT) 0xfff))));
2173 RTX_FRAME_RELATED_P (insn) = 1;
2177 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2178 gen_rtx_PLUS (Pmode,
2180 GEN_INT (offset))));
2183 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2185 emit_jump_insn (ret_rtx);
2188 /* Return the place to copy the exception unwinding return address to.
2189 This will probably be a stack slot, but could (in theory) be the
2190 return register. */
/* The LR register itself (in DImode) is returned when LR has no save
   slot, i.e. its frame.reg_offset entry is negative.  Otherwise the
   result is a frame memory reference: relative to the hard frame
   pointer or the stack pointer at offset UNITS_PER_WORD when a frame
   pointer is needed, or computed from the saved-register area when it
   is not.  */
2192 aarch64_final_eh_return_addr (void)
2194 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2195 aarch64_layout_frame ();
2196 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2197 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2198 + crtl->outgoing_args_size);
2199 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2200 STACK_BOUNDARY / BITS_PER_UNIT);
2202 - original_frame_size
2203 - cfun->machine->frame.saved_regs_size;
2205 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2206 return gen_rtx_REG (DImode, LR_REGNUM);
2208 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2209 result in a store to save LR introduced by builtin_eh_return () being
2210 incorrectly deleted because the alias is not detected.
2211 So in the calculation of the address to copy the exception unwinding
2212 return address to, we note 2 cases.
2213 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2214 we return a SP-relative location since all the addresses are SP-relative
2215 in this case. This prevents the store from being optimized away.
2216 If the fp_offset is not 0, then the addresses will be FP-relative and
2217 therefore we return a FP-relative location. */
2219 if (frame_pointer_needed)
2222 return gen_frame_mem (DImode,
2223 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2225 return gen_frame_mem (DImode,
2226 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2229 /* If FP is not needed, we calculate the location of LR, which would be
2230 at the top of the saved registers block. */
2232 return gen_frame_mem (DImode,
2233 plus_constant (Pmode,
2236 + cfun->machine->frame.saved_regs_size
2237 - 2 * UNITS_PER_WORD));
2240 /* Output code to build up a constant in a register. */
/* REGNUM is the number of the destination register (used in Pmode) and
   VAL the value to load.  A value accepted by aarch64_bitmask_imm for
   DImode is loaded with a single move.  Otherwise the low 16 bits of
   VAL, or of ~VAL when that start needs fewer follow-up instructions,
   are moved first, and 16-bit chunks at bit positions 16, 32 and 48
   that differ from the fill value are inserted with gen_insv_immdi.  */
2242 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2244 if (aarch64_bitmask_imm (val, DImode))
2245 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2251 HOST_WIDE_INT valp = val >> 16;
2255 for (i = 16; i < 64; i += 16)
2257 valm = (valp & 0xffff);
2268 /* zcount contains the number of additional MOVK instructions
2269 required if the constant is built up with an initial MOVZ instruction,
2270 while ncount is the number of MOVK instructions required if starting
2271 with a MOVN instruction. Choose the sequence that yields the fewest
2272 number of instructions, preferring MOVZ instructions when they are both
2274 if (ncount < zcount)
2276 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2277 GEN_INT ((~val) & 0xffff));
2282 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2283 GEN_INT (val & 0xffff));
2289 for (i = 16; i < 64; i += 16)
2291 if ((val & 0xffff) != tval)
2292 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2293 GEN_INT (i), GEN_INT (val & 0xffff)));
/* Emit code that adds DELTA to register REGNUM, using register
   SCRATCHREG as a temporary (both in Pmode).  MDELTA starts out as
   DELTA (NOTE(review): presumably it is turned into the magnitude of
   DELTA before the tests below -- confirm).  For MDELTA of 4096 * 4096
   or more, DELTA is built in the scratch register with
   aarch64_build_constant and added.  For smaller positive MDELTA, the
   scratch register is loaded with MDELTA / 4096 and applied shifted
   left by 12, by subtraction or by addition; a non-zero remainder
   MDELTA % 4096 is then added as an immediate carrying the sign of
   DELTA.  */
2300 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2302 HOST_WIDE_INT mdelta = delta;
2303 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2304 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2309 if (mdelta >= 4096 * 4096)
2311 aarch64_build_constant (scratchreg, delta);
2312 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2314 else if (mdelta > 0)
2318 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2319 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2321 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2322 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2324 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2325 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2327 if (mdelta % 4096 != 0)
2329 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2330 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2331 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2336 /* Output code to add DELTA to the first argument, and then jump
2337 to FUNCTION. Used for C++ multiple inheritance. */
/* THUNK is unused.  With VCALL_OFFSET equal to zero, DELTA is simply
   added to the this pointer (x0) using IP1 as scratch.  Otherwise
   VCALL_OFFSET must be a multiple of 8 (asserted): a pointer is loaded
   into IP0 through the this pointer, the word at VCALL_OFFSET from it
   is loaded into IP1, and IP1 is added to the this pointer.  The thunk
   ends with a sibling call to FUNCTION, and the insns are written to
   FILE by running final directly.  reload_completed is forced to 1
   while the thunk is generated and reset to 0 afterwards.  */
2339 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2340 HOST_WIDE_INT delta,
2341 HOST_WIDE_INT vcall_offset,
2344 /* The this pointer is always in x0. Note that this differs from
2345 Arm where the this pointer maybe bumped to r1 if r0 is required
2346 to return a pointer to an aggregate. On AArch64 a result value
2347 pointer will be in x8. */
2348 int this_regno = R0_REGNUM;
2349 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2351 reload_completed = 1;
2352 emit_note (NOTE_INSN_PROLOGUE_END);
2354 if (vcall_offset == 0)
2355 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2358 gcc_assert ((vcall_offset & 0x7) == 0);
2360 this_rtx = gen_rtx_REG (Pmode, this_regno);
2361 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2362 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2367 if (delta >= -256 && delta < 256)
2368 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2369 plus_constant (Pmode, this_rtx, delta));
2371 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2374 aarch64_emit_move (temp0, gen_rtx_MEM (Pmode, addr));
2376 if (vcall_offset >= -256 && vcall_offset < 32768)
2377 addr = plus_constant (Pmode, temp0, vcall_offset);
2380 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2381 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2384 aarch64_emit_move (temp1, gen_rtx_MEM (Pmode,addr));
2385 emit_insn (gen_add2_insn (this_rtx, temp1));
2388 /* Generate a tail call to the target function. */
2389 if (!TREE_USED (function))
2391 assemble_external (function);
2392 TREE_USED (function) = 1;
2394 funexp = XEXP (DECL_RTL (function), 0);
2395 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2396 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2397 SIBLING_CALL_P (insn) = 1;
2399 insn = get_insns ();
2400 shorten_branches (insn);
2401 final_start_function (insn, file, 1);
2402 final (insn, file, 1);
2403 final_end_function ();
2405 /* Stop pretending to be a post-reload pass. */
2406 reload_completed = 0;
/* Callback passed to for_each_rtx by aarch64_tls_referenced_p.  For a
   SYMBOL_REF at *X the result is whether its TLS model is non-zero.
   An UNSPEC whose code is UNSPEC_TLS is recognised separately so that
   its operands are not searched.  DATA is unused.  */
2410 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2412 if (GET_CODE (*x) == SYMBOL_REF)
2413 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2415 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2416 TLS offsets, not real symbol references. */
2417 if (GET_CODE (*x) == UNSPEC
2418 && XINT (*x, 1) == UNSPEC_TLS)
/* Scan X with for_each_rtx, applying aarch64_tls_operand_p_1 to each
   sub-rtx, and return the result of that walk.  TARGET_HAVE_TLS is
   tested before the walk.  */
2425 aarch64_tls_referenced_p (rtx x)
2427 if (!TARGET_HAVE_TLS)
2430 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
/* Comparison callback for the aarch64_bitmasks table; it is handed to
   qsort when the table is built and to bsearch when it is searched.
   I1 and I2 point to unsigned HOST_WIDE_INT values.  */
2435 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2437 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2438 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
/* Fill aarch64_bitmasks with exactly AARCH64_NUM_BITMASKS values
   (asserted) and sort the table with aarch64_bitmasks_cmp.  For each
   LOG_E from 1 to 6, each run length S from 1 to E - 1 and each
   rotation R from 0 to E - 1, the entry is S consecutive one bits,
   rotated right by R under MASK and then replicated across the
   word.  */
2449 aarch64_build_bitmask_table (void)
2451 unsigned HOST_WIDE_INT mask, imm;
2452 unsigned int log_e, e, s, r;
2453 unsigned int nimms = 0;
2455 for (log_e = 1; log_e <= 6; log_e++)
2459 mask = ~(HOST_WIDE_INT) 0;
2461 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2462 for (s = 1; s < e; s++)
2464 for (r = 0; r < e; r++)
2466 /* set s consecutive bits to 1 (s < 64) */
2467 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2468 /* rotate right by r */
2470 imm = ((imm >> r) | (imm << (e - r))) & mask;
2471 /* replicate the constant depending on SIMD size */
/* Cases 1 to 5 run into one another, so each step doubles the width of
   the replicated pattern.  */
2473 case 1: imm |= (imm << 2);
2474 case 2: imm |= (imm << 4);
2475 case 3: imm |= (imm << 8);
2476 case 4: imm |= (imm << 16);
2477 case 5: imm |= (imm << 32);
2483 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2484 aarch64_bitmasks[nimms++] = imm;
2489 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2490 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2491 aarch64_bitmasks_cmp);
2495 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2496 a left shift of 0 or 12 bits. */
/* VAL is compared against itself masked with 0xfff and with
   0xfff << 12, so a negative VAL never matches.  */
2498 aarch64_uimm12_shift (HOST_WIDE_INT val)
2500 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2501 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2506 /* Return true if val is an immediate that can be loaded into a
2507 register by a MOVZ instruction. */
/* VAL qualifies when all of its set bits lie in a single 16-bit field.
   Fields at bit 0 and bit 16 are always checked; fields at bit 32 and
   bit 48 are checked under GET_MODE_SIZE (MODE) > 4.  Masking VAL to 32
   bits discards sign-extension bits before the low fields are
   tested.  */
2509 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2511 if (GET_MODE_SIZE (mode) > 4)
2513 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2514 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2519 /* Ignore sign extension. */
2520 val &= (HOST_WIDE_INT) 0xffffffff;
2522 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2523 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2527 /* Return true if val is a valid bitmask immediate. */
/* Validity is decided by looking VAL up in the aarch64_bitmasks table
   with bsearch, using aarch64_bitmasks_cmp as the ordering.  For modes
   narrower than 8 bytes VAL is first reduced to its low 32 bits as
   part of replicating the pattern.  */
2529 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2531 if (GET_MODE_SIZE (mode) < 8)
2533 /* Replicate bit pattern. */
2534 val &= (HOST_WIDE_INT) 0xffffffff;
2537 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2538 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2542 /* Return true if val is an immediate that can be loaded into a
2543 register in a single instruction. */
/* The checks are: VAL or ~VAL satisfies aarch64_movw_imm for MODE, or
   VAL satisfies aarch64_bitmask_imm for MODE.  */
2545 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2547 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2549 return aarch64_bitmask_imm (val, mode);
/* Classify the constant X; MODE is unused.  A HIGH rtx is recognised
   first.  X is then split into BASE and OFFSET with split_const: for a
   SYMBOL_REF or LABEL_REF base the result is true unless
   aarch64_classify_symbol (with SYMBOL_CONTEXT_ADR) reports
   SYMBOL_FORCE_TO_MEM.  Any other X yields the result of
   aarch64_tls_referenced_p.
   NOTE(review): presumably the TARGET_CANNOT_FORCE_CONST_MEM hook, so
   true would mean X must not be placed in the constant pool --
   confirm.  */
2553 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2556 if (GET_CODE (x) == HIGH)
2559 split_const (x, &base, &offset);
2560 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2561 return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) != SYMBOL_FORCE_TO_MEM);
2563 return aarch64_tls_referenced_p (x);
2566 /* Return true if register REGNO is a valid index register.
2567 STRICT_P is true if REG_OK_STRICT is in effect. */
/* A REGNO that is not a hard register number may be replaced by its
   reg_renumber entry; the final test is GP_REGNUM_P.  */
2570 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2572 if (!HARD_REGISTER_NUM_P (regno))
2580 regno = reg_renumber[regno];
2582 return GP_REGNUM_P (regno);
2585 /* Return true if register REGNO is a valid base register for mode MODE.
2586 STRICT_P is true if REG_OK_STRICT is in effect. */
2589 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2591 if (!HARD_REGISTER_NUM_P (regno))
2599 regno = reg_renumber[regno];
2602 /* The fake registers will be eliminated to either the stack or
2603 hard frame pointer, both of which are usually valid base registers.
2604 Reload deals with the cases where the eliminated form isn't valid. */
2605 return (GP_REGNUM_P (regno)
2606 || regno == SP_REGNUM
2607 || regno == FRAME_POINTER_REGNUM
2608 || regno == ARG_POINTER_REGNUM);
2611 /* Return true if X is a valid base register for mode MODE.
2612 STRICT_P is true if REG_OK_STRICT is in effect. */
2615 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2617 if (!strict_p && GET_CODE (x) == SUBREG)
2620 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2623 /* Return true if address offset is a valid index. If it is, fill in INFO
2624 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2627 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2628 enum machine_mode mode, bool strict_p)
2630 enum aarch64_address_type type;
2635 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2636 && GET_MODE (x) == Pmode)
2638 type = ADDRESS_REG_REG;
2642 /* (sign_extend:DI (reg:SI)) */
2643 else if ((GET_CODE (x) == SIGN_EXTEND
2644 || GET_CODE (x) == ZERO_EXTEND)
2645 && GET_MODE (x) == DImode
2646 && GET_MODE (XEXP (x, 0)) == SImode)
2648 type = (GET_CODE (x) == SIGN_EXTEND)
2649 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2650 index = XEXP (x, 0);
2653 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2654 else if (GET_CODE (x) == MULT
2655 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2656 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2657 && GET_MODE (XEXP (x, 0)) == DImode
2658 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2659 && CONST_INT_P (XEXP (x, 1)))
2661 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2662 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2663 index = XEXP (XEXP (x, 0), 0);
2664 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2666 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2667 else if (GET_CODE (x) == ASHIFT
2668 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2669 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2670 && GET_MODE (XEXP (x, 0)) == DImode
2671 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2672 && CONST_INT_P (XEXP (x, 1)))
2674 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2675 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2676 index = XEXP (XEXP (x, 0), 0);
2677 shift = INTVAL (XEXP (x, 1));
2679 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2680 else if ((GET_CODE (x) == SIGN_EXTRACT
2681 || GET_CODE (x) == ZERO_EXTRACT)
2682 && GET_MODE (x) == DImode
2683 && GET_CODE (XEXP (x, 0)) == MULT
2684 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2685 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2687 type = (GET_CODE (x) == SIGN_EXTRACT)
2688 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2689 index = XEXP (XEXP (x, 0), 0);
2690 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2691 if (INTVAL (XEXP (x, 1)) != 32 + shift
2692 || INTVAL (XEXP (x, 2)) != 0)
2695 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2696 (const_int 0xffffffff<<shift)) */
2697 else if (GET_CODE (x) == AND
2698 && GET_MODE (x) == DImode
2699 && GET_CODE (XEXP (x, 0)) == MULT
2700 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2701 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2702 && CONST_INT_P (XEXP (x, 1)))
2704 type = ADDRESS_REG_UXTW;
2705 index = XEXP (XEXP (x, 0), 0);
2706 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2707 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2710 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2711 else if ((GET_CODE (x) == SIGN_EXTRACT
2712 || GET_CODE (x) == ZERO_EXTRACT)
2713 && GET_MODE (x) == DImode
2714 && GET_CODE (XEXP (x, 0)) == ASHIFT
2715 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2716 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2718 type = (GET_CODE (x) == SIGN_EXTRACT)
2719 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2720 index = XEXP (XEXP (x, 0), 0);
2721 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2722 if (INTVAL (XEXP (x, 1)) != 32 + shift
2723 || INTVAL (XEXP (x, 2)) != 0)
2726 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2727 (const_int 0xffffffff<<shift)) */
2728 else if (GET_CODE (x) == AND
2729 && GET_MODE (x) == DImode
2730 && GET_CODE (XEXP (x, 0)) == ASHIFT
2731 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2732 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2733 && CONST_INT_P (XEXP (x, 1)))
2735 type = ADDRESS_REG_UXTW;
2736 index = XEXP (XEXP (x, 0), 0);
2737 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2738 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2741 /* (mult:P (reg:P) (const_int scale)) */
2742 else if (GET_CODE (x) == MULT
2743 && GET_MODE (x) == Pmode
2744 && GET_MODE (XEXP (x, 0)) == Pmode
2745 && CONST_INT_P (XEXP (x, 1)))
2747 type = ADDRESS_REG_REG;
2748 index = XEXP (x, 0);
2749 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2751 /* (ashift:P (reg:P) (const_int shift)) */
2752 else if (GET_CODE (x) == ASHIFT
2753 && GET_MODE (x) == Pmode
2754 && GET_MODE (XEXP (x, 0)) == Pmode
2755 && CONST_INT_P (XEXP (x, 1)))
2757 type = ADDRESS_REG_REG;
2758 index = XEXP (x, 0);
2759 shift = INTVAL (XEXP (x, 1));
2764 if (GET_CODE (index) == SUBREG)
2765 index = SUBREG_REG (index);
2768 (shift > 0 && shift <= 3
2769 && (1 << shift) == GET_MODE_SIZE (mode)))
2771 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2774 info->offset = index;
2775 info->shift = shift;
2783 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2785 return (offset >= -64 * GET_MODE_SIZE (mode)
2786 && offset < 64 * GET_MODE_SIZE (mode)
2787 && offset % GET_MODE_SIZE (mode) == 0);
2791 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
2792 HOST_WIDE_INT offset)
2794 return offset >= -256 && offset < 256;
2798 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2801 && offset < 4096 * GET_MODE_SIZE (mode)
2802 && offset % GET_MODE_SIZE (mode) == 0);
2805 /* Return true if X is a valid address for machine mode MODE. If it is,
2806 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
2807 effect. OUTER_CODE is PARALLEL for a load/store pair. */
/* Address classifier: decide whether X is a valid address for an access
   in MODE and record the decomposition (type, base, offset) in INFO.

   Register-index forms are only considered when OUTER_CODE is not
   PARALLEL and MODE is not 16 bytes wide (allow_reg_index_p).  Vector
   modes accept nothing but POST_INC and REG addresses.

   Base-plus-immediate offsets are range-checked with the offset_*_p
   helpers: TImode/TFmode need an offset that is valid both as a 7-bit
   scaled and as a 9-bit unscaled offset; a load/store pair needs a 4- or
   8-byte mode with a 7-bit scaled offset; everything else accepts either
   a 9-bit unscaled or a 12-bit unsigned scaled offset.  The writeback
   forms with an explicit (plus base const) apply the same checks, except
   that the general case only allows the 9-bit unscaled offset.

   A LO_SUM is accepted only for a SYMBOL_SMALL_ABSOLUTE symbol whose
   offset and alignment are both multiples of the access size (the size
   of DImode is used after `ref_size' has been read from MODE --
   NOTE(review): the guarding condition is not visible in this listing).

   NOTE(review): this listing omits the `switch'/`case' labels, braces and
   several short statements of this function, so the exact dispatch on
   GET_CODE (X) cannot be read from here.  */
2810 aarch64_classify_address (struct aarch64_address_info *info,
2811 rtx x, enum machine_mode mode,
2812 RTX_CODE outer_code, bool strict_p)
2814 enum rtx_code code = GET_CODE (x);
2816 bool allow_reg_index_p =
2817 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
2819 /* Don't support anything other than POST_INC or REG addressing for
2821 if (aarch64_vector_mode_p (mode)
2822 && (code != POST_INC && code != REG))
2829 info->type = ADDRESS_REG_IMM;
2831 info->offset = const0_rtx;
2832 return aarch64_base_register_rtx_p (x, strict_p);
2837 if (GET_MODE_SIZE (mode) != 0
2838 && CONST_INT_P (op1)
2839 && aarch64_base_register_rtx_p (op0, strict_p))
2841 HOST_WIDE_INT offset = INTVAL (op1);
2843 info->type = ADDRESS_REG_IMM;
2847 /* TImode and TFmode values are allowed in both pairs of X
2848 registers and individual Q registers. The available
2850 X,X: 7-bit signed scaled offset
2851 Q: 9-bit signed offset
2852 We conservatively require an offset representable in either mode.
2854 if (mode == TImode || mode == TFmode)
2855 return (offset_7bit_signed_scaled_p (mode, offset)
2856 && offset_9bit_signed_unscaled_p (mode, offset));
2858 if (outer_code == PARALLEL)
2859 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2860 && offset_7bit_signed_scaled_p (mode, offset));
2862 return (offset_9bit_signed_unscaled_p (mode, offset)
2863 || offset_12bit_unsigned_scaled_p (mode, offset));
2866 if (allow_reg_index_p)
2868 /* Look for base + (scaled/extended) index register. */
2869 if (aarch64_base_register_rtx_p (op0, strict_p)
2870 && aarch64_classify_index (info, op1, mode, strict_p))
2875 if (aarch64_base_register_rtx_p (op1, strict_p)
2876 && aarch64_classify_index (info, op0, mode, strict_p))
2889 info->type = ADDRESS_REG_WB;
2890 info->base = XEXP (x, 0);
2891 info->offset = NULL_RTX;
2892 return aarch64_base_register_rtx_p (info->base, strict_p);
2896 info->type = ADDRESS_REG_WB;
2897 info->base = XEXP (x, 0);
2898 if (GET_CODE (XEXP (x, 1)) == PLUS
2899 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
2900 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
2901 && aarch64_base_register_rtx_p (info->base, strict_p))
2903 HOST_WIDE_INT offset;
2904 info->offset = XEXP (XEXP (x, 1), 1);
2905 offset = INTVAL (info->offset);
2907 /* TImode and TFmode values are allowed in both pairs of X
2908 registers and individual Q registers. The available
2910 X,X: 7-bit signed scaled offset
2911 Q: 9-bit signed offset
2912 We conservatively require an offset representable in either mode.
2914 if (mode == TImode || mode == TFmode)
2915 return (offset_7bit_signed_scaled_p (mode, offset)
2916 && offset_9bit_signed_unscaled_p (mode, offset));
2918 if (outer_code == PARALLEL)
2919 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2920 && offset_7bit_signed_scaled_p (mode, offset));
2922 return offset_9bit_signed_unscaled_p (mode, offset);
2929 /* load literal: pc-relative constant pool entry. */
2930 info->type = ADDRESS_SYMBOLIC;
2931 if (outer_code != PARALLEL)
2935 split_const (x, &sym, &addend);
2936 return (GET_CODE (sym) == LABEL_REF
2937 || (GET_CODE (sym) == SYMBOL_REF
2938 && CONSTANT_POOL_ADDRESS_P (sym)));
2943 info->type = ADDRESS_LO_SUM;
2944 info->base = XEXP (x, 0);
2945 info->offset = XEXP (x, 1);
2946 if (allow_reg_index_p
2947 && aarch64_base_register_rtx_p (info->base, strict_p))
2950 split_const (info->offset, &sym, &offs);
2951 if (GET_CODE (sym) == SYMBOL_REF
2952 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
2953 == SYMBOL_SMALL_ABSOLUTE))
2955 /* The symbol and offset must be aligned to the access size. */
2957 unsigned int ref_size;
2959 if (CONSTANT_POOL_ADDRESS_P (sym))
2960 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
2961 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
2963 tree exp = SYMBOL_REF_DECL (sym);
2964 align = TYPE_ALIGN (TREE_TYPE (exp));
2965 align = CONSTANT_ALIGNMENT (exp, align);
2967 else if (SYMBOL_REF_DECL (sym))
2968 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
2970 align = BITS_PER_UNIT;
2972 ref_size = GET_MODE_SIZE (mode);
2974 ref_size = GET_MODE_SIZE (DImode);
2976 return ((INTVAL (offs) & (ref_size - 1)) == 0
2977 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
2988 aarch64_symbolic_address_p (rtx x)
2992 split_const (x, &x, &offset);
2993 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
2996 /* Classify the base of symbolic expression X, given that X appears in
2998 static enum aarch64_symbol_type
2999 aarch64_classify_symbolic_expression (rtx x, enum aarch64_symbol_context context)
3002 split_const (x, &x, &offset);
3003 return aarch64_classify_symbol (x, context);
3007 /* Return TRUE if X is a legitimate address for accessing memory in
3010 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3012 struct aarch64_address_info addr;
3014 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3017 /* Return TRUE if X is a legitimate address for accessing memory in
3018 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3021 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3022 RTX_CODE outer_code, bool strict_p)
3024 struct aarch64_address_info addr;
3026 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3029 /* Return TRUE if rtx X is immediate constant 0.0 */
3031 aarch64_float_const_zero_rtx_p (rtx x)
3035 if (GET_MODE (x) == VOIDmode)
3038 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3039 if (REAL_VALUE_MINUS_ZERO (r))
3040 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3041 return REAL_VALUES_EQUAL (r, dconst0);
3044 /* Return the fixed registers used for condition codes. */
3047 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3050 *p2 = INVALID_REGNUM;
/* Choose the condition-code mode for comparing X with Y using comparison
   CODE.  The visible tests run in this order:
     - X has a MODE_FLOAT mode;
     - X is an SImode/DImode PLUS or MINUS compared with EQ, NE, LT or GE;
     - X is an SImode/DImode shift or extension and Y is a REG or SUBREG;
     - X is QImode or HImode, which yields CC_SESWPmode for the signed
       orderings GT/GE/LE/LT and CC_ZESWPmode otherwise;
     - everything else.
   NOTE(review): most return statements and part of some conditions are
   not visible in this listing; confirm the modes returned by the first
   three tests against the full source.  */
3055 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3057 /* All floating point compares return CCFP if it is an equality
3058 comparison, and CCFPE otherwise. */
3059 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3086 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3088 && (code == EQ || code == NE || code == LT || code == GE)
3089 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS))
3092 /* A compare with a shifted operand. Because of canonicalization,
3093 the comparison will have to be swapped when we emit the assembly
3095 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3096 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3097 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3098 || GET_CODE (x) == LSHIFTRT
3099 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3102 /* A compare of a mode narrower than SI mode against zero can be done
3103 by extending the value in the comparison. */
3104 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3106 /* Only use sign-extension if we really need it. */
3107 return ((code == GT || code == GE || code == LE || code == LT)
3108 ? CC_SESWPmode : CC_ZESWPmode);
3110 /* For everything else, return CCmode. */
/* Map comparison rtx X to an AARCH64_* condition code.  The mode is taken
   from operand 0 of X; if that is not a MODE_CC mode it is derived with
   SELECT_CC_MODE from the comparison code and both operands.

   Four lookup tables follow, one per condition-code mode (the
   `case <mode>:' labels selecting them are not visible in this listing):
     1. a table that also handles ORDERED/UNORDERED and the UN* codes;
     2. a direct mapping of the signed and unsigned orderings;
     3. the same codes mapped to the condition for swapped operands
        (GE->LE, GT->LT, GEU->LS, GTU->CC, ...);
     4. a table handling only NE, EQ, GE (PL) and LT (MI).
   Every table ends in gcc_unreachable () for codes it does not handle.
   NOTE(review): which mode selects which table must be confirmed against
   the full source.  */
3115 aarch64_get_condition_code (rtx x)
3117 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3118 enum rtx_code comp_code = GET_CODE (x);
3120 if (GET_MODE_CLASS (mode) != MODE_CC)
3121 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3129 case GE: return AARCH64_GE;
3130 case GT: return AARCH64_GT;
3131 case LE: return AARCH64_LS;
3132 case LT: return AARCH64_MI;
3133 case NE: return AARCH64_NE;
3134 case EQ: return AARCH64_EQ;
3135 case ORDERED: return AARCH64_VC;
3136 case UNORDERED: return AARCH64_VS;
3137 case UNLT: return AARCH64_LT;
3138 case UNLE: return AARCH64_LE;
3139 case UNGT: return AARCH64_HI;
3140 case UNGE: return AARCH64_PL;
3141 default: gcc_unreachable ();
3148 case NE: return AARCH64_NE;
3149 case EQ: return AARCH64_EQ;
3150 case GE: return AARCH64_GE;
3151 case GT: return AARCH64_GT;
3152 case LE: return AARCH64_LE;
3153 case LT: return AARCH64_LT;
3154 case GEU: return AARCH64_CS;
3155 case GTU: return AARCH64_HI;
3156 case LEU: return AARCH64_LS;
3157 case LTU: return AARCH64_CC;
3158 default: gcc_unreachable ();
3167 case NE: return AARCH64_NE;
3168 case EQ: return AARCH64_EQ;
3169 case GE: return AARCH64_LE;
3170 case GT: return AARCH64_LT;
3171 case LE: return AARCH64_GE;
3172 case LT: return AARCH64_GT;
3173 case GEU: return AARCH64_LS;
3174 case GTU: return AARCH64_CC;
3175 case LEU: return AARCH64_CS;
3176 case LTU: return AARCH64_HI;
3177 default: gcc_unreachable ();
3184 case NE: return AARCH64_NE;
3185 case EQ: return AARCH64_EQ;
3186 case GE: return AARCH64_PL;
3187 case LT: return AARCH64_MI;
3188 default: gcc_unreachable ();
/* NOTE(review): only the signature of this function is visible in this
   listing.  The operand printer passes INTVAL (x) to it when asked to
   print "the number of non-zero bits" of a const_int, so it presumably
   returns the number of set bits in VALUE -- confirm against the body.  */
3199 bit_count (unsigned HOST_WIDE_INT value)
/* Print operand X to F, formatted according to the operand modifier CODE.

   Going by the comments on each visible branch, the modifiers cover: the
   sign/zero-extend size letter, the log2 of a const_int, the number of
   set bits of a const_int, the registers of a TImode register pair
   (higher numbered, least significant, most significant), a condition
   and its inverse, scalar FP/SIMD register names, the first register of
   an FP/SIMD register list, general registers (including the zero
   register and SP), plain operands, and symbolic operands with their
   relocation prefixes (:got:, :lo12:, :tprel_hi12:, ...).

   An operand that does not suit CODE is reported through
   output_operand_lossage rather than printed.

   NOTE(review): several branches write to asm_out_file instead of F
   (output_addr_const, the float constant and the relocation prefixes);
   confirm that F is always asm_out_file for callers.
   NOTE(review): the `case' labels, braces and short statements of this
   function are not visible in this listing, so the modifier letter for
   each branch cannot be read from here.  */
3213 aarch64_print_operand (FILE *f, rtx x, char code)
3218 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3222 if (GET_CODE (x) != CONST_INT
3223 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3225 output_operand_lossage ("invalid operand for '%%%c'", code);
3241 output_operand_lossage ("invalid operand for '%%%c'", code);
3251 /* Print N such that 2^N == X. */
3252 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3254 output_operand_lossage ("invalid operand for '%%%c'", code);
3258 asm_fprintf (f, "%d", n);
3263 /* Print the number of non-zero bits in X (a const_int). */
3264 if (GET_CODE (x) != CONST_INT)
3266 output_operand_lossage ("invalid operand for '%%%c'", code);
3270 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3274 /* Print the higher numbered register of a pair (TImode) of regs. */
3275 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3277 output_operand_lossage ("invalid operand for '%%%c'", code);
3281 asm_fprintf (f, "%r", REGNO (x) + 1);
3285 /* Print the least significant register of a pair (TImode) of regs. */
3286 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3288 output_operand_lossage ("invalid operand for '%%%c'", code);
3291 asm_fprintf (f, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
3295 /* Print the most significant register of a pair (TImode) of regs. */
3296 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3298 output_operand_lossage ("invalid operand for '%%%c'", code);
3301 asm_fprintf (f, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
3305 /* Print a condition (eq, ne, etc). */
3307 /* CONST_TRUE_RTX means always -- that's the default. */
3308 if (x == const_true_rtx)
3311 if (!COMPARISON_P (x))
3313 output_operand_lossage ("invalid operand for '%%%c'", code);
3317 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3321 /* Print the inverse of a condition (eq <-> ne, etc). */
3323 /* CONST_TRUE_RTX means never -- that's the default. */
3324 if (x == const_true_rtx)
3330 if (!COMPARISON_P (x))
3332 output_operand_lossage ("invalid operand for '%%%c'", code);
3336 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3337 (aarch64_get_condition_code (x))], f);
3345 /* Print a scalar FP/SIMD register name. */
3346 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3348 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3351 asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code, REGNO (x) - V0_REGNUM);
3358 /* Print the first FP/SIMD register name in a list. */
3359 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3361 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3364 asm_fprintf (f, "%sv%d", REGISTER_PREFIX,
3365 REGNO (x) - V0_REGNUM + (code - 'S'));
3370 /* Print a general register name or the zero register (32-bit or
3373 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3375 asm_fprintf (f, "%s%czr", REGISTER_PREFIX, code);
3379 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3381 asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code,
3382 REGNO (x) - R0_REGNUM);
3386 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3388 asm_fprintf (f, "%s%ssp", REGISTER_PREFIX, code == 'w' ? "w" : "");
3395 /* Print a normal operand, if it's a general register, then we
3399 output_operand_lossage ("missing operand");
3403 switch (GET_CODE (x))
3406 asm_fprintf (f, "%r", REGNO (x));
3410 aarch64_memory_reference_mode = GET_MODE (x);
3411 output_address (XEXP (x, 0));
3416 output_addr_const (asm_out_file, x);
3420 asm_fprintf (f, "%wd", INTVAL (x));
3424 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3426 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3428 HOST_WIDE_INT_MAX));
3429 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3431 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3440 /* CONST_DOUBLE can represent a double-width integer.
3441 In this case, the mode of x is VOIDmode. */
3442 if (GET_MODE (x) == VOIDmode)
3444 else if (aarch64_float_const_zero_rtx_p (x))
3449 else if (aarch64_float_const_representable_p (x))
3452 char float_buf[buf_size] = {'\0'};
3454 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3455 real_to_decimal_for_mode (float_buf, &r,
3458 asm_fprintf (asm_out_file, "%s", float_buf);
3462 output_operand_lossage ("invalid constant");
3465 output_operand_lossage ("invalid operand");
3471 if (GET_CODE (x) == HIGH)
3474 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3476 case SYMBOL_SMALL_GOT:
3477 asm_fprintf (asm_out_file, ":got:");
3480 case SYMBOL_SMALL_TLSGD:
3481 asm_fprintf (asm_out_file, ":tlsgd:");
3484 case SYMBOL_SMALL_TLSDESC:
3485 asm_fprintf (asm_out_file, ":tlsdesc:");
3488 case SYMBOL_SMALL_GOTTPREL:
3489 asm_fprintf (asm_out_file, ":gottprel:");
3492 case SYMBOL_SMALL_TPREL:
3493 asm_fprintf (asm_out_file, ":tprel:");
3499 output_addr_const (asm_out_file, x);
3503 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3505 case SYMBOL_SMALL_GOT:
3506 asm_fprintf (asm_out_file, ":lo12:");
3509 case SYMBOL_SMALL_TLSGD:
3510 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3513 case SYMBOL_SMALL_TLSDESC:
3514 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3517 case SYMBOL_SMALL_GOTTPREL:
3518 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3521 case SYMBOL_SMALL_TPREL:
3522 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3528 output_addr_const (asm_out_file, x);
3533 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3535 case SYMBOL_SMALL_TPREL:
3536 asm_fprintf (asm_out_file, ":tprel_hi12:");
3541 output_addr_const (asm_out_file, x);
3545 output_operand_lossage ("invalid operand prefix '%%%c'", code);
/* Print memory address X to F in assembler syntax.  X is classified with
   aarch64_classify_address using the mode held in
   aarch64_memory_reference_mode, and printed according to the resulting
   address type:
     ADDRESS_REG_IMM    [base] or [base,offset]
     ADDRESS_REG_REG    [base,index] or [base,index,lsl shift]
     ADDRESS_REG_UXTW   [base,wN,uxtw] or [base,wN,uxtw shift]
     ADDRESS_REG_SXTW   [base,wN,sxtw] or [base,wN,sxtw shift]
     ADDRESS_REG_WB     pre-indexed "[base,off]!" or post-indexed
                        "[base],off"; the increment/decrement forms use
                        the size of the memory reference mode as offset
     ADDRESS_LO_SUM     [base,#:lo12:symbol]
   The last visible statement prints X with output_addr_const.
   NOTE(review): the `case' labels of the inner switch on GET_CODE (X) and
   all `return'/`break' statements are not visible in this listing, so
   which paths reach that final output_addr_const cannot be read from
   here.  */
3551 aarch64_print_operand_address (FILE *f, rtx x)
3553 struct aarch64_address_info addr;
3555 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3559 case ADDRESS_REG_IMM:
3560 if (addr.offset == const0_rtx)
3561 asm_fprintf (f, "[%r]", REGNO (addr.base));
3563 asm_fprintf (f, "[%r,%wd]", REGNO (addr.base),
3564 INTVAL (addr.offset));
3567 case ADDRESS_REG_REG:
3568 if (addr.shift == 0)
3569 asm_fprintf (f, "[%r,%r]", REGNO (addr.base),
3570 REGNO (addr.offset));
3572 asm_fprintf (f, "[%r,%r,lsl %u]", REGNO (addr.base),
3573 REGNO (addr.offset), addr.shift);
3576 case ADDRESS_REG_UXTW:
3577 if (addr.shift == 0)
3578 asm_fprintf (f, "[%r,w%d,uxtw]", REGNO (addr.base),
3579 REGNO (addr.offset) - R0_REGNUM);
3581 asm_fprintf (f, "[%r,w%d,uxtw %u]", REGNO (addr.base),
3582 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3585 case ADDRESS_REG_SXTW:
3586 if (addr.shift == 0)
3587 asm_fprintf (f, "[%r,w%d,sxtw]", REGNO (addr.base),
3588 REGNO (addr.offset) - R0_REGNUM);
3590 asm_fprintf (f, "[%r,w%d,sxtw %u]", REGNO (addr.base),
3591 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3594 case ADDRESS_REG_WB:
3595 switch (GET_CODE (x))
3598 asm_fprintf (f, "[%r,%d]!", REGNO (addr.base),
3599 GET_MODE_SIZE (aarch64_memory_reference_mode));
3602 asm_fprintf (f, "[%r],%d", REGNO (addr.base),
3603 GET_MODE_SIZE (aarch64_memory_reference_mode));
3606 asm_fprintf (f, "[%r,-%d]!", REGNO (addr.base),
3607 GET_MODE_SIZE (aarch64_memory_reference_mode));
3610 asm_fprintf (f, "[%r],-%d", REGNO (addr.base),
3611 GET_MODE_SIZE (aarch64_memory_reference_mode));
3614 asm_fprintf (f, "[%r,%wd]!", REGNO (addr.base),
3615 INTVAL (addr.offset));
3618 asm_fprintf (f, "[%r],%wd", REGNO (addr.base),
3619 INTVAL (addr.offset));
3626 case ADDRESS_LO_SUM:
3627 asm_fprintf (f, "[%r,#:lo12:", REGNO (addr.base));
3628 output_addr_const (f, addr.offset);
3629 asm_fprintf (f, "]");
3632 case ADDRESS_SYMBOLIC:
3636 output_addr_const (f, x);
/* Function profiling is not implemented: both arguments are unused and
   the only action is to report "function profiling" through sorry ().  */
3640 aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED,
3641 int labelno ATTRIBUTE_UNUSED)
3643 sorry ("function profiling");
/* Test whether X is, or contains, a LABEL_REF.  X itself is checked
   first, then an UNSPEC_TLS is special-cased, and finally the operands
   are walked from last to first, recursing into every 'e' operand and
   every element of an 'E' vector.
   NOTE(review): the return type and all return statements are missing
   from this listing; the comment on the UNSPEC_TLS test suggests labels
   inside such an unspec are deliberately not counted -- confirm.  */
3647 aarch64_label_mentioned_p (rtx x)
3652 if (GET_CODE (x) == LABEL_REF)
3655 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3656 referencing instruction, but they are constant offsets, not
3658 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3661 fmt = GET_RTX_FORMAT (GET_CODE (x));
3662 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3668 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3669 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3672 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3679 /* Implement REGNO_REG_CLASS. */
/* Map register number REGNO to a register class.  The tests run in this
   order: general-purpose register, SP, the soft frame pointer or argument
   pointer, and finally FP/SIMD registers, which map to FP_LO_REGS when
   FP_LO_REGNUM_P holds and to FP_REGS otherwise.
   NOTE(review): the classes returned by the first three tests and the
   fallback for any other register are not visible in this listing.  */
3682 aarch64_regno_regclass (unsigned regno)
3684 if (GP_REGNUM_P (regno))
3687 if (regno == SP_REGNUM)
3690 if (regno == FRAME_POINTER_REGNUM
3691 || regno == ARG_POINTER_REGNUM)
3694 if (FP_REGNUM_P (regno))
3695 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3700 /* Try a machine-dependent way of reloading an illegitimate address
3701 operand. If we find one, push the reload and return the new rtx. */
/* Reload-address legitimizer for the address at *X_P used in mode MODE.
   OPNUM and TYPE are passed straight through to push_reload; IND_LEVELS
   is unused.  Three shapes are recognised, in this order:

   1. Vector mode with (plus reg const_int): base-plus-offset is not
      allowed for vector modes, so the address is reloaded into a
      BASE_REG_CLASS register.

   2. (plus (plus reg const_int) const_int): the form produced by case 3
      on an earlier call; the inner sum is reloaded into a base register.

   3. (plus hard_reg const_int) with a valid base register: the offset is
      split into a low part (VAL & 0xfff, adjusted so that it stays a
      multiple of the access size) left in the memory reference, and a
      high part that is added to the base and reloaded.  If the high part
      is not a valid (optionally shifted) 12-bit immediate it is placed in
      the constant pool with force_const_mem.  The split is abandoned when
      HIGH + LOW no longer equals the original offset.

   NOTE(review): the return statements, some conditions of case 3 and the
   declaration of `x' / `orig_rtx' are not visible in this listing.  */
3704 aarch64_legitimize_reload_address (rtx *x_p,
3705 enum machine_mode mode,
3706 int opnum, int type,
3707 int ind_levels ATTRIBUTE_UNUSED)
3711 /* Do not allow mem (plus (reg, const)) if vector mode. */
3712 if (aarch64_vector_mode_p (mode)
3713 && GET_CODE (x) == PLUS
3714 && REG_P (XEXP (x, 0))
3715 && CONST_INT_P (XEXP (x, 1)))
3719 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3720 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3721 opnum, (enum reload_type) type);
3725 /* We must recognize output that we have already generated ourselves. */
3726 if (GET_CODE (x) == PLUS
3727 && GET_CODE (XEXP (x, 0)) == PLUS
3728 && REG_P (XEXP (XEXP (x, 0), 0))
3729 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3730 && CONST_INT_P (XEXP (x, 1)))
3732 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3733 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3734 opnum, (enum reload_type) type);
3738 /* We wish to handle large displacements off a base register by splitting
3739 the addend across an add and the mem insn. This can cut the number of
3740 extra insns needed from 3 to 1. It is only useful for load/store of a
3741 single register with 12 bit offset field. */
3742 if (GET_CODE (x) == PLUS
3743 && REG_P (XEXP (x, 0))
3744 && CONST_INT_P (XEXP (x, 1))
3745 && HARD_REGISTER_P (XEXP (x, 0))
3748 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3750 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3751 HOST_WIDE_INT low = val & 0xfff;
3752 HOST_WIDE_INT high = val - low;
3756 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
3757 BLKmode alignment. */
3758 if (GET_MODE_SIZE (mode) == 0)
3761 offs = low % GET_MODE_SIZE (mode);
3763 /* Align misaligned offset by adjusting high part to compensate. */
3766 if (aarch64_uimm12_shift (high + offs))
3775 offs = GET_MODE_SIZE (mode) - offs;
3777 high = high + (low & 0x1000) - offs;
3782 /* Check for overflow. */
3783 if (high + low != val)
3786 cst = GEN_INT (high);
3787 if (!aarch64_uimm12_shift (high))
3788 cst = force_const_mem (Pmode, cst);
3790 /* Reload high part into base reg, leaving the low part
3791 in the mem instruction. */
3792 x = gen_rtx_PLUS (Pmode,
3793 gen_rtx_PLUS (Pmode, XEXP (x, 0), cst),
3796 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3797 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
3798 opnum, (enum reload_type) type);
/* Secondary-reload decision for moving X in mode MODE to or from a
   register class (`rclass'; its declaration is not visible in this
   listing).  IN_P is unused.  The visible cases are:

   - (plus sp const_int) whose constant is not a valid (optionally
     shifted) 12-bit immediate: SRI->icode is set to the
     reload_sp_immediate pattern.
   - a TFmode/TImode FP register moved within FP_REGS when TARGET_SIMD is
     off: SRI->icode is set to the aarch64_reload_movtf or
     aarch64_reload_movti pattern, which provide the scratch.
   - a 16-byte memory access into CORE_REGS when FP registers may be
     used, and a TImode/TFmode constant into FP_REGS.

   NOTE(review): the return statements, and therefore the intermediate
   class requested in the last two cases, are not visible here.  */
3807 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
3809 enum machine_mode mode,
3810 secondary_reload_info *sri)
3812 /* Address expressions of the form PLUS (SP, large_offset) need two
3813 scratch registers, one for the constant, and one for holding a
3814 copy of SP, since SP cannot be used on the RHS of an add-reg
3817 && GET_CODE (x) == PLUS
3818 && XEXP (x, 0) == stack_pointer_rtx
3819 && CONST_INT_P (XEXP (x, 1))
3820 && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
3822 sri->icode = CODE_FOR_reload_sp_immediate;
3826 /* Without the TARGET_SIMD instructions we cannot move a Q register
3827 to a Q register directly. We need a scratch. */
3828 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
3829 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
3830 && reg_class_subset_p (rclass, FP_REGS))
3833 sri->icode = CODE_FOR_aarch64_reload_movtf;
3834 else if (mode == TImode)
3835 sri->icode = CODE_FOR_aarch64_reload_movti;
3839 /* A TFmode or TImode memory access should be handled via an FP_REGS
3840 because AArch64 has richer addressing modes for LDR/STR instructions
3841 than LDP/STP instructions. */
3842 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
3843 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
3846 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
3853 aarch64_can_eliminate (const int from, const int to)
3853 aarch64_can_eliminate (const int from, const int to)
/* Body of aarch64_can_eliminate (FROM, TO): decide whether register FROM
   may be eliminated in favour of register TO.  When a frame pointer is
   needed, the visible tests look at each FROM/TO pair in turn (argument
   pointer to hard frame pointer or stack pointer, soft frame pointer to
   stack pointer when the function does not call alloca, soft frame
   pointer to hard frame pointer).  Otherwise the soft-frame-pointer to
   stack-pointer elimination is examined again when LR is ever live.
   NOTE(review): none of the return statements are visible in this
   listing, so the answer given for each pair must be confirmed against
   the full source.  */
3855 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
3856 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
3858 if (frame_pointer_needed)
3860 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3862 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3864 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3865 && !cfun->calls_alloca)
3867 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3873 /* If we decided that we didn't need a frame pointer but then used
3874 LR in the function, then we do need a frame pointer after all, so
3875 prevent this elimination to ensure a frame pointer is used. */
3877 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3878 && df_regs_ever_live_p (LR_REGNUM))
/* Compute the offset between registers FROM and TO at function entry.
   The frame is laid out first; the total frame size is the sum of the
   local frame, the saved-register area, the outgoing-argument area and
   the saved varargs area, rounded up to the stack boundary.

   For TO == HARD_FRAME_POINTER_REGNUM the argument pointer is that total
   minus the outgoing-argument size, and the soft frame pointer is the
   saved-register size.  For TO == STACK_POINTER_REGNUM and the soft
   frame pointer, `elim' is outgoing args + saved registers - fp_lr_offset
   rounded up to the stack boundary.
   NOTE(review): how `elim' is used and what is returned on the remaining
   paths is not visible in this listing.  */
3885 aarch64_initial_elimination_offset (unsigned from, unsigned to)
3887 HOST_WIDE_INT frame_size;
3888 HOST_WIDE_INT offset;
3890 aarch64_layout_frame ();
3891 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
3892 + crtl->outgoing_args_size
3893 + cfun->machine->saved_varargs_size);
3895 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
3896 offset = frame_size;
3898 if (to == HARD_FRAME_POINTER_REGNUM)
3900 if (from == ARG_POINTER_REGNUM)
3901 return offset - crtl->outgoing_args_size;
3903 if (from == FRAME_POINTER_REGNUM)
3904 return cfun->machine->frame.saved_regs_size;
3907 if (to == STACK_POINTER_REGNUM)
3909 if (from == FRAME_POINTER_REGNUM)
3911 HOST_WIDE_INT elim = crtl->outgoing_args_size
3912 + cfun->machine->frame.saved_regs_size
3913 - cfun->machine->frame.fp_lr_offset;
3914 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
3923 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
3927 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
3931 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
3936 aarch64_asm_trampoline_template (FILE *f)
3938 asm_fprintf (f, "\tldr\t%r, .+16\n", IP1_REGNUM);
3939 asm_fprintf (f, "\tldr\t%r, .+20\n", STATIC_CHAIN_REGNUM);
3940 asm_fprintf (f, "\tbr\t%r\n", IP1_REGNUM);
3941 assemble_aligned_integer (4, const0_rtx);
3942 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3943 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
/* Return the size of a trampoline in bytes.  */
unsigned
aarch64_trampoline_size (void)
{
  /* 3 insns + padding + 2 dwords.  */
  enum { insn_bytes = 3 * 4, pad_bytes = 4, data_bytes = 2 * 8 };

  return insn_bytes + pad_bytes + data_bytes;
}
3953 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3955 rtx fnaddr, mem, a_tramp;
3957 /* Don't need to copy the trailing D-words, we fill those in below. */
3958 emit_block_move (m_tramp, assemble_trampoline_template (),
3959 GEN_INT (TRAMPOLINE_SIZE - 16), BLOCK_OP_NORMAL);
3960 mem = adjust_address (m_tramp, DImode, 16);
3961 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3962 emit_move_insn (mem, fnaddr);
3964 mem = adjust_address (m_tramp, DImode, 24);
3965 emit_move_insn (mem, chain_value);
3967 /* XXX We should really define a "clear_cache" pattern and use
3968 gen_clear_cache(). */
3969 a_tramp = XEXP (m_tramp, 0);
3970 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3971 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3972 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
/* Number of registers of class REGCLASS needed to hold a value of mode
   MODE.  The only expression visible here rounds the mode size up to
   whole 16-byte units for vector modes and to whole 8-byte units for all
   other modes.
   NOTE(review): the switch on REGCLASS that selects this expression, and
   the results for the other classes, are not visible in this listing.  */
3975 static unsigned char
3976 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
3987 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
3988 (GET_MODE_SIZE (mode) + 7) / 8;
4002 aarch64_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t regclass)
4004 return ((regclass == POINTER_REGS || regclass == STACK_REG)
4005 ? GENERAL_REGS : regclass);
/* Write a reference to the label NAME to F, preceded by the
   user-label prefix (the %U directive of asm_fprintf).  */
void
aarch64_asm_output_labelref (FILE* f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}
4015 aarch64_elf_asm_constructor (rtx symbol, int priority)
4017 if (priority == DEFAULT_INIT_PRIORITY)
4018 default_ctor_section_asm_out_constructor (symbol, priority);
4023 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4024 s = get_section (buf, SECTION_WRITE, NULL);
4025 switch_to_section (s);
4026 assemble_align (POINTER_SIZE);
4027 fputs ("\t.dword\t", asm_out_file);
4028 output_addr_const (asm_out_file, symbol);
4029 fputc ('\n', asm_out_file);
/* Output a destructor entry for SYMBOL with priority PRIORITY.
   DEFAULT_INIT_PRIORITY is delegated to
   default_dtor_section_asm_out_destructor.  The remaining statements build
   a section named ".fini_array.NNNNN" (PRIORITY zero-padded to five
   digits), switch to it, align to POINTER_SIZE and emit SYMBOL as a
   .dword.
   NOTE(review): presumably those statements form the non-default branch --
   confirm.  */
4034 aarch64_elf_asm_destructor (rtx symbol, int priority)
4036 if (priority == DEFAULT_INIT_PRIORITY)
4037 default_dtor_section_asm_out_destructor (symbol, priority);
4042 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4043 s = get_section (buf, SECTION_WRITE, NULL);
4044 switch_to_section (s);
4045 assemble_align (POINTER_SIZE);
4046 fputs ("\t.dword\t", asm_out_file);
4047 output_addr_const (asm_out_file, symbol);
4048 fputc ('\n', asm_out_file);
/* Output the assembly for a casesi dispatch described by OPERANDS.  The
   jump table is the ADDR_DIFF_VEC found in the insn following the label
   OPERANDS[2]; log2 of its mode size in bytes selects one row of PATTERNS.
   The emitted sequence is: load the table entry, ADR of the "Lrtx" label
   into operand 4, add the extended and scaled entry to it, BR through
   operand 3, and finally the label itself.  */
4053 aarch64_output_casesi (rtx *operands)
4057 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
4059 static const char *const patterns[4][2] =
4062 "ldrb\t%w3, [%0,%w1,uxtw]",
4063 "add\t%3, %4, %w3, sxtb #2"
4066 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4067 "add\t%3, %4, %w3, sxth #2"
4070 "ldr\t%w3, [%0,%w1,uxtw #2]",
4071 "add\t%3, %4, %w3, sxtw #2"
4073 /* We assume that DImode is only generated when not optimizing and
4074 that we don't really need 64-bit address offsets. That would
4075 imply an object file with 8GB of code in a single function! */
4077 "ldr\t%w3, [%0,%w1,uxtw #2]",
4078 "add\t%3, %4, %w3, sxtw #2"
4082 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4084 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4086 gcc_assert (index >= 0 && index <= 3);
4088 /* Need to implement table size reduction, by changing the code below. */
4089 output_asm_insn (patterns[index][0], operands);
4090 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4091 snprintf (buf, sizeof (buf),
4092 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4093 output_asm_insn (buf, operands);
4094 output_asm_insn (patterns[index][1], operands);
4095 output_asm_insn ("br\t%3", operands);
4096 assemble_label (asm_out_file, label);
/* aarch64_uxt_size: when SHIFT is in [0, 3], the sizes 8, 16 and 32 are
   tried in turn and MASK is compared with (2^SIZE - 1) << SHIFT.  */
4101 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4102 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4106 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4108 if (shift >= 0 && shift <= 3)
4111 for (size = 8; size <= 32; size *= 2)
4113 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4114 if (mask == bits << shift)
/* Hook taking MODE and X; both arguments are ignored.
   NOTE(review): the comment in the body suggests this reports that
   constants cannot be placed in blocks -- confirm the returned value.  */
4122 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4123 const_rtx x ATTRIBUTE_UNUSED)
4125 /* We can't use blocks for constants when we're using a per-function
/* Select the section for a constant.  MODE, X and ALIGN are all ignored;
   the result is always the function section of current_function_decl.  */
4131 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4132 rtx x ATTRIBUTE_UNUSED,
4133 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4135 /* Force all constant pool entries into the current function section. */
4136 return function_section (current_function_decl);
4142 /* Helper function for rtx cost calculation. Strip a shift expression
4143 from X. Returns the inner operand if successful, or the original
4144 expression on failure. */
/* Two forms are recognized: ASHIFT, ASHIFTRT or LSHIFTRT by a CONST_INT,
   and MULT by a CONST_INT whose exact_log2 lies in [0, 63].  The unsigned
   cast makes a negative exact_log2 result fail the "< 64" test.  */
4146 aarch64_strip_shift (rtx x)
4150 if ((GET_CODE (op) == ASHIFT
4151 || GET_CODE (op) == ASHIFTRT
4152 || GET_CODE (op) == LSHIFTRT)
4153 && CONST_INT_P (XEXP (op, 1)))
4154 return XEXP (op, 0);
4156 if (GET_CODE (op) == MULT
4157 && CONST_INT_P (XEXP (op, 1))
4158 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4159 return XEXP (op, 0);
4164 /* Helper function for rtx cost calculation. Strip a shift or extend
4165 expression from X. Returns the inner operand if successful, or the
4166 original expression on failure. We deal with a number of possible
4167 canonicalization variations here. */
4169 aarch64_strip_shift_or_extend (rtx x)
4173 /* Zero and sign extraction of a widened value. */
4174 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4175 && XEXP (op, 2) == const0_rtx
4176 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4178 return XEXP (XEXP (op, 0), 0);
4180 /* It can also be represented (for zero-extend) as an AND with an
4182 if (GET_CODE (op) == AND
4183 && GET_CODE (XEXP (op, 0)) == MULT
4184 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4185 && CONST_INT_P (XEXP (op, 1))
4186 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4187 INTVAL (XEXP (op, 1))) != 0)
4188 return XEXP (XEXP (op, 0), 0);
4190 /* Now handle extended register, as this may also have an optional
4191 left shift by 1..4. */
4192 if (GET_CODE (op) == ASHIFT
4193 && CONST_INT_P (XEXP (op, 1))
4194 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4197 if (GET_CODE (op) == ZERO_EXTEND
4198 || GET_CODE (op) == SIGN_EXTEND)
/* Final fallback: strip a plain shift from the original X.  */
4204 return aarch64_strip_shift (x);
4207 /* Calculate the cost of calculating X, storing it in *COST. Result
4208 is true if the total cost of the operation has now been calculated. */
/* EXTRA_COST points at the insn_extra_cost table of the current tuning
   (aarch64_tune_params).  Operand costs are accumulated into *COST through
   recursive rtx_cost calls, after stripping shifts and extends that the
   helper functions recognize.  */
4210 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4211 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4214 const struct cpu_rtx_cost_table *extra_cost
4215 = aarch64_tune_params->insn_extra_cost;
4223 switch (GET_CODE (op0))
4227 *cost += extra_cost->memory_store;
4229 if (op1 != const0_rtx)
4230 *cost += rtx_cost (op1, SET, 1, speed);
4234 if (! REG_P (SUBREG_REG (op0)))
4235 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
/* NOTE(review): the rtx_cost call below passes a literal true where the
   neighbouring calls pass SPEED -- confirm this is intended.  */
4238 /* Cost is just the cost of the RHS of the set. */
4239 *cost += rtx_cost (op1, SET, 1, true);
4242 case ZERO_EXTRACT: /* Bit-field insertion. */
4244 /* Strip any redundant widening of the RHS to meet the width of
4246 if (GET_CODE (op1) == SUBREG)
4247 op1 = SUBREG_REG (op1);
4248 if ((GET_CODE (op1) == ZERO_EXTEND
4249 || GET_CODE (op1) == SIGN_EXTEND)
4250 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4251 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4252 >= INTVAL (XEXP (op0, 1))))
4253 op1 = XEXP (op1, 0);
4254 *cost += rtx_cost (op1, SET, 1, speed);
4264 *cost += extra_cost->memory_load;
4269 op0 = CONST0_RTX (GET_MODE (x));
4277 if (op1 == const0_rtx
4278 && GET_CODE (op0) == AND)
4284 /* Comparisons can work if the order is swapped.
4285 Canonicalization puts the more complex operation first, but
4286 we want it in op1. */
4288 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4300 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4301 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4302 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4304 if (op0 != const0_rtx)
4305 *cost += rtx_cost (op0, MINUS, 0, speed);
4307 if (CONST_INT_P (op1))
4309 if (!aarch64_uimm12_shift (INTVAL (op1)))
4310 *cost += rtx_cost (op1, MINUS, 1, speed);
4314 op1 = aarch64_strip_shift_or_extend (op1);
4315 *cost += rtx_cost (op1, MINUS, 1, speed);
4326 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4328 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4330 *cost += rtx_cost (op0, PLUS, 0, speed);
4334 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4337 && GET_CODE (op0) == MULT)
4339 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4340 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4341 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4342 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4344 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4346 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4348 + rtx_cost (op1, PLUS, 1, speed));
4350 *cost += extra_cost->int_multiply_extend_add;
4353 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4354 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4355 + rtx_cost (op1, PLUS, 1, speed));
4358 *cost += extra_cost->int_multiply_add;
4361 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4362 + rtx_cost (op1, PLUS, 1, speed));
4376 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4378 if (CONST_INT_P (op1)
4379 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4381 *cost += rtx_cost (op0, AND, 0, speed);
4385 if (GET_CODE (op0) == NOT)
4386 op0 = XEXP (op0, 0);
4387 op0 = aarch64_strip_shift (op0);
4388 *cost += (rtx_cost (op0, AND, 0, speed)
4389 + rtx_cost (op1, AND, 1, speed));
4396 if ((GET_MODE (x) == DImode
4397 && GET_MODE (XEXP (x, 0)) == SImode)
4398 || GET_CODE (XEXP (x, 0)) == MEM)
4400 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4406 if (GET_CODE (XEXP (x, 0)) == MEM)
4408 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4414 if (!CONST_INT_P (XEXP (x, 1)))
4415 *cost += COSTS_N_INSNS (2);
4422 /* Shifting by a register often takes an extra cycle. */
4423 if (speed && !CONST_INT_P (XEXP (x, 1)))
4424 *cost += extra_cost->register_shift;
4426 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4430 if (!CONSTANT_P (XEXP (x, 0)))
4431 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4435 if (!CONSTANT_P (XEXP (x, 1)))
4436 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4437 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4442 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4449 *cost = COSTS_N_INSNS (1);
4450 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4452 if (CONST_INT_P (op1)
4453 && exact_log2 (INTVAL (op1)) > 0)
4455 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4459 if ((GET_CODE (op0) == ZERO_EXTEND
4460 && GET_CODE (op1) == ZERO_EXTEND)
4461 || (GET_CODE (op0) == SIGN_EXTEND
4462 && GET_CODE (op1) == SIGN_EXTEND))
4464 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4465 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4467 *cost += extra_cost->int_multiply_extend;
4472 *cost += extra_cost->int_multiply;
4476 if (GET_MODE (x) == DFmode)
4477 *cost += extra_cost->double_multiply;
4478 else if (GET_MODE (x) == SFmode)
4479 *cost += extra_cost->float_multiply;
4482 return false; /* All arguments need to be in registers. */
4486 *cost = COSTS_N_INSNS (2);
4489 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4490 *cost += (extra_cost->int_multiply_add
4491 + extra_cost->int_divide);
4492 else if (GET_MODE (x) == DFmode)
4493 *cost += (extra_cost->double_multiply
4494 + extra_cost->double_divide);
4495 else if (GET_MODE (x) == SFmode)
4496 *cost += (extra_cost->float_multiply
4497 + extra_cost->float_divide);
4499 return false; /* All arguments need to be in registers. */
4503 *cost = COSTS_N_INSNS (1);
4506 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4507 *cost += extra_cost->int_divide;
4508 else if (GET_MODE (x) == DFmode)
4509 *cost += extra_cost->double_divide;
4510 else if (GET_MODE (x) == SFmode)
4511 *cost += extra_cost->float_divide;
4513 return false; /* All arguments need to be in registers. */
/* Return the cost of address X, read from the addr_cost table of the
   current tuning: pre_modify for PRE_INC/PRE_DEC/PRE_MODIFY, post_modify
   for POST_INC/POST_DEC/POST_MODIFY, and imm_offset for MEM, LABEL_REF and
   SYMBOL_REF.  For the two-operand form (presumably guarded by a PLUS
   check -- confirm) the cost is imm_offset when operand 1 is a CONST_INT,
   register_extend when operand 0 is a MULT, ZERO_EXTEND or SIGN_EXTEND,
   and register_offset otherwise.
   NOTE(review): X is marked ATTRIBUTE_UNUSED although it is read below.  */
4522 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4523 enum machine_mode mode ATTRIBUTE_UNUSED,
4524 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4526 enum rtx_code c = GET_CODE (x);
4527 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4529 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4530 return addr_cost->pre_modify;
4532 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4533 return addr_cost->post_modify;
4537 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4538 return addr_cost->imm_offset;
4539 else if (GET_CODE (XEXP (x, 0)) == MULT
4540 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4541 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4542 return addr_cost->register_extend;
4544 return addr_cost->register_offset;
4546 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4547 return addr_cost->imm_offset;
/* Return the cost of moving a value of mode MODE from register class FROM
   to register class TO, read from the regmove_cost table of the current
   tuning.  Moves that involve GENERAL_REGS on either side use the GP2GP,
   GP2FP or FP2GP entries; everything else is costed as FP2FP, except for
   128-bit values when AdvSIMD is disabled (see below).  */
4553 aarch64_register_move_cost (enum machine_mode mode,
4554 reg_class_t from, reg_class_t to)
4556 const struct cpu_regmove_cost *regmove_cost
4557 = aarch64_tune_params->regmove_cost;
4559 if (from == GENERAL_REGS && to == GENERAL_REGS)
4560 return regmove_cost->GP2GP;
4561 else if (from == GENERAL_REGS)
4562 return regmove_cost->GP2FP;
4563 else if (to == GENERAL_REGS)
4564 return regmove_cost->FP2GP;
4566 /* When AdvSIMD instructions are disabled it is not possible to move
4567 a 128-bit value directly between Q registers. This is handled in
4568 secondary reload. A general register is used as a scratch to move
4569 the upper DI value and the lower DI value is moved directly,
4570 hence the cost is the sum of three moves. */
/* The size test must look at MODE, not at the register classes, and
   GET_MODE_SIZE is measured in bytes, so a 128-bit value has size 16.  */
4572 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4573 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4575 return regmove_cost->FP2FP;
/* Return the memory move cost.  MODE, RCLASS and IN are all ignored; the
   result is always the memmov_cost entry of the current tuning.  */
4579 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4580 reg_class_t rclass ATTRIBUTE_UNUSED,
4581 bool in ATTRIBUTE_UNUSED)
4583 return aarch64_tune_params->memmov_cost;
4586 static void initialize_aarch64_code_model (void);
4588 /* Parse the architecture extension string. */
/* STR is scanned one '+'-delimited modifier at a time.  A modifier that
   starts with "no" is checked separately, and "+no" with nothing after it
   is an error.  Each name must match an entry of all_extensions exactly;
   its flags_on are then ORed into aarch64_isa_flags, or its flags_off are
   cleared from it.  Unknown names are reported with error ().  */
4591 aarch64_parse_extension (char *str)
4593 /* The extension string is parsed left to right. */
4594 const struct aarch64_option_extension *opt = NULL;
4596 /* Flag to say whether we are adding or removing an extension. */
4597 int adding_ext = -1;
4599 while (str != NULL && *str != 0)
4605 ext = strchr (str, '+');
4612 if (len >= 2 && strncmp (str, "no", 2) == 0)
4623 error ("missing feature modifier after %qs", "+no");
4627 /* Scan over the extensions table trying to find an exact match. */
4628 for (opt = all_extensions; opt->name != NULL; opt++)
4630 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
4632 /* Add or remove the extension. */
4634 aarch64_isa_flags |= opt->flags_on;
4636 aarch64_isa_flags &= ~(opt->flags_off);
4641 if (opt->name == NULL)
4643 /* Extension not found in list. */
4644 error ("unknown feature modifier %qs", str);
4654 /* Parse the ARCH string. */
/* Works on a stack copy of aarch64_arch_string.  The first '+' separates
   the architecture name from any extensions.  The name is looked up in
   all_architectures by exact match; on success selected_arch,
   aarch64_isa_flags and selected_cpu are set and the extension suffix, if
   any, is passed to aarch64_parse_extension.  A missing or unknown name is
   reported with error ().  */
4657 aarch64_parse_arch (void)
4660 const struct processor *arch;
4661 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
4664 strcpy (str, aarch64_arch_string);
4666 ext = strchr (str, '+');
4675 error ("missing arch name in -march=%qs", str);
4679 /* Loop through the list of supported ARCHs to find a match. */
4680 for (arch = all_architectures; arch->name != NULL; arch++)
4682 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
4684 selected_arch = arch;
4685 aarch64_isa_flags = selected_arch->flags;
4686 selected_cpu = &all_cores[selected_arch->core];
4690 /* ARCH string contains at least one extension. */
4691 aarch64_parse_extension (ext);
4698 /* ARCH name not found in list. */
4699 error ("unknown value %qs for -march", str);
4703 /* Parse the CPU string. */
/* Works on a stack copy of aarch64_cpu_string.  The first '+' separates
   the CPU name from any extensions.  The name is looked up in all_cores by
   exact match; on success aarch64_isa_flags is taken from selected_cpu and
   the extension suffix, if any, is passed to aarch64_parse_extension.  A
   missing or unknown name is reported with error ().  */
4706 aarch64_parse_cpu (void)
4709 const struct processor *cpu;
4710 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
4713 strcpy (str, aarch64_cpu_string);
4715 ext = strchr (str, '+');
4724 error ("missing cpu name in -mcpu=%qs", str);
4728 /* Loop through the list of supported CPUs to find a match. */
4729 for (cpu = all_cores; cpu->name != NULL; cpu++)
4731 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
4734 aarch64_isa_flags = selected_cpu->flags;
4738 /* CPU string contains at least one extension. */
4739 aarch64_parse_extension (ext);
4746 /* CPU name not found in list. */
4747 error ("unknown value %qs for -mcpu", str);
4751 /* Parse the TUNE string. */
/* Works on a stack copy of aarch64_tune_string.  The whole string is
   compared with strcmp against each entry of all_cores (no extension
   suffix is split off); a match sets selected_tune, otherwise an error is
   reported.  */
4754 aarch64_parse_tune (void)
4756 const struct processor *cpu;
4757 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
4758 strcpy (str, aarch64_tune_string);
4760 /* Loop through the list of supported CPUs to find a match. */
4761 for (cpu = all_cores; cpu->name != NULL; cpu++)
4763 if (strcmp (cpu->name, str) == 0)
4765 selected_tune = cpu;
4770 /* CPU name not found in list. */
4771 error ("unknown value %qs for -mtune", str);
4776 /* Implement TARGET_OPTION_OVERRIDE. */
/* Steps, in order: parse -march (which also clears aarch64_cpu_string),
   -mcpu and -mtune when the corresponding string is set; initialize the
   code model and the bitmask table; default flag_strict_volatile_bitfields;
   fall back to TARGET_CPU_DEFAULT for the CPU and ISA flags; derive
   aarch64_tune_flags, aarch64_tune and aarch64_tune_params from
   selected_tune; and call aarch64_override_options_after_change.  */
4779 aarch64_override_options (void)
4781 /* march wins over mcpu, so when march is defined, mcpu takes the same value,
4782 otherwise march remains undefined. mtune can be used with either march or
4785 if (aarch64_arch_string)
4787 aarch64_parse_arch ();
4788 aarch64_cpu_string = NULL;
4791 if (aarch64_cpu_string)
4793 aarch64_parse_cpu ();
4794 selected_arch = NULL;
4797 if (aarch64_tune_string)
4799 aarch64_parse_tune ();
4802 initialize_aarch64_code_model ();
4804 aarch64_build_bitmask_table ();
4806 /* This target defaults to strict volatile bitfields. */
4807 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
4808 flag_strict_volatile_bitfields = 1;
4810 /* If the user did not specify a processor, choose the default
4811 one for them. This will be the CPU set during configuration using
4812 --with-cpu, otherwise it is "generic". */
4815 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
4816 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
4819 gcc_assert (selected_cpu);
4821 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
4823 selected_tune = &all_cores[selected_cpu->core];
4825 aarch64_tune_flags = selected_tune->flags;
4826 aarch64_tune = selected_tune->core;
4827 aarch64_tune_params = selected_tune->tune;
4829 aarch64_override_options_after_change ();
4832 /* Implement targetm.override_options_after_change. */
/* Resets faked_omit_frame_pointer, then, when flag_omit_leaf_frame_pointer
   is set but flag_omit_frame_pointer is not, turns flag_omit_frame_pointer
   on and records in faked_omit_frame_pointer that this was done here.  */
4835 aarch64_override_options_after_change (void)
4837 faked_omit_frame_pointer = false;
4839 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
4840 that aarch64_frame_pointer_required will be called. We need to remember
4841 whether flag_omit_frame_pointer was turned on normally or just faked. */
4843 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
4845 flag_omit_frame_pointer = true;
4846 faked_omit_frame_pointer = true;
/* Allocate a machine_function with ggc_alloc_cleared_machine_function.
   Installed as init_machine_status by aarch64_init_expanders.  */
4850 static struct machine_function *
4851 aarch64_init_machine_status (void)
4853 struct machine_function *machine;
4854 machine = ggc_alloc_cleared_machine_function ();
/* Install aarch64_init_machine_status as the init_machine_status
   callback.  */
4859 aarch64_init_expanders (void)
4861 init_machine_status = aarch64_init_machine_status;
4864 /* A checking mechanism for the implementation of the various code models. */
/* Sets aarch64_cmodel from aarch64_cmodel_var.  In the switch, TINY and
   SMALL are mapped to their _PIC variants and LARGE is rejected with
   sorry (); the final assignment copies aarch64_cmodel_var unchanged.
   NOTE(review): the switch is presumably guarded by flag_pic (the sorry ()
   message distinguishes -fpic from -fPIC) -- confirm.  */
4866 initialize_aarch64_code_model (void)
4870 switch (aarch64_cmodel_var)
4872 case AARCH64_CMODEL_TINY:
4873 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
4875 case AARCH64_CMODEL_SMALL:
4876 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
4878 case AARCH64_CMODEL_LARGE:
4879 sorry ("code model %qs with -f%s", "large",
4880 flag_pic > 1 ? "PIC" : "pic");
4886 aarch64_cmodel = aarch64_cmodel_var;
4889 /* Return true if SYMBOL_REF X binds locally. */
/* When X has an associated decl the answer comes from
   targetm.binds_local_p on that decl; otherwise SYMBOL_REF_LOCAL_P (X) is
   used.  */
4892 aarch64_symbol_binds_local_p (const_rtx x)
4894 return (SYMBOL_REF_DECL (x)
4895 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
4896 : SYMBOL_REF_LOCAL_P (x));
4899 /* Return true if SYMBOL_REF X is thread local. */
/* The result is SYMBOL_REF_TLS_MODEL (X) != 0, computed after early checks
   on TARGET_HAVE_TLS and on X actually being a SYMBOL_REF.  */
4901 aarch64_tls_symbol_p (rtx x)
4903 if (! TARGET_HAVE_TLS)
4906 if (GET_CODE (x) != SYMBOL_REF)
4909 return SYMBOL_REF_TLS_MODEL (x) != 0;
4912 /* Classify a TLS symbol into one of the TLS kinds. */
/* Mapping from the TLS model of X: global-dynamic and local-dynamic give
   SYMBOL_SMALL_TLSDESC when TARGET_TLS_DESC and SYMBOL_SMALL_TLSGD
   otherwise; initial-exec gives SYMBOL_SMALL_GOTTPREL; local-exec gives
   SYMBOL_SMALL_TPREL; emulated and none give SYMBOL_FORCE_TO_MEM.  */
4913 enum aarch64_symbol_type
4914 aarch64_classify_tls_symbol (rtx x)
4916 enum tls_model tls_kind = tls_symbolic_operand_type (x);
4920 case TLS_MODEL_GLOBAL_DYNAMIC:
4921 case TLS_MODEL_LOCAL_DYNAMIC:
4922 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
4924 case TLS_MODEL_INITIAL_EXEC:
4925 return SYMBOL_SMALL_GOTTPREL;
4927 case TLS_MODEL_LOCAL_EXEC:
4928 return SYMBOL_SMALL_TPREL;
4930 case TLS_MODEL_EMULATED:
4931 case TLS_MODEL_NONE:
4932 return SYMBOL_FORCE_TO_MEM;
4939 /* Return the method that should be used to access SYMBOL_REF or
4940 LABEL_REF X in context CONTEXT. */
/* CONTEXT is currently unused.  LABEL_REFs are SYMBOL_FORCE_TO_MEM under
   the large code model and SYMBOL_SMALL_ABSOLUTE under the tiny and small
   models, PIC or not.  For SYMBOL_REFs: the large model and constant-pool
   addresses are forced to memory; TLS symbols are classified by
   aarch64_classify_tls_symbol; in the non-PIC models weak symbols are
   forced to memory; in the PIC models symbols that do not bind locally use
   SYMBOL_SMALL_GOT; everything else is SYMBOL_SMALL_ABSOLUTE.  */
4941 enum aarch64_symbol_type
4942 aarch64_classify_symbol (rtx x,
4943 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
4945 if (GET_CODE (x) == LABEL_REF)
4947 switch (aarch64_cmodel)
4949 case AARCH64_CMODEL_LARGE:
4950 return SYMBOL_FORCE_TO_MEM;
4952 case AARCH64_CMODEL_TINY_PIC:
4953 case AARCH64_CMODEL_TINY:
4954 case AARCH64_CMODEL_SMALL_PIC:
4955 case AARCH64_CMODEL_SMALL:
4956 return SYMBOL_SMALL_ABSOLUTE;
4963 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4965 switch (aarch64_cmodel)
4967 case AARCH64_CMODEL_LARGE:
4968 return SYMBOL_FORCE_TO_MEM;
4970 case AARCH64_CMODEL_TINY:
4971 case AARCH64_CMODEL_SMALL:
4973 /* This is needed to get DFmode, TImode constants to be loaded off
4974 the constant pool. Is it necessary to dump TImode values into
4975 the constant pool. We don't handle TImode constant loads properly
4976 yet and hence need to use the constant pool. */
4977 if (CONSTANT_POOL_ADDRESS_P (x))
4978 return SYMBOL_FORCE_TO_MEM;
4980 if (aarch64_tls_symbol_p (x))
4981 return aarch64_classify_tls_symbol (x);
4983 if (SYMBOL_REF_WEAK (x))
4984 return SYMBOL_FORCE_TO_MEM;
4986 return SYMBOL_SMALL_ABSOLUTE;
4988 case AARCH64_CMODEL_TINY_PIC:
4989 case AARCH64_CMODEL_SMALL_PIC:
4991 if (CONSTANT_POOL_ADDRESS_P (x))
4992 return SYMBOL_FORCE_TO_MEM;
4994 if (aarch64_tls_symbol_p (x))
4995 return aarch64_classify_tls_symbol (x);
4997 if (!aarch64_symbol_binds_local_p (x))
4998 return SYMBOL_SMALL_GOT;
5000 return SYMBOL_SMALL_ABSOLUTE;
5005 /* By default push everything into the constant pool. */
5006 return SYMBOL_FORCE_TO_MEM;
5009 /* Return true if X is a symbolic constant that can be used in context
5010 CONTEXT. If it is, store the type of the symbol in *SYMBOL_TYPE. */
/* X is first separated into a base and an offset with split_const; only a
   SYMBOL_REF or LABEL_REF base is classified, via
   aarch64_classify_symbol.  */
5013 aarch64_symbolic_constant_p (rtx x, enum aarch64_symbol_context context,
5014 enum aarch64_symbol_type *symbol_type)
5017 split_const (x, &x, &offset);
5018 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
5019 *symbol_type = aarch64_classify_symbol (x, context);
5023 /* No checking of offset at this point. */
/* Return true if X satisfies CONSTANT_P and is a valid memory address for
   DImode.  */
5028 aarch64_constant_address_p (rtx x)
5030 return (CONSTANT_P (x) && memory_address_p (DImode, x));
/* Test whether X is a SYMBOL_REF, or a CONST wrapping a PLUS whose first
   operand is a SYMBOL_REF.
   NOTE(review): the value returned for each outcome is not visible here;
   presumably such operands are rejected -- confirm.  */
5034 aarch64_legitimate_pic_operand_p (rtx x)
5036 if (GET_CODE (x) == SYMBOL_REF
5037 || (GET_CODE (x) == CONST
5038 && GET_CODE (XEXP (x, 0)) == PLUS
5039 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5045 /* Return true if X holds either a quarter-precision or
5046 floating-point +0.0 constant. */
/* Only CONST_DOUBLEs in SFmode or DFmode are considered; the zero test is
   aarch64_float_const_zero_rtx_p and the final answer comes from
   aarch64_float_const_representable_p.  */
5048 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5050 if (!CONST_DOUBLE_P (x))
5053 /* TODO: We could handle moving 0.0 to a TFmode register,
5054 but first we would like to refactor the movtf_aarch64
5055 to be more amenable to split moves properly and
5056 correctly gate on TARGET_SIMD. For now - reject all
5057 constants which are not to SFmode or DFmode registers. */
5058 if (!(mode == SFmode || mode == DFmode))
5061 if (aarch64_float_const_zero_rtx_p (x))
5063 return aarch64_float_const_representable_p (x);
/* Decide whether X is a legitimate constant for MODE.  Checks, in order:
   vector struct modes under TARGET_SIMD; valid CONST_VECTOR immediates,
   CONST_INTs and valid floating constants, which are accepted unless
   targetm.cannot_force_const_mem objects; a HIGH of a valid symbol
   reference; and finally aarch64_constant_address_p.  */
5067 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5069 /* Do not allow vector struct mode constants. We could support
5070 0 and -1 easily, but they need support in aarch64-simd.md. */
5071 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5074 /* This could probably go away because
5075 we now decompose CONST_INTs according to expand_mov_immediate. */
5076 if ((GET_CODE (x) == CONST_VECTOR
5077 && aarch64_simd_valid_immediate (x, mode, false,
5078 NULL, NULL, NULL, NULL, NULL) != -1)
5079 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5080 return !targetm.cannot_force_const_mem (mode, x);
5082 if (GET_CODE (x) == HIGH
5083 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5086 return aarch64_constant_address_p (x);
/* Emit the aarch64_load_tp_hard insn with TARGET as its destination.  When
   TARGET is unsuitable (its mode is not Pmode or it is not a register
   operand) a fresh Pmode pseudo is used instead.  */
5090 aarch64_load_tp (rtx target)
5093 || GET_MODE (target) != Pmode
5094 || !register_operand (target, Pmode))
5095 target = gen_reg_rtx (Pmode);
5097 /* Can return in any reg. */
5098 emit_insn (gen_aarch64_load_tp_hard (target));
5102 /* On AAPCS systems, this is the "struct __va_list". */
5103 static GTY(()) tree va_list_type;
/* aarch64_build_builtin_va_list creates a RECORD_TYPE named "__va_list"
   whose fields __stack, __gr_top, __vr_top, __gr_offs and __vr_offs are
   chained in that order.  The type is laid out, stored in va_list_type and
   returned.  */
5105 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5106 Return the type to use as __builtin_va_list.
5108 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5120 aarch64_build_builtin_va_list (void)
5123 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5125 /* Create the type. */
5126 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5127 /* Give it the required name. */
5128 va_list_name = build_decl (BUILTINS_LOCATION,
5130 get_identifier ("__va_list"),
5132 DECL_ARTIFICIAL (va_list_name) = 1;
5133 TYPE_NAME (va_list_type) = va_list_name;
5134 TYPE_STUB_DECL (va_list_type) = va_list_name;
5136 /* Create the fields. */
5137 f_stack = build_decl (BUILTINS_LOCATION,
5138 FIELD_DECL, get_identifier ("__stack"),
5140 f_grtop = build_decl (BUILTINS_LOCATION,
5141 FIELD_DECL, get_identifier ("__gr_top"),
5143 f_vrtop = build_decl (BUILTINS_LOCATION,
5144 FIELD_DECL, get_identifier ("__vr_top"),
5146 f_groff = build_decl (BUILTINS_LOCATION,
5147 FIELD_DECL, get_identifier ("__gr_offs"),
5149 f_vroff = build_decl (BUILTINS_LOCATION,
5150 FIELD_DECL, get_identifier ("__vr_offs"),
5153 DECL_ARTIFICIAL (f_stack) = 1;
5154 DECL_ARTIFICIAL (f_grtop) = 1;
5155 DECL_ARTIFICIAL (f_vrtop) = 1;
5156 DECL_ARTIFICIAL (f_groff) = 1;
5157 DECL_ARTIFICIAL (f_vroff) = 1;
5159 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5160 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5161 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5162 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5163 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5165 TYPE_FIELDS (va_list_type) = f_stack;
5166 DECL_CHAIN (f_stack) = f_grtop;
5167 DECL_CHAIN (f_grtop) = f_vrtop;
5168 DECL_CHAIN (f_vrtop) = f_groff;
5169 DECL_CHAIN (f_groff) = f_vroff;
5171 /* Compute its layout. */
5172 layout_type (va_list_type);
5174 return va_list_type;
5177 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
/* The save-area sizes are computed from crtl->args.info as
   (NUM_ARG_REGS - aapcs_ncrn) words and (NUM_FP_ARG_REGS - aapcs_nvrn)
   vector registers; with TARGET_GENERAL_REGS_ONLY the vector save area is
   empty.  The five fields of VALIST are then initialized in the order
   __stack, __gr_top, __vr_top, __gr_offs, __vr_offs.  NEXTARG is unused.  */
5179 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5181 const CUMULATIVE_ARGS *cum;
5182 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5183 tree stack, grtop, vrtop, groff, vroff;
5185 int gr_save_area_size;
5186 int vr_save_area_size;
5189 cum = &crtl->args.info;
5191 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5193 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5195 if (TARGET_GENERAL_REGS_ONLY)
5197 if (cum->aapcs_nvrn > 0)
5198 sorry ("%qs and floating point or vector arguments",
5199 "-mgeneral-regs-only");
5200 vr_save_area_size = 0;
5203 f_stack = TYPE_FIELDS (va_list_type_node);
5204 f_grtop = DECL_CHAIN (f_stack);
5205 f_vrtop = DECL_CHAIN (f_grtop);
5206 f_groff = DECL_CHAIN (f_vrtop);
5207 f_vroff = DECL_CHAIN (f_groff);
5209 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5211 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5213 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5215 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5217 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5220 /* Emit code to initialize STACK, which points to the next varargs stack
5221 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5222 by named arguments. STACK is 8-byte aligned. */
5223 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5224 if (cum->aapcs_stack_size > 0)
5225 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5226 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5227 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5229 /* Emit code to initialize GRTOP, the top of the GR save area.
5230 virtual_incoming_args_rtx should have been 16 byte aligned. */
5231 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5232 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5233 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5235 /* Emit code to initialize VRTOP, the top of the VR save area.
5236 This address is gr_save_area_bytes below GRTOP, rounded
5237 down to the next 16-byte boundary. */
5238 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5239 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5240 STACK_BOUNDARY / BITS_PER_UNIT);
5243 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5244 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5245 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5247 /* Emit code to initialize GROFF, the offset from GRTOP of the
5248 next GPR argument. */
5249 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5250 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5251 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5253 /* Likewise emit code to initialize VROFF, the offset from VRTOP
5254 of the next VR argument. */
5255 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5256 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5257 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5260 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5263 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5264 gimple_seq *post_p ATTRIBUTE_UNUSED)
5268 bool is_ha; /* is HFA or HVA. */
5269 bool dw_align; /* double-word align. */
5270 enum machine_mode ag_mode = VOIDmode;
5272 enum machine_mode mode;
5274 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5275 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5276 HOST_WIDE_INT size, rsize, adjust, align;
5277 tree t, u, cond1, cond2;
5279 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5281 type = build_pointer_type (type);
5283 mode = TYPE_MODE (type);
5285 f_stack = TYPE_FIELDS (va_list_type_node);
5286 f_grtop = DECL_CHAIN (f_stack);
5287 f_vrtop = DECL_CHAIN (f_grtop);
5288 f_groff = DECL_CHAIN (f_vrtop);
5289 f_vroff = DECL_CHAIN (f_groff);
5291 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5292 f_stack, NULL_TREE);
5293 size = int_size_in_bytes (type);
5294 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5298 if (aarch64_vfp_is_call_or_return_candidate (mode,
5304 /* TYPE passed in fp/simd registers. */
5305 if (TARGET_GENERAL_REGS_ONLY)
5306 sorry ("%qs and floating point or vector arguments",
5307 "-mgeneral-regs-only");
5309 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5310 unshare_expr (valist), f_vrtop, NULL_TREE);
5311 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5312 unshare_expr (valist), f_vroff, NULL_TREE);
5314 rsize = nregs * UNITS_PER_VREG;
5318 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5319 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5321 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5322 && size < UNITS_PER_VREG)
5324 adjust = UNITS_PER_VREG - size;
5329 /* TYPE passed in general registers. */
5330 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5331 unshare_expr (valist), f_grtop, NULL_TREE);
5332 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5333 unshare_expr (valist), f_groff, NULL_TREE);
5334 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5335 nregs = rsize / UNITS_PER_WORD;
5340 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5341 && size < UNITS_PER_WORD)
5343 adjust = UNITS_PER_WORD - size;
5347 /* Get a local temporary for the field value. */
5348 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5350 /* Emit code to branch if off >= 0. */
5351 t = build2 (GE_EXPR, boolean_type_node, off,
5352 build_int_cst (TREE_TYPE (off), 0));
5353 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5357 /* Emit: offs = (offs + 15) & -16. */
5358 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5359 build_int_cst (TREE_TYPE (off), 15));
5360 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5361 build_int_cst (TREE_TYPE (off), -16));
5362 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5367 /* Update ap.__[g|v]r_offs */
5368 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5369 build_int_cst (TREE_TYPE (off), rsize));
5370 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5374 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5376 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5377 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5378 build_int_cst (TREE_TYPE (f_off), 0));
5379 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5381 /* String up: make sure the assignment happens before the use. */
5382 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5383 COND_EXPR_ELSE (cond1) = t;
5385 /* Prepare the trees handling the argument that is passed on the stack;
5386 the top level node will store in ON_STACK. */
5387 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5390 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5391 t = fold_convert (intDI_type_node, arg);
5392 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5393 build_int_cst (TREE_TYPE (t), 15));
5394 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5395 build_int_cst (TREE_TYPE (t), -16));
5396 t = fold_convert (TREE_TYPE (arg), t);
5397 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5401 /* Advance ap.__stack */
5402 t = fold_convert (intDI_type_node, arg);
5403 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5404 build_int_cst (TREE_TYPE (t), size + 7));
5405 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5406 build_int_cst (TREE_TYPE (t), -8));
5407 t = fold_convert (TREE_TYPE (arg), t);
5408 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5409 /* String up roundup and advance. */
5411 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5412 /* String up with arg */
5413 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5414 /* Big-endianness related address adjustment. */
5415 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5416 && size < UNITS_PER_WORD)
5418 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5419 size_int (UNITS_PER_WORD - size));
5420 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5423 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5424 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5426 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5429 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5430 build_int_cst (TREE_TYPE (off), adjust));
5432 t = fold_convert (sizetype, t);
5433 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5437 /* type ha; // treat as "struct {ftype field[n];}"
5438 ... [computing offs]
5439 for (i = 0; i < nregs; ++i, offs += 16)
5440 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
return ha; */
5443 tree tmp_ha, field_t, field_ptr_t;
5445 /* Declare a local variable. */
5446 tmp_ha = create_tmp_var_raw (type, "ha");
5447 gimple_add_tmp_var (tmp_ha);
5449 /* Establish the base type. */
5453 field_t = float_type_node;
5454 field_ptr_t = float_ptr_type_node;
5457 field_t = double_type_node;
5458 field_ptr_t = double_ptr_type_node;
5461 field_t = long_double_type_node;
5462 field_ptr_t = long_double_ptr_type_node;
5464 /* The half precision and quad precision are not fully supported yet. Enable
5465 the following code after the support is complete. Need to find the correct
5466 type node for __fp16 *. */
5469 field_t = float_type_node;
5470 field_ptr_t = float_ptr_type_node;
5476 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5477 field_t = build_vector_type_for_mode (innertype, ag_mode);
5478 field_ptr_t = build_pointer_type (field_t);
5485 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
5486 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5488 t = fold_convert (field_ptr_t, addr);
5489 t = build2 (MODIFY_EXPR, field_t,
5490 build1 (INDIRECT_REF, field_t, tmp_ha),
5491 build1 (INDIRECT_REF, field_t, t));
5493 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5494 for (i = 1; i < nregs; ++i)
5496 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5497 u = fold_convert (field_ptr_t, addr);
5498 u = build2 (MODIFY_EXPR, field_t,
5499 build2 (MEM_REF, field_t, tmp_ha,
5500 build_int_cst (field_ptr_t,
5502 int_size_in_bytes (field_t)))),
5503 build1 (INDIRECT_REF, field_t, u));
5504 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5507 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5508 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5511 COND_EXPR_ELSE (cond2) = t;
5512 addr = fold_convert (build_pointer_type (type), cond1);
5513 addr = build_va_arg_indirect_ref (addr);
5516 addr = build_va_arg_indirect_ref (addr);
5521 /* Implement TARGET_SETUP_INCOMING_VARARGS.

   Works out how many general and vector argument registers are left
   over after the named arguments, emits stores of those registers to
   the area just below virtual_incoming_args_rtx, and records the size
   of that area in cfun->machine->saved_varargs_size.  MODE and TYPE
   describe the last named argument.  *PRETEND_SIZE is never written.
   NOTE(review): the conditions guarding the two store sequences are not
   visible in this excerpt.  */
5524 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5525 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5528 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5529 CUMULATIVE_ARGS local_cum;
5530 int gr_saved, vr_saved;
5532 /* The caller has advanced CUM up to, but not beyond, the last named
5533 argument. Advance a local copy of CUM past the last "real" named
5534 argument, to find out how many registers are left over. */
5536 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5538 /* Find out how many registers we need to save. */
5539 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5540 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
/* With -mgeneral-regs-only no vector register may carry an argument;
   report named arguments that were nevertheless allocated one.  */
5542 if (TARGET_GENERAL_REGS_ONLY)
5544 if (local_cum.aapcs_nvrn > 0)
5545 sorry ("%qs and floating point or vector arguments",
5546 "-mgeneral-regs-only");
5556 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5557 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5558 - gr_saved * UNITS_PER_WORD);
5559 mem = gen_frame_mem (BLKmode, ptr);
5560 set_mem_alias_set (mem, get_varargs_alias_set ());
5562 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5567 /* We can't use move_block_from_reg, because it will use
5568 the wrong mode, storing D regs only. */
5569 enum machine_mode mode = TImode;
5572 /* Set OFF to the offset from virtual_incoming_args_rtx of
5573 the first vector register. The VR save area lies below
5574 the GR one, and is aligned to 16 bytes. */
5575 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5576 STACK_BOUNDARY / BITS_PER_UNIT);
5577 off -= vr_saved * UNITS_PER_VREG;
/* Store each remaining vector register as a full TImode value,
   UNITS_PER_VREG bytes apart, in ascending register order.  */
5579 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5583 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5584 mem = gen_frame_mem (mode, ptr);
5585 set_mem_alias_set (mem, get_varargs_alias_set ());
5586 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5587 off += UNITS_PER_VREG;
5592 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5593 any complication of having crtl->args.pretend_args_size changed. */
/* Total = rounded-up GR save area + VR save area, the same layout
   used for OFF above.  */
5594 cfun->machine->saved_varargs_size
5595 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5596 STACK_BOUNDARY / BITS_PER_UNIT)
5597 + vr_saved * UNITS_PER_VREG);
/* Mark each register from V0_REGNUM through V31_REGNUM as call-used.
   NOTE(review): the condition guarding this loop, and any other
   per-register updates in its body, are not visible in this excerpt.  */
5601 aarch64_conditional_register_usage (void)
5606 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5609 call_used_regs[i] = 1;
5614 /* Walk down the type tree of TYPE counting consecutive base elements.
5615 If *MODEP is VOIDmode, then set it to the first valid floating point
5616 type. If a non-floating point type is found, or if a floating point
5617 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5618 otherwise return the count in the sub-tree.

   The function recurses through element and field types, sharing *MODEP
   across the whole walk so that every leaf must agree on one mode.  */
5620 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5622 enum machine_mode mode;
5625 switch (TREE_CODE (type))
/* Leaf whose own mode is tested: only SFmode, DFmode and TFmode pass.  */
5628 mode = TYPE_MODE (type);
5629 if (mode != DFmode && mode != SFmode && mode != TFmode)
5632 if (*modep == VOIDmode)
/* Same test, but on the mode of TYPE's component type (TREE_TYPE).
   NOTE(review): presumably the complex-type case; the case labels are
   not visible in this excerpt.  */
5641 mode = TYPE_MODE (TREE_TYPE (type));
5642 if (mode != DFmode && mode != SFmode && mode != TFmode)
5645 if (*modep == VOIDmode)
5654 /* Use V2SImode and V4SImode as representatives of all 64-bit
5655 and 128-bit vector types. */
5656 size = int_size_in_bytes (type);
5669 if (*modep == VOIDmode)
5672 /* Vector modes are considered to be opaque: two vectors are
5673 equivalent for the purposes of being homogeneous aggregates
5674 if they are the same size. */
5683 tree index = TYPE_DOMAIN (type);
5685 /* Can't handle incomplete types. */
5686 if (!COMPLETE_TYPE_P (type))
5689 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5692 || !TYPE_MAX_VALUE (index)
5693 || !host_integerp (TYPE_MAX_VALUE (index), 1)
5694 || !TYPE_MIN_VALUE (index)
5695 || !host_integerp (TYPE_MIN_VALUE (index), 1)
/* Scale the per-element count by the number of elements in the index
   domain, MAX - MIN + 1.  */
5699 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
5700 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
5702 /* There must be no padding. */
5703 if (!host_integerp (TYPE_SIZE (type), 1)
5704 || (tree_low_cst (TYPE_SIZE (type), 1)
5705 != count * GET_MODE_BITSIZE (*modep)))
5717 /* Can't handle incomplete types. */
5718 if (!COMPLETE_TYPE_P (type))
/* Visit every FIELD_DECL member; other kinds of member are skipped by
   the TREE_CODE test.  */
5721 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5723 if (TREE_CODE (field) != FIELD_DECL)
5726 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5732 /* There must be no padding. */
5733 if (!host_integerp (TYPE_SIZE (type), 1)
5734 || (tree_low_cst (TYPE_SIZE (type), 1)
5735 != count * GET_MODE_BITSIZE (*modep)))
5742 case QUAL_UNION_TYPE:
5744 /* These aren't very interesting except in a degenerate case. */
5749 /* Can't handle incomplete types. */
5750 if (!COMPLETE_TYPE_P (type))
5753 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5755 if (TREE_CODE (field) != FIELD_DECL)
5758 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
/* Keep the largest member count seen so far rather than a sum.  */
5761 count = count > sub_count ? count : sub_count;
5764 /* There must be no padding. */
5765 if (!host_integerp (TYPE_SIZE (type), 1)
5766 || (tree_low_cst (TYPE_SIZE (type), 1)
5767 != count * GET_MODE_BITSIZE (*modep)))
5780 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
5781 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
5782 array types. The C99 floating-point complex types are also considered
5783 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
5784 types, which are GCC extensions and out of the scope of AAPCS64, are
5785 treated as composite types here as well.
5787 Note that MODE itself is not sufficient in determining whether a type
5788 is such a composite type or not. This is because
5789 stor-layout.c:compute_record_mode may have already changed the MODE
5790 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
5791 structure with only one field may have its MODE set to the mode of the
5792 field. Also an integer mode whose size matches the size of the
5793 RECORD_TYPE type may be used to substitute the original mode
5794 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
5795 solely relied on. */
5798 aarch64_composite_type_p (const_tree type,
5799 enum machine_mode mode)
/* When a tree type is supplied, aggregate and complex types are
   recognised from the type itself.  */
5801 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
/* Mode-based test: the complex-float and complex-int mode classes also
   count as composite.  NOTE(review): the first operand of this
   condition is not visible in this excerpt.  */
5805 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
5806 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
5812 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
5813 type as described in AAPCS64 \S 4.1.2.
5815 See the comment above aarch64_composite_type_p for the notes on MODE. */
5818 aarch64_short_vector_p (const_tree type,
5819 enum machine_mode mode)
/* SIZE stays -1, and the result is false, when neither source below
   supplies a size.  */
5821 HOST_WIDE_INT size = -1;
/* Prefer the size of an explicit VECTOR_TYPE; otherwise use the size of
   MODE, but only for a non-composite value in a vector mode class.  */
5823 if (type && TREE_CODE (type) == VECTOR_TYPE)
5824 size = int_size_in_bytes (type);
5825 else if (!aarch64_composite_type_p (type, mode)
5826 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5827 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
5828 size = GET_MODE_SIZE (mode);
/* Only 8-byte and 16-byte vectors qualify.  */
5830 return (size == 8 || size == 16) ? true : false;
5833 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
5834 shall be passed or returned in simd/fp register(s) (providing these
5835 parameter passing registers are available).
5837 Upon successful return, *COUNT returns the number of needed registers,
5838 *BASE_MODE returns the mode of the individual register and when IS_HA
5839 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
5840 floating-point aggregate or a homogeneous short-vector aggregate. */
5843 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
5845 enum machine_mode *base_mode,
5849 enum machine_mode new_mode = VOIDmode;
5850 bool composite_p = aarch64_composite_type_p (type, mode);
/* Default to "not homogeneous"; the cases below override this.  */
5852 if (is_ha != NULL) *is_ha = false;
/* Case 1: a non-composite floating-point scalar, or a short vector.  */
5854 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
5855 || aarch64_short_vector_p (type, mode))
/* Case 2: a complex float.  It is flagged as homogeneous and its
   component mode becomes the per-register mode.  */
5860 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5862 if (is_ha != NULL) *is_ha = true;
5864 new_mode = GET_MODE_INNER (mode);
/* Case 3: any other composite with a tree type.  It is flagged as
   homogeneous only when it flattens to between 1 and HA_MAX_NUM_FLDS
   elements that share one mode.  */
5866 else if (type && composite_p)
5868 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5870 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
5872 if (is_ha != NULL) *is_ha = true;
5881 *base_mode = new_mode;
5885 /* Implement TARGET_STRUCT_VALUE_RTX.  Always yields the hard register
   AARCH64_STRUCT_VALUE_REGNUM in Pmode; FNDECL and INCOMING are ignored.  */
5888 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
5889 int incoming ATTRIBUTE_UNUSED)
5891 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
5894 /* Implements target hook vector_mode_supported_p.  The visible part of
   the test accepts exactly these modes: V8QI, V4HI, V2SI, V2SF, V16QI,
   V8HI, V4SI, V2DI, V4SF and V2DF.
   NOTE(review): the leading operand of the condition and the return
   statements are not visible in this excerpt.  */
5896 aarch64_vector_mode_supported_p (enum machine_mode mode)
5899 && (mode == V4SImode || mode == V8HImode
5900 || mode == V16QImode || mode == V2DImode
5901 || mode == V2SImode || mode == V4HImode
5902 || mode == V8QImode || mode == V2SFmode
5903 || mode == V4SFmode || mode == V2DFmode))
5909 /* Return quad mode as the preferred SIMD mode. */
5910 static enum machine_mode
5911 aarch64_preferred_simd_mode (enum machine_mode mode)
5935 /* Return the bitmask of possible vector sizes for the vectorizer to iterate over. */
5938 aarch64_autovectorize_vector_sizes (void)
5943 /* A table to help perform AArch64-specific name mangling for AdvSIMD
5944 vector types in order to conform to the AAPCS64 (see "Procedure
5945 Call Standard for the ARM 64-bit Architecture", Appendix A). To
5946 qualify for emission with the mangled names defined in that document,
5947 a vector type must not only be of the correct mode but also be
5948 composed of AdvSIMD vector element types (e.g.
5949 __builtin_aarch64_simd_qi); these types are registered by
5950 aarch64_init_simd_builtins (). In other words, vector types defined
5951 in other ways e.g. via vector_size attribute will get default mangled names. */
5955 enum machine_mode mode;
5956 const char *element_type_name;
5957 const char *mangled_name;
5958 } aarch64_simd_mangle_map_entry;
5960 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
5961 /* 64-bit containerized types.  Each entry gives a vector mode, the name
   of the element type and the mangled name to emit.  Every mangled name
   starts with its own length in decimal, e.g. "10" followed by the
   ten-character "__Int8x8_t".  */
5962 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
5963 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
5964 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
5965 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
5966 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
5967 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
5968 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
5969 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
5970 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
5971 /* 128-bit containerized types. */
5972 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
5973 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
5974 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
5975 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
5976 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
5977 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
5978 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
5979 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
5980 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
5981 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
5982 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
5983 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
/* Sentinel: the lookup loop in aarch64_mangle_type stops at VOIDmode.  */
5984 { VOIDmode, NULL, NULL }
5987 /* Implement TARGET_MANGLE_TYPE.  Returns a fixed mangled name for the
   va_list type and for vector types listed in aarch64_simd_mangle_map;
   every other type falls through to the default mangling.  */
5990 aarch64_mangle_type (const_tree type)
5992 /* The AArch64 ABI documents say that "__va_list" has to be
5993 mangled as if it is in the "std" namespace. */
5994 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
5995 return "St9__va_list";
5997 /* Check the mode of the vector type, and the name of the vector
5998 element type, against the table. */
5999 if (TREE_CODE (type) == VECTOR_TYPE)
6001 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6003 while (pos->mode != VOIDmode)
6005 tree elt_type = TREE_TYPE (type);
/* An entry matches when both the vector mode and the declared name of
   the element type agree.
   NOTE(review): TYPE_NAME (elt_type) is passed to TREE_CODE without a
   null check; confirm that every element type reaching here is named.  */
6007 if (pos->mode == TYPE_MODE (type)
6008 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6009 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6010 pos->element_type_name))
6011 return pos->mangled_name;
6017 /* Use the default mangling. */
6021 /* Return the equivalent letter for size.  SIZE is an element width in
   bits: 64 -> 'd', 32 -> 's', 16 -> 'h', 8 -> 'b'.  Any other value
   reaches gcc_unreachable ().  */
6022 static unsigned char
6023 sizetochar (int size)
6027 case 64: return 'd';
6028 case 32: return 's';
6029 case 16: return 'h';
6030 case 8 : return 'b';
6031 default: gcc_unreachable ();
6035 /* Return true iff x is a uniform vector of floating-point
6036 constants, and the constant can be represented in
6037 quarter-precision form. Note, as aarch64_float_const_representable_p
6038 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6040 aarch64_vect_float_const_representable_p (rtx x)
6043 REAL_VALUE_TYPE r0, ri;
/* Only vectors in a floating-point vector mode are considered.  */
6046 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6049 x0 = CONST_VECTOR_ELT (x, 0);
6050 if (!CONST_DOUBLE_P (x0))
6053 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
/* Every remaining element must be a CONST_DOUBLE equal to element 0.  */
6055 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6057 xi = CONST_VECTOR_ELT (x, i);
6058 if (!CONST_DOUBLE_P (xi))
6061 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6062 if (!REAL_VALUES_EQUAL (r0, ri))
/* The vector is uniform, so the answer is that of its first element.  */
6066 return aarch64_float_const_representable_p (x0);
6069 /* TODO: This function returns values similar to those
6070 returned by neon_valid_immediate in gcc/config/arm/arm.c
6071 but the API here is different enough that these magic numbers
6072 are not used. It should be sufficient to return true or false.

   OP is a CONST_VECTOR and MODE its vector mode.  A nonzero INVERSE
   XORs every byte of the constant with 0xff before it is matched.
   *MODCONST, *ELEMENTWIDTH and *ELEMENTCHAR receive the constant to
   output, the element width in bits and the matching size letter (see
   sizetochar).  SHIFT and MVN are asserted non-null on the path that
   extracts a single byte from the constant.
   NOTE(review): the return statements and any null checks guarding the
   output stores are not visible in this excerpt.  */
6074 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
6075 rtx *modconst, int *elementwidth,
6076 unsigned char *elementchar,
6077 int *mvn, int *shift)
6079 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6081 for (i = 0; i < idx; i += (STRIDE)) \
6086 immtype = (CLASS); \
6087 elsize = (ELSIZE); \
6088 elchar = sizetochar (elsize); \
6094 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6095 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6096 unsigned char bytes[16];
6097 unsigned char elchar = 0;
6098 int immtype = -1, matches;
6099 unsigned int invmask = inverse ? 0xff : 0;
/* Floating-point vectors take a separate path: the outputs are taken
   straight from element 0 and its bit width.  */
6102 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6104 bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode);
6105 int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0)));
6108 || aarch64_vect_float_const_representable_p (op)))
6112 *modconst = CONST_VECTOR_ELT (op, 0);
6115 *elementwidth = elem_width;
6118 *elementchar = sizetochar (elem_width);
6129 /* Splat vector constant out into a byte vector. */
6130 for (i = 0; i < n_elts; i++)
6132 rtx el = CONST_VECTOR_ELT (op, i);
6133 unsigned HOST_WIDE_INT elpart;
6134 unsigned int part, parts;
6136 if (GET_CODE (el) == CONST_INT)
6138 elpart = INTVAL (el);
6141 else if (GET_CODE (el) == CONST_DOUBLE)
6143 elpart = CONST_DOUBLE_LOW (el);
6149 for (part = 0; part < parts; part++)
6152 for (byte = 0; byte < innersize; byte++)
/* Least significant byte first; INVMASK flips every bit of the byte
   when INVERSE was requested.  */
6154 bytes[idx++] = (elpart & 0xff) ^ invmask;
6155 elpart >>= BITS_PER_UNIT;
6157 if (GET_CODE (el) == CONST_DOUBLE)
6158 elpart = CONST_DOUBLE_HIGH (el);
/* Sanity check: the splat must have produced one byte per byte of MODE.  */
6163 gcc_assert (idx == GET_MODE_SIZE (mode));
/* Candidate encodings, tried in turn.  The CHECK arguments are the byte
   stride of one element, the element size in bits, the class number
   stored in IMMTYPE, a test over the bytes of each element, and SHIFT
   and NEG values.
   NOTE(review): how the macro applies TEST and what it does with SHIFT
   and NEG is not visible in this excerpt.  */
6167 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6168 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6170 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6171 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6173 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6174 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6176 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6177 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6179 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6181 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6183 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6184 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6186 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6187 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6189 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6190 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6192 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6193 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6195 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6197 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6199 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6200 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6202 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6203 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6205 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6206 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 0, 0);
6208 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6209 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 0, 1);
6211 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
/* Class 17: every byte is 0x00 or 0xff and equals the byte eight
   positions further on, wrapping at IDX.  */
6213 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6214 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6218 /* TODO: Currently the assembler cannot handle types 12 to 15.
6219 And there is no way to specify cmode through the compiler.
6220 Disable them till there is support in the assembler. */
6222 || (immtype >= 12 && immtype <= 15)
6228 *elementwidth = elsize;
6231 *elementchar = elchar;
6241 unsigned HOST_WIDE_INT imm = 0;
6243 /* Un-invert bytes of recognized vector, if necessary. */
6245 for (i = 0; i < idx; i++)
6246 bytes[i] ^= invmask;
6250 /* FIXME: Broken on 32-bit H_W_I hosts. */
6251 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
/* Build a 64-bit constant in which each of the first eight bytes is
   0xff if the source byte is nonzero and 0x00 otherwise.  */
6253 for (i = 0; i < 8; i++)
6254 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6255 << (i * BITS_PER_UNIT);
6257 *modconst = GEN_INT (imm);
6261 unsigned HOST_WIDE_INT imm = 0;
/* Reassemble one element, least significant byte first.  */
6263 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6264 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6266 /* Construct 'abcdefgh' because the assembler cannot handle
6267 generic constants. */
6268 gcc_assert (shift != NULL && mvn != NULL);
/* Keep only the byte that sits at bit offset *SHIFT.  */
6271 imm = (imm >> *shift) & 0xff;
6272 *modconst = GEN_INT (imm);
6280 /* Return TRUE if rtx OP is legal for use as either an AdvSIMD MOVI instruction
6281 (or, implicitly, MVNI) immediate. Write back width per element
6282 to *ELEMENTWIDTH, and a modified constant (whatever should be output
6283 for a MOVI instruction) in *MODCONST.

   This is a wrapper around aarch64_simd_valid_immediate, called with
   INVERSE equal to 0 and with local temporaries for all outputs; the
   temporaries are then copied to the caller's pointers.
   NOTE(review): the conditions guarding the copies are not visible in
   this excerpt.  */
6285 aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode,
6286 rtx *modconst, int *elementwidth,
6287 unsigned char *elementchar,
6288 int *mvn, int *shift)
6292 unsigned char tmpwidthc;
6293 int tmpmvn = 0, tmpshift = 0;
6294 int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst,
6295 &tmpwidth, &tmpwidthc,
6296 &tmpmvn, &tmpshift);
6302 *modconst = tmpconst;
6305 *elementwidth = tmpwidth;
6308 *elementchar = tmpwidthc;
/* Test whether X is a CONST_VECTOR in an integer vector mode whose
   elements all equal element 0, with that value inside the inclusive
   range [MINVAL, MAXVAL].
   NOTE(review): the return statements are not visible in this excerpt;
   the visible conditions are the rejecting cases.  */
6320 aarch64_const_vec_all_same_int_p (rtx x,
6321 HOST_WIDE_INT minval,
6322 HOST_WIDE_INT maxval)
6324 HOST_WIDE_INT firstval;
6327 if (GET_CODE (x) != CONST_VECTOR
6328 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
/* Range-check element 0 once; the loop below only has to compare the
   other elements against it.  */
6331 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6332 if (firstval < minval || firstval > maxval)
6335 count = CONST_VECTOR_NUNITS (x);
6336 for (i = 1; i < count; i++)
6337 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6343 /* Check if immediate shift constants are within range.  X must be a
   uniform integer vector.  Two ranges are used, where BIT_WIDTH is the
   width in bits of one element of MODE: 0 .. BIT_WIDTH - 1 and
   1 .. BIT_WIDTH.
   NOTE(review): presumably LEFT selects the first range; the selecting
   condition is not visible in this excerpt.  */
6345 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6347 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6349 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6351 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6354 /* Return true if X is a uniform vector where all elements
6355 are either the floating-point constant 0.0 or the
6356 integer constant 0.

   The test is pointer identity with CONST0_RTX (MODE).
   NOTE(review): this relies on zero vectors always being represented by
   that shared rtx; confirm for all callers.  */
6358 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6360 return x == CONST0_RTX (mode);
/* Examine the CONST_INT X one byte at a time, eight bytes in all.  A
   byte that is neither 0x00 nor 0xff triggers the visible condition.
   MODE is unused.
   NOTE(review): the return statements and the step that advances to the
   next byte are not visible in this excerpt.  */
6364 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6366 HOST_WIDE_INT imm = INTVAL (x);
6369 for (i = 0; i < 8; i++)
6371 unsigned int byte = imm & 0xff;
6372 if (byte != 0xff && byte != 0)
6380 /* Return a const_int vector of VAL: a CONST_VECTOR of mode MODE whose
   GET_MODE_NUNITS (MODE) elements are all GEN_INT (VAL).  */
6382 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6384 int nunits = GET_MODE_NUNITS (mode);
6385 rtvec v = rtvec_alloc (nunits);
6388 for (i=0; i < nunits; i++)
6389 RTVEC_ELT (v, i) = GEN_INT (val);
6391 return gen_rtx_CONST_VECTOR (mode, v);
6394 /* Construct and return a PARALLEL RTX vector.  It holds NUNITS / 2
   consecutive const_int lane numbers, where NUNITS is the number of
   units in MODE.  The numbering starts at NUNITS / 2 when HIGH is true
   and at 0 otherwise.  */
6396 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6398 int nunits = GET_MODE_NUNITS (mode);
6399 rtvec v = rtvec_alloc (nunits / 2);
6400 int base = high ? nunits / 2 : 0;
6404 for (i=0; i < nunits / 2; i++)
6405 RTVEC_ELT (v, i) = GEN_INT (base + i);
6407 t1 = gen_rtx_PARALLEL (mode, v);
6411 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6412 HIGH (exclusive).  OPERAND must be a CONST_INT (asserted); a value
   outside the range is reported with error ().  */
6414 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6417 gcc_assert (GET_CODE (operand) == CONST_INT);
6418 lane = INTVAL (operand);
6420 if (lane < low || lane >= high)
6421 error ("lane out of range");
/* Bounds-check a constant.  Ensure OPERAND, which must be a CONST_INT
   (asserted), lies between LOW (inclusive) and HIGH (exclusive); a
   value outside the range is reported with error ().  This is the same
   check as aarch64_simd_lane_bounds with a different message.  */
6425 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6427 gcc_assert (GET_CODE (operand) == CONST_INT);
6428 HOST_WIDE_INT lane = INTVAL (operand);
6430 if (lane < low || lane >= high)
6431 error ("constant out of range");
6434 /* Emit code to reinterpret one AdvSIMD type as another,
6435 without altering bits.  SRC is viewed in the mode of DEST through
   gen_lowpart and then moved into DEST.  */
6437 aarch64_simd_reinterpret (rtx dest, rtx src)
6439 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6442 /* Emit code to place an AdvSIMD pair result in memory locations (with equal registers). */
6445 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6446 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6449 rtx mem = gen_rtx_MEM (mode, destaddr);
6450 rtx tmp1 = gen_reg_rtx (mode);
6451 rtx tmp2 = gen_reg_rtx (mode);
/* INTFN builds the instruction; it is handed two fresh MODE registers,
   TMP1 and TMP2, on either side of OP1.  */
6453 emit_insn (intfn (tmp1, op1, tmp2));
/* Store the pair back to back: TMP1 at DESTADDR and TMP2 immediately
   after it, GET_MODE_SIZE (MODE) bytes further on.  */
6455 emit_move_insn (mem, tmp1);
6456 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6457 emit_move_insn (mem, tmp2);
6460 /* Return TRUE if OP is a valid vector addressing mode: OP must be a MEM
   whose address is either a POST_INC or a plain REG.  */
6462 aarch64_simd_mem_operand_p (rtx op)
6464 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6465 || GET_CODE (XEXP (op, 0)) == REG);
6468 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6469 not to early-clobber SRC registers in the process.
6471 We assume that the operands described by SRC and DEST represent a
6472 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6473 number of components into which the copy has been decomposed.

   On return OPERANDS holds COUNT (destination, source) pairs at indices
   2 * I and 2 * I + 1, so OPERANDS must have room for 2 * COUNT
   entries.  */
6475 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6476 rtx *src, unsigned int count)
/* No overlap, or the destination starts at a lower register number
   than the source: emit the components in ascending order.  */
6480 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6481 || REGNO (operands[0]) < REGNO (operands[1]))
6483 for (i = 0; i < count; i++)
6485 operands[2 * i] = dest[i];
6486 operands[2 * i + 1] = src[i];
/* Otherwise emit the components in descending order, last one first.  */
6491 for (i = 0; i < count; i++)
6493 operands[2 * i] = dest[count - i - 1];
6494 operands[2 * i + 1] = src[count - i - 1];
6499 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6500 one of VSTRUCT modes: OI, CI or XI. */
6502 aarch64_simd_attr_length_move (rtx insn)
6504 enum machine_mode mode;
/* Fills recog_data with the operands of INSN.  */
6506 extract_insn_cached (insn);
/* Only a register-to-register move looks at the mode of operand 0.
   NOTE(review): the per-mode lengths and the result for other operand
   combinations are not visible in this excerpt.  */
6508 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6510 mode = GET_MODE (recog_data.operand[0]);
6526 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6527 alignment of a vector to 128 bits.  The result is the size of TYPE as
   given by TYPE_SIZE, capped at 128.  */
6528 static HOST_WIDE_INT
6529 aarch64_simd_vector_alignment (const_tree type)
6531 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
6532 return MIN (align, 128);
6535 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.
   NOTE(review): the use of IS_PACKED and the return statements are not
   visible in this excerpt.  */
6537 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6542 /* We guarantee alignment for vectors up to 128-bits. */
6543 if (tree_int_cst_compare (TYPE_SIZE (type),
6544 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6547 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
/* Yield 0 for modes accepted by aarch64_vector_mode_supported_p or
   aarch64_vect_struct_mode_p, and GET_MODE_BITSIZE (MODE) - 1 for every
   other mode.
   NOTE(review): presumably the shift-truncation-mask target hook; the
   hook registration is not visible in this excerpt.  */
6551 static unsigned HOST_WIDE_INT
6552 aarch64_shift_truncation_mask (enum machine_mode mode)
6555 (aarch64_vector_mode_supported_p (mode)
6556 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
/* Flag letter emitted for SECTION_TLS sections; 'T' unless
   TLS_SECTION_ASM_FLAG has already been defined.  */
6559 #ifndef TLS_SECTION_ASM_FLAG
6560 #define TLS_SECTION_ASM_FLAG 'T'
/* Write a .section directive for the section NAME to asm_out_file.
   FLAGS is a mask of SECTION_* bits that selects the flag letters, the
   optional type operand, the entry size and the COMDAT group suffix.
   NOTE(review): DECL is marked ATTRIBUTE_UNUSED but is read in the
   COMDAT branch at the end of the function.  */
6564 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
6565 tree decl ATTRIBUTE_UNUSED)
6567 char flagchars[10], *f = flagchars;
6569 /* If we have already declared this section, we can use an
6570 abbreviated form to switch back to it -- unless this section is
6571 part of a COMDAT group, in which case GAS requires the full
6572 declaration every time. */
6573 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6574 && (flags & SECTION_DECLARED))
6576 fprintf (asm_out_file, "\t.section\t%s\n", name);
/* Collect the flag letters for the full form in FLAGCHARS.
   NOTE(review): only the store for SECTION_TLS is visible in this
   excerpt; the letters for the other bits are not.  */
6580 if (!(flags & SECTION_DEBUG))
6582 if (flags & SECTION_WRITE)
6584 if (flags & SECTION_CODE)
6586 if (flags & SECTION_SMALL)
6588 if (flags & SECTION_MERGE)
6590 if (flags & SECTION_STRINGS)
6592 if (flags & SECTION_TLS)
6593 *f++ = TLS_SECTION_ASM_FLAG;
6594 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6598 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
/* Optional operands follow, each introduced by a comma: section type,
   entry size, then COMDAT group.  */
6600 if (!(flags & SECTION_NOTYPE))
6605 if (flags & SECTION_BSS)
6610 #ifdef TYPE_OPERAND_FMT
6611 format = "," TYPE_OPERAND_FMT;
6616 fprintf (asm_out_file, format, type);
6618 if (flags & SECTION_ENTSIZE)
6619 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
6620 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
/* DECL is either the group identifier itself or a declaration from
   which the group is fetched with DECL_COMDAT_GROUP.  */
6622 if (TREE_CODE (decl) == IDENTIFIER_NODE)
6623 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
6625 fprintf (asm_out_file, ",%s,comdat",
6626 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
6630 putc ('\n', asm_out_file);
6633 /* Select a format to encode pointers in exception handling data. */
6635 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
6638 switch (aarch64_cmodel)
6640 case AARCH64_CMODEL_TINY:
6641 case AARCH64_CMODEL_TINY_PIC:
6642 case AARCH64_CMODEL_SMALL:
6643 case AARCH64_CMODEL_SMALL_PIC:
6644 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient for everything. */
6646 type = DW_EH_PE_sdata4;
6649 /* No assumptions here. 8-byte relocs required. */
6650 type = DW_EH_PE_sdata8;
6653 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
6656 /* Emit load exclusive.  MODE selects the QI, HI, SI or DI generator,
   which is then invoked with RVAL, MEM and MODEL_RTX in that order.
   NOTE(review): the handling of any other mode is not visible in this
   excerpt.  */
6659 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
6660 rtx mem, rtx model_rtx)
6662 rtx (*gen) (rtx, rtx, rtx);
6666 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
6667 case HImode: gen = gen_aarch64_load_exclusivehi; break;
6668 case SImode: gen = gen_aarch64_load_exclusivesi; break;
6669 case DImode: gen = gen_aarch64_load_exclusivedi; break;
6674 emit_insn (gen (rval, mem, model_rtx));
6677 /* Emit store exclusive.  MODE selects the QI, HI, SI or DI generator,
   which is then invoked with BVAL, RVAL, MEM and MODEL_RTX in that
   order.
   NOTE(review): aarch64_split_compare_and_swap calls this as
   (mode, scratch, mem, newval, model), so RVAL receives the memory
   operand and MEM the value to store.  The arguments are passed
   straight through, but the parameter names look swapped; confirm
   against the operand order of the store-exclusive pattern.  */
6680 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
6681 rtx rval, rtx mem, rtx model_rtx)
6683 rtx (*gen) (rtx, rtx, rtx, rtx);
6687 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
6688 case HImode: gen = gen_aarch64_store_exclusivehi; break;
6689 case SImode: gen = gen_aarch64_store_exclusivesi; break;
6690 case DImode: gen = gen_aarch64_store_exclusivedi; break;
6695 emit_insn (gen (bval, rval, mem, model_rtx));
6698 /* Emit INSN as a jump instruction and mark it as unlikely by attaching
   a REG_BR_PROB note.  The probability is REG_BR_PROB_BASE / 100 - 1,
   i.e. just under one percent of REG_BR_PROB_BASE.  */
6701 aarch64_emit_unlikely_jump (rtx insn)
6703 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
6705 insn = emit_jump_insn (insn);
6706 add_reg_note (insn, REG_BR_PROB, very_unlikely);
6709 /* Expand a compare and swap pattern.  Of the OPERANDS read here, [3] is
   the expected value, [4] the new value, [5] the weak flag, [6] the
   success memory model and [7] the failure memory model; [1] receives
   the loaded value for QImode and HImode.
   NOTE(review): the assignments of BVAL, RVAL and MEM from OPERANDS are
   not visible in this excerpt.  */
6712 aarch64_expand_compare_and_swap (rtx operands[])
6714 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
6715 enum machine_mode mode, cmp_mode;
6716 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
6721 oldval = operands[3];
6722 newval = operands[4];
6723 is_weak = operands[5];
6724 mod_s = operands[6];
6725 mod_f = operands[7];
6726 mode = GET_MODE (mem);
6729 /* Normally the succ memory model must be stronger than fail, but in the
6730 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
6731 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
6733 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
6734 && INTVAL (mod_s) == MEMMODEL_RELEASE)
6735 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
6741 /* For short modes, we're going to perform the comparison in SImode,
6742 so do the zero-extension now. */
6744 rval = gen_reg_rtx (SImode);
6745 oldval = convert_modes (SImode, mode, oldval, true);
6750 /* Force the value into a register if needed. */
6751 if (!aarch64_plus_operand (oldval, mode))
6752 oldval = force_reg (cmp_mode, oldval);
6761 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
6762 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
6763 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
6764 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
6769 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
/* For short modes RVAL is an SImode temporary; copy its low part back
   into the caller's result operand.  */
6771 if (mode == QImode || mode == HImode)
6772 emit_move_insn (operands[1], gen_lowpart (mode, rval));
/* Set BVAL from the CC register: an EQ test of CC_REGNUM against zero.  */
6774 x = gen_rtx_REG (CCmode, CC_REGNUM);
6775 x = gen_rtx_EQ (SImode, x, const0_rtx);
6776 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
6779 /* Split a compare and swap pattern.

   The steps visible here, in order: LABEL1; a load-exclusive of MEM
   into RVAL; a compare of RVAL with OLDVAL and an unlikely branch to
   LABEL2 when they differ; a store-exclusive call passing SCRATCH, MEM
   and NEWVAL; an unlikely branch back to LABEL1 when SCRATCH is
   nonzero; a compare of SCRATCH with zero into the condition-code
   register; and LABEL2.  operands[5] is the memory model handed to both
   exclusive accesses.

   NOTE(review): IS_WEAK is derived from operands[4]; presumably it
   selects between the retry branch and the final compare -- confirm
   which of the steps above are conditional.  */
6782 aarch64_split_compare_and_swap (rtx operands[])
6784 rtx rval, mem, oldval, newval, scratch;
6785 enum machine_mode mode;
6787 rtx label1, label2, x, cond;
6791 oldval = operands[2];
6792 newval = operands[3];
6793 is_weak = (operands[4] != const0_rtx);
6794 scratch = operands[7];
6795 mode = GET_MODE (mem);
/* LABEL1 is the target of the retry branch below.  */
6800 label1 = gen_label_rtx ();
6801 emit_label (label1);
/* LABEL2 is the exit taken when the loaded value does not match.  */
6803 label2 = gen_label_rtx ();
6805 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
/* if (RVAL != OLDVAL) goto LABEL2, marked unlikely.  */
6807 cond = aarch64_gen_compare_reg (NE, rval, oldval);
6808 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
6809 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
6810 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
6811 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
6813 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
/* if (SCRATCH != 0) goto LABEL1, marked unlikely.  */
6817 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
6818 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
6819 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
6820 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
/* Set the condition-code register from comparing SCRATCH with zero.  */
6824 cond = gen_rtx_REG (CCmode, CC_REGNUM);
6825 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
6826 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
6829 emit_label (label2);
6832 /* Split an atomic operation.

   CODE is the rtx code of the operation, MEM the memory operand and
   VALUE the other input.  OLD_OUT is the destination of the
   load-exclusive from MEM, NEW_OUT receives the computed result, and
   COND is the register handed to the store-exclusive and then tested to
   decide whether to branch back to LABEL.  MODEL_RTX is passed to both
   exclusive accesses.  */
6835 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
6836 rtx value, rtx model_rtx, rtx cond)
6838 enum machine_mode mode = GET_MODE (mem);
/* Working mode for the arithmetic: DImode for DImode memory, SImode for
   every other mode.  */
6839 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
6842 label = gen_label_rtx ();
/* View the outputs and VALUE in the working mode.  */
6846 new_out = gen_lowpart (wmode, new_out);
6848 old_out = gen_lowpart (wmode, old_out);
6851 value = simplify_gen_subreg (wmode, value, mode, 0);
6853 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
/* Compute NEW_OUT = ~(OLD_OUT & VALUE) in two instructions.  */
6862 x = gen_rtx_AND (wmode, old_out, value);
6863 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
6864 x = gen_rtx_NOT (wmode, new_out);
6865 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
/* A constant VALUE is replaced by its negation.  */
6869 if (CONST_INT_P (value))
6871 value = GEN_INT (-INTVAL (value));
/* Build NEW_OUT = (CODE OLD_OUT VALUE) in the working mode.  */
6877 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
6878 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
/* Hand the MODE-sized low part of NEW_OUT to the store-exclusive.  */
6882 aarch64_emit_store_exclusive (mode, cond, mem,
6883 gen_lowpart (mode, new_out), model_rtx);
/* if (COND != 0) goto LABEL, marked unlikely.  */
6885 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
6886 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
6887 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
6888 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
/* Write a ".arch" directive naming selected_arch or, in the alternative
   branch when selected_cpu is set, a ".cpu" directive naming
   selected_cpu to asm_out_file, then call default_file_start.
   Installed as TARGET_ASM_FILE_START.  */
6892 aarch64_start_file (void)
6895 asm_fprintf (asm_out_file, "\t.arch %s\n", selected_arch->name);
6896 else if (selected_cpu)
6897 asm_fprintf (asm_out_file, "\t.cpu %s\n", selected_cpu->name);
6898 default_file_start();
6901 /* Target hook for c_mode_for_suffix: takes the suffix character SUFFIX
   and returns a machine mode.  Installed as TARGET_C_MODE_FOR_SUFFIX.  */
6902 static enum machine_mode
6903 aarch64_c_mode_for_suffix (char suffix)
6911 /* We can only represent floating point constants which will fit in
6912 "quarter-precision" values. These values are characterised by
6913 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
6916 (-1)^s * (n/16) * 2^r
6919 's' is the sign bit.
6920 'n' is an integer in the range 16 <= n <= 31.
6921 'r' is an integer in the range -3 <= r <= 4. */
6923 /* Return true iff X can be represented by a quarter-precision
6924 floating point immediate operand X. Note, we cannot represent 0.0. */
/* NOTE(review): SIGN is assigned below but is not referenced again in
   the visible part of this function.  */
6926 aarch64_float_const_representable_p (rtx x)
6928 /* This represents our current view of how many bits
6929 make up the mantissa. */
6930 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
6932 unsigned HOST_WIDE_INT mantissa, mask;
6933 HOST_WIDE_INT m1, m2;
6934 REAL_VALUE_TYPE r, m;
/* Only CONST_DOUBLE operands are considered.  */
6936 if (!CONST_DOUBLE_P (x))
6939 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6941 /* We cannot represent infinities, NaNs or +/-zero. We won't
6942 know if we have +zero until we analyse the mantissa, but we
6943 can reject the other invalid values. */
6944 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
6945 || REAL_VALUE_MINUS_ZERO (r))
6948 /* Extract sign and exponent. */
6949 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
6950 r = real_value_abs (&r);
6951 exponent = REAL_EXP (&r);
6953 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
6954 highest (sign) bit, with a fixed binary point at bit point_pos.
6955 m1 holds the low part of the mantissa, m2 the high part.
6956 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
6957 bits for the mantissa, this can fail (low bits will be lost). */
6958 real_ldexp (&m, &r, point_pos - exponent);
6959 REAL_VALUE_TO_INT (&m1, &m2, m);
6961 /* If the low part of the mantissa has bits set we cannot represent
6965 /* We have rejected the lower HOST_WIDE_INT, so update our
6966 understanding of how many bits lie in the mantissa and
6967 look only at the high HOST_WIDE_INT. */
6969 point_pos -= HOST_BITS_PER_WIDE_INT;
6971 /* We can only represent values with a mantissa of the form 1.xxxx. */
/* MASK covers every bit below the top five bits of the mantissa.  */
6972 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
6973 if ((mantissa & mask) != 0)
6976 /* Having filtered unrepresentable values, we may now remove all
6977 but the highest 5 bits. */
6978 mantissa >>= point_pos - 5;
6980 /* We cannot represent the value 0.0, so reject it. This is handled
6985 /* Then, as bit 4 is always set, we can mask it off, leaving
6986 the mantissa in the range [0, 15]. */
6987 mantissa &= ~(1 << 4);
6988 gcc_assert (mantissa <= 15);
6990 /* GCC internally does not use IEEE754-like encoding (where normalized
6991 significands are in the range [1, 2)). GCC uses [0.5, 1) (see real.c).
6992 Our mantissa values are shifted 4 places to the left relative to
6993 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
6994 by 5 places to correct for GCC's representation. */
6995 exponent = 5 - exponent;
/* With the [0.5, 1) normalisation described above, REAL_EXP is r + 1 for
   a value (n/16) * 2^r, so EXPONENT is now 4 - r and the range check
   below corresponds to -3 <= r <= 4.  */
6997 return (exponent >= 0 && exponent <= 7);
/* Build the assembler template for moving the SIMD immediate
   *CONST_VECTOR, of mode MODE, into a register.

   The immediate is validated with aarch64_simd_immediate_valid_for_move,
   which also fills in the lane width in bits, the lane width character,
   the MVN flag and the shift; failure to validate is asserted against.
   The template is formatted into the static buffer TEMPL, so each call
   overwrites the text produced by the previous one.

   NOTE(review): LANE_COUNT is an unsigned int but is formatted with
   "%d" below.  */
7001 aarch64_output_simd_mov_immediate (rtx *const_vector,
7002 enum machine_mode mode,
7006 unsigned char widthc;
7007 int lane_width_bits;
7008 static char templ[40];
7009 int shift = 0, mvn = 0;
7010 const char *mnemonic;
7011 unsigned int lane_count = 0;
7014 aarch64_simd_immediate_valid_for_move (*const_vector, mode,
7015 const_vector, &lane_width_bits,
7016 &widthc, &mvn, &shift);
7017 gcc_assert (is_valid);
/* From here on MODE is the element mode of the vector.  */
7019 mode = GET_MODE_INNER (mode);
/* Floating-point elements: "movi" for a zero constant, "fmov" otherwise;
   a shift is never expected.  */
7020 if (mode == SFmode || mode == DFmode)
7023 aarch64_float_const_zero_rtx_p (*const_vector);
7024 gcc_assert (shift == 0);
7025 mnemonic = zero_p ? "movi" : "fmov";
/* Other elements: "mvni" when the MVN flag is set, "movi" otherwise.  */
7028 mnemonic = mvn ? "mvni" : "movi";
7030 gcc_assert (lane_width_bits != 0);
7031 lane_count = width / lane_width_bits;
/* A single lane uses the "%d0" register form; multiple lanes use the
   "<count><width char>" arrangement, with or without an "lsl" shift.  */
7033 if (lane_count == 1)
7034 snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic);
7036 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d",
7037 mnemonic, lane_count, widthc, shift);
7039 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1",
7040 mnemonic, lane_count, widthc);
7044 /* Split operands into moves from op[1] + op[2] into op[0].

   operands[1] and operands[2] must be V16QImode registers.  DESTLO and
   DESTHI below are the two halves of operands[0]: DESTLO starts at
   register DEST and DESTHI at register DEST + HALFREGS.  */
7047 aarch64_split_combinev16qi (rtx operands[3])
7049 unsigned int dest = REGNO (operands[0]);
7050 unsigned int src1 = REGNO (operands[1]);
7051 unsigned int src2 = REGNO (operands[2]);
7052 enum machine_mode halfmode = GET_MODE (operands[1]);
/* Number of hard registers occupied by one half.  */
7053 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7056 gcc_assert (halfmode == V16QImode);
/* Both sources already sit in the matching halves of the destination.  */
7058 if (src1 == dest && src2 == dest + halfregs)
7060 /* No-op move. Can't split to nothing; emit something. */
7061 emit_note (NOTE_INSN_DELETED);
7065 /* Preserve register attributes for variable tracking. */
7066 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7067 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7068 GET_MODE_SIZE (halfmode));
7070 /* Special case of reversed high/low parts. */
7071 if (reg_overlap_mentioned_p (operands[2], destlo)
7072 && reg_overlap_mentioned_p (operands[1], desthi))
/* Exchange the contents of operands[1] and operands[2] with three XORs,
   so no temporary register is needed.  */
7074 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7075 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7076 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
/* operands[2] does not overlap DESTLO, so the low half is written
   first.  */
7078 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7080 /* Try to avoid unnecessary moves if part of the result
7081 is in the right place already. */
7083 emit_move_insn (destlo, operands[1]);
7084 if (src2 != dest + halfregs)
7085 emit_move_insn (desthi, operands[2]);
/* Remaining case: the high half is written before the low half, so
   operands[2] is read before DESTLO is overwritten.  */
7089 if (src2 != dest + halfregs)
7090 emit_move_insn (desthi, operands[2]);
7092 emit_move_insn (destlo, operands[1]);
7096 /* vec_perm support. */
/* Size of the PERM array and of the selector arrays built by the
   permutation code.  */
7098 #define MAX_VECT_LEN 16
/* Describes one constant permutation for the recognisers in this file.  */
7100 struct expand_vec_perm_d
/* Destination and the two input vectors.  */
7102 rtx target, op0, op1;
/* PERM[i] is the index of the input element selected for output
   element i.  */
7103 unsigned char perm[MAX_VECT_LEN];
/* Vector mode of the permutation.  */
7104 enum machine_mode vmode;
7110 /* Generate a variable permutation.

   TARGET, OP0, OP1 and SEL must all have the same mode, which must be
   V8QImode or V16QImode, and TARGET_SIMD must be enabled (checked with
   gcc_checking_assert).  OP0 and OP1 are treated as a single input when
   they are the same rtx.  */
7113 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7115 enum machine_mode vmode = GET_MODE (target);
7116 bool one_vector_p = rtx_equal_p (op0, op1);
7118 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7119 gcc_checking_assert (GET_MODE (op0) == vmode);
7120 gcc_checking_assert (GET_MODE (op1) == vmode);
7121 gcc_checking_assert (GET_MODE (sel) == vmode);
7122 gcc_checking_assert (TARGET_SIMD);
/* Lookups that read only OP0 (presumably the ONE_VECTOR_P case).  */
7126 if (vmode == V8QImode)
7128 /* Expand the argument to a V16QI mode by duplicating it. */
7129 rtx pair = gen_reg_rtx (V16QImode);
7130 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7131 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7135 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
/* Lookups that read both OP0 and OP1: two V8QI inputs are combined into
   one V16QI table, two V16QI inputs into an OImode pair for tbl2.  */
7142 if (vmode == V8QImode)
7144 pair = gen_reg_rtx (V16QImode);
7145 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7146 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7150 pair = gen_reg_rtx (OImode);
7151 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7152 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
/* Expand a variable permutation of OP0 and OP1, selected by SEL, into
   TARGET.  SEL is first ANDed with a constant vector built from RMASK
   and is then passed to aarch64_expand_vec_perm_1.  The mask value is
   NELT - 1 when OP0 and OP1 are the same rtx and 2 * NELT - 1
   otherwise.  Big-endian is asserted against.  */
7158 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7160 enum machine_mode vmode = GET_MODE (target);
7161 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7162 bool one_vector_p = rtx_equal_p (op0, op1);
7163 rtx rmask[MAX_VECT_LEN], mask;
7165 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7167 /* The TBL instruction does not use a modulo index, so we must take care
7168 of that ourselves. */
7169 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
/* NOTE(review): presumably this loop stores MASK in each of the first
   NELT entries of RMASK -- confirm.  */
7170 for (i = 0; i < nelt; ++i)
7172 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7173 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7175 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7178 /* Recognize patterns suitable for the TRN instructions.

   The permutation in D matches when, for every even I, perm[I] is
   I + ODD and perm[I + 1] is (I + NELT + ODD) wrapped by MASK.  On a
   match the trn1 or trn2 generator for D->vmode is emitted on OUT, IN0
   and IN1.
   NOTE(review): ODD is presumably 0 or 1 according to perm[0], and
   presumably selects between trn1 and trn2 -- confirm.  */
7180 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7182 unsigned int i, odd, mask, nelt = d->nelt;
7183 rtx out, in0, in1, x;
7184 rtx (*gen) (rtx, rtx, rtx);
7185 enum machine_mode vmode = d->vmode;
/* Elements wider than 8 bytes are not handled.  */
7187 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7190 /* Note that these are little-endian tests.
7191 We correct for big-endian later. */
7192 if (d->perm[0] == 0)
7194 else if (d->perm[0] == 1)
/* Indices wrap within one vector when both inputs are the same, and
   within two vectors otherwise.  */
7198 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7200 for (i = 0; i < nelt; i += 2)
7202 if (d->perm[i] != i + odd)
7204 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7214 if (BYTES_BIG_ENDIAN)
/* Swap the two inputs for big-endian.  */
7216 x = in0, in0 = in1, in1 = x;
7225 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7226 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7227 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7228 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7229 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7230 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7231 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7232 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7233 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7234 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7243 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7244 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7245 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7246 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7247 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7248 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7249 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7250 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7251 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7252 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7258 emit_insn (gen (out, in0, in1));
7262 /* Recognize patterns suitable for the UZP instructions.

   The permutation in D matches when perm[I] is (2 * I + ODD) wrapped by
   MASK for every I.  On a match the uzp1 or uzp2 generator for D->vmode
   is emitted on OUT, IN0 and IN1.
   NOTE(review): ODD is presumably 0 or 1 according to perm[0], and
   presumably selects between uzp1 and uzp2 -- confirm.  */
7264 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7266 unsigned int i, odd, mask, nelt = d->nelt;
7267 rtx out, in0, in1, x;
7268 rtx (*gen) (rtx, rtx, rtx);
7269 enum machine_mode vmode = d->vmode;
/* Elements wider than 8 bytes are not handled.  */
7271 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7274 /* Note that these are little-endian tests.
7275 We correct for big-endian later. */
7276 if (d->perm[0] == 0)
7278 else if (d->perm[0] == 1)
/* Indices wrap within one vector when both inputs are the same, and
   within two vectors otherwise.  */
7282 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7284 for (i = 0; i < nelt; i++)
7286 unsigned elt = (i * 2 + odd) & mask;
7287 if (d->perm[i] != elt)
7297 if (BYTES_BIG_ENDIAN)
/* Swap the two inputs for big-endian.  */
7299 x = in0, in0 = in1, in1 = x;
7308 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7309 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7310 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7311 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7312 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7313 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7314 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7315 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7316 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7317 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7326 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7327 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7328 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7329 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7330 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7331 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7332 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7333 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7334 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7335 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7341 emit_insn (gen (out, in0, in1));
7345 /* Recognize patterns suitable for the ZIP instructions.

   The permutation in D matches when, for every I below NELT / 2,
   perm[2 * I] is (I + HIGH) wrapped by MASK and perm[2 * I + 1] is that
   value plus NELT, again wrapped by MASK.  On a match the zip1 or zip2
   generator for D->vmode is emitted on OUT, IN0 and IN1.
   NOTE(review): HIGH is compared with perm[0] before the loop; how it
   is initialised and adjusted, and how it selects between zip1 and
   zip2, should be confirmed.  */
7347 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7349 unsigned int i, high, mask, nelt = d->nelt;
7350 rtx out, in0, in1, x;
7351 rtx (*gen) (rtx, rtx, rtx);
7352 enum machine_mode vmode = d->vmode;
/* Elements wider than 8 bytes are not handled.  */
7354 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7357 /* Note that these are little-endian tests.
7358 We correct for big-endian later. */
7360 if (d->perm[0] == high)
7363 else if (d->perm[0] == 0)
/* Indices wrap within one vector when both inputs are the same, and
   within two vectors otherwise.  */
7367 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7369 for (i = 0; i < nelt / 2; i++)
7371 unsigned elt = (i + high) & mask;
7372 if (d->perm[i * 2] != elt)
7374 elt = (elt + nelt) & mask;
7375 if (d->perm[i * 2 + 1] != elt)
7385 if (BYTES_BIG_ENDIAN)
/* Swap the two inputs for big-endian.  */
7387 x = in0, in0 = in1, in1 = x;
7396 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7397 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7398 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7399 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7400 case V4SImode: gen = gen_aarch64_zip2v4si; break;
7401 case V2SImode: gen = gen_aarch64_zip2v2si; break;
7402 case V2DImode: gen = gen_aarch64_zip2v2di; break;
7403 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7404 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7405 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7414 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7415 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7416 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7417 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7418 case V4SImode: gen = gen_aarch64_zip1v4si; break;
7419 case V2SImode: gen = gen_aarch64_zip1v2si; break;
7420 case V2DImode: gen = gen_aarch64_zip1v2di; break;
7421 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7422 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7423 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7429 emit_insn (gen (out, in0, in1));
/* Implement the constant permutation in D with a table lookup: build a
   constant selector vector from D->perm, force it into a register and
   expand it with aarch64_expand_vec_perm_1.  Modes other than V8QImode
   and V16QImode, and big-endian, are tested for first.  This is the
   last recogniser tried by aarch64_expand_vec_perm_const_1.  */
7434 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
7436 rtx rperm[MAX_VECT_LEN], sel;
7437 enum machine_mode vmode = d->vmode;
7438 unsigned int i, nelt = d->nelt;
7440 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
7441 numbering of elements for big-endian, we must reverse the order. */
7442 if (BYTES_BIG_ENDIAN)
7448 /* Generic code will try constant permutation twice. Once with the
7449 original mode and again with the elements lowered to QImode.
7450 So wait and don't do the selector expansion ourselves. */
7451 if (vmode != V8QImode && vmode != V16QImode)
/* Turn each permutation index into a CONST_INT selector element.  */
7454 for (i = 0; i < nelt; ++i)
7455 rperm[i] = GEN_INT (d->perm[i]);
7456 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
7457 sel = force_reg (vmode, sel);
7459 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
/* Try to expand the constant permutation described by D.  The
   recognisers are tried in the order ZIP, UZP, TRN; if none matches the
   result of aarch64_evpc_tbl is returned.  */
7464 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
7466 /* The pattern matching functions above are written to look for a small
7467 number to begin the sequence (0, 1, N/2). If we begin with an index
7468 from the second operand, we can swap the operands. */
7469 if (d->perm[0] >= d->nelt)
7471 unsigned i, nelt = d->nelt;
/* Adding NELT modulo 2 * NELT moves every index into the other
   operand's half of the index range.  */
7474 for (i = 0; i < nelt; ++i)
7475 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
7484 if (aarch64_evpc_zip (d))
7486 else if (aarch64_evpc_uzp (d))
7488 else if (aarch64_evpc_trn (d))
7490 return aarch64_evpc_tbl (d);
7495 /* Expand a vec_perm_const pattern.

   Fills in a struct expand_vec_perm_d from TARGET and the constant
   selector SEL, then returns the result of
   aarch64_expand_vec_perm_const_1.  Each selector element is reduced
   modulo 2 * NELT, and WHICH collects 1 when an index is below NELT and
   2 otherwise, so that WHICH records which inputs are referenced.

   NOTE(review): the comment beginning "The elements of PERM" below has
   no closing delimiter here, so the statements that follow it are
   lexically part of that comment -- restore the terminator.  */
7498 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
7500 struct expand_vec_perm_d d;
7507 d.vmode = GET_MODE (target);
7508 gcc_assert (VECTOR_MODE_P (d.vmode));
7509 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7510 d.testing_p = false;
7512 for (i = which = 0; i < nelt; ++i)
7514 rtx e = XVECEXP (sel, 0, i);
7515 int ei = INTVAL (e) & (2 * nelt - 1);
7516 which |= (ei < nelt ? 1 : 2);
7526 d.one_vector_p = false;
7527 if (!rtx_equal_p (op0, op1))
7530 /* The elements of PERM do not suggest that only the first operand
7531 is used, but both operands are identical. Allow easier matching
7532 of the permutation by folding the permutation into the single
7536 for (i = 0; i < nelt; ++i)
7537 d.perm[i] &= nelt - 1;
7539 d.one_vector_p = true;
7544 d.one_vector_p = true;
7548 return aarch64_expand_vec_perm_const_1 (&d);
/* Run aarch64_expand_vec_perm_const_1 on a descriptor built from the
   constant selector SEL, whose target and operands are raw registers
   numbered just past LAST_VIRTUAL_REGISTER rather than real operands.
   Every selector element must be below 2 * NELT.  Installed as
   TARGET_VECTORIZE_VEC_PERM_CONST_OK.

   NOTE(review): the comment beginning "If all elements are from the
   second vector" below has no closing delimiter of its own, so the
   "for" line after it is lexically part of that comment -- restore the
   terminator.  */
7552 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
7553 const unsigned char *sel)
7555 struct expand_vec_perm_d d;
7556 unsigned int i, nelt, which;
7560 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7562 memcpy (d.perm, sel, nelt);
7564 /* Calculate whether all elements are in one vector. */
7565 for (i = which = 0; i < nelt; ++i)
7567 unsigned char e = d.perm[i];
7568 gcc_assert (e < 2 * nelt);
7569 which |= (e < nelt ? 1 : 2);
7572 /* If all elements are from the second vector, reindex as if from the
7575 for (i = 0; i < nelt; ++i)
7578 /* Check whether the mask can be applied to a single vector. */
7579 d.one_vector_p = (which != 3);
/* A distinct OP1 register is created only when both inputs are
   needed; otherwise OP1 is the same rtx as OP0.  */
7581 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
7582 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
7583 if (!d.one_vector_p)
7584 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
7587 ret = aarch64_expand_vec_perm_const_1 (&d);
7593 #undef TARGET_ADDRESS_COST
7594 #define TARGET_ADDRESS_COST aarch64_address_cost
7596 /* This hook determines whether unnamed bitfields affect the alignment
7597 of the containing structure. The hook returns true if the structure
7598 should inherit the alignment requirements of an unnamed bitfield's type. */
7600 #undef TARGET_ALIGN_ANON_BITFIELD
7601 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
7603 #undef TARGET_ASM_ALIGNED_DI_OP
7604 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
7606 #undef TARGET_ASM_ALIGNED_HI_OP
7607 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
7609 #undef TARGET_ASM_ALIGNED_SI_OP
7610 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
7612 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7613 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
7614 hook_bool_const_tree_hwi_hwi_const_tree_true
7616 #undef TARGET_ASM_FILE_START
7617 #define TARGET_ASM_FILE_START aarch64_start_file
7619 #undef TARGET_ASM_OUTPUT_MI_THUNK
7620 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
7622 #undef TARGET_ASM_SELECT_RTX_SECTION
7623 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
7625 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
7626 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
7628 #undef TARGET_BUILD_BUILTIN_VA_LIST
7629 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
7631 #undef TARGET_CALLEE_COPIES
7632 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
7634 #undef TARGET_CAN_ELIMINATE
7635 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
7637 #undef TARGET_CANNOT_FORCE_CONST_MEM
7638 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
7640 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7641 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
7643 /* Only the least significant bit is used for initialization guard variables. */
7645 #undef TARGET_CXX_GUARD_MASK_BIT
7646 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
7648 #undef TARGET_C_MODE_FOR_SUFFIX
7649 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
7651 #ifdef TARGET_BIG_ENDIAN_DEFAULT
7652 #undef TARGET_DEFAULT_TARGET_FLAGS
7653 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
7656 #undef TARGET_CLASS_MAX_NREGS
7657 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
7659 #undef TARGET_BUILTIN_DECL
7660 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
7662 #undef TARGET_EXPAND_BUILTIN
7663 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
7665 #undef TARGET_EXPAND_BUILTIN_VA_START
7666 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
7668 #undef TARGET_FUNCTION_ARG
7669 #define TARGET_FUNCTION_ARG aarch64_function_arg
7671 #undef TARGET_FUNCTION_ARG_ADVANCE
7672 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
7674 #undef TARGET_FUNCTION_ARG_BOUNDARY
7675 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
7677 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7678 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
7680 #undef TARGET_FUNCTION_VALUE
7681 #define TARGET_FUNCTION_VALUE aarch64_function_value
7683 #undef TARGET_FUNCTION_VALUE_REGNO_P
7684 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
7686 #undef TARGET_FRAME_POINTER_REQUIRED
7687 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
7689 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7690 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
7692 #undef TARGET_INIT_BUILTINS
7693 #define TARGET_INIT_BUILTINS aarch64_init_builtins
7695 #undef TARGET_LEGITIMATE_ADDRESS_P
7696 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
7698 #undef TARGET_LEGITIMATE_CONSTANT_P
7699 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
7701 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7702 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
7704 #undef TARGET_MANGLE_TYPE
7705 #define TARGET_MANGLE_TYPE aarch64_mangle_type
7707 #undef TARGET_MEMORY_MOVE_COST
7708 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
7710 #undef TARGET_MUST_PASS_IN_STACK
7711 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7713 /* This target hook should return true if accesses to volatile bitfields
7714 should use the narrowest mode possible. It should return false if these
7715 accesses should use the bitfield container type. */
7716 #undef TARGET_NARROW_VOLATILE_BITFIELD
7717 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
7719 #undef TARGET_OPTION_OVERRIDE
7720 #define TARGET_OPTION_OVERRIDE aarch64_override_options
7722 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
7723 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
7724 aarch64_override_options_after_change
7726 #undef TARGET_PASS_BY_REFERENCE
7727 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
7729 #undef TARGET_PREFERRED_RELOAD_CLASS
7730 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
7732 #undef TARGET_SECONDARY_RELOAD
7733 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
7735 #undef TARGET_SHIFT_TRUNCATION_MASK
7736 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
7738 #undef TARGET_SETUP_INCOMING_VARARGS
7739 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
7741 #undef TARGET_STRUCT_VALUE_RTX
7742 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
7744 #undef TARGET_REGISTER_MOVE_COST
7745 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
7747 #undef TARGET_RETURN_IN_MEMORY
7748 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
7750 #undef TARGET_RETURN_IN_MSB
7751 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
7753 #undef TARGET_RTX_COSTS
7754 #define TARGET_RTX_COSTS aarch64_rtx_costs
7756 #undef TARGET_TRAMPOLINE_INIT
7757 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
7759 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
7760 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
7762 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7763 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
7765 #undef TARGET_ARRAY_MODE_SUPPORTED_P
7766 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
7768 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
7769 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
7771 #undef TARGET_VECTORIZE_BUILTINS
7772 #define TARGET_VECTORIZE_BUILTINS
7774 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
7775 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
7776 aarch64_builtin_vectorized_function
7778 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
7779 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
7780 aarch64_autovectorize_vector_sizes
7782 /* Section anchor support. */
7784 #undef TARGET_MIN_ANCHOR_OFFSET
7785 #define TARGET_MIN_ANCHOR_OFFSET -256
7787 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
7788 byte offset; we can do much more for larger data types, but have no way
7789 to determine the size of the access. We assume accesses are aligned. */
7790 #undef TARGET_MAX_ANCHOR_OFFSET
7791 #define TARGET_MAX_ANCHOR_OFFSET 4095
7793 #undef TARGET_VECTOR_ALIGNMENT
7794 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
7796 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7797 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
7798 aarch64_simd_vector_alignment_reachable
7800 /* vec_perm support. */
7802 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
7803 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
7804 aarch64_vectorize_vec_perm_const_ok
/* Drop any earlier definition first, as every other hook override in
   this file does, so that this is not a redefinition of the macro with
   a different value.  */
#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
/* The target hook vector for this back end, initialised from
   TARGET_INITIALIZER.  NOTE(review): presumably TARGET_INITIALIZER
   expands to the TARGET_* macros defined above -- confirm in
   target-def.h.  */
7809 struct gcc_target targetm = TARGET_INITIALIZER;
7811 #include "gt-aarch64.h"