1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
25 #include "insn-codes.h"
27 #include "insn-attr.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
35 #include "hard-reg-set.h"
41 #include "target-def.h"
42 #include "targhooks.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "pointer-set.h"
50 #include "hash-table.h"
52 #include "basic-block.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
57 #include "gimple-expr.h"
64 #include "tree-vectorizer.h"
65 #include "config/arm/aarch-cost-tables.h"
68 /* Defined for convenience. */
69 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
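/* Illustrative note (added for exposition): with the default LP64 ABI,
   POINTER_SIZE is 64 bits, so POINTER_BYTES evaluates to 64 / 8 = 8;
   under -mabi=ilp32 it would be 32 / 8 = 4.  */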
71 /* Classifies an address.
74 A simple base register plus immediate offset.
77 A base register indexed by immediate offset with writeback.
80 A base register indexed by (optionally scaled) register.
83 A base register indexed by (optionally scaled) zero-extended register.
86 A base register indexed by (optionally scaled) sign-extended register.
89 A LO_SUM rtx with a base register and "LO12" symbol relocation.
92 A constant symbolic address, in pc-relative literal pool. */
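/* Rough illustrations of the address forms classified above, in AArch64
   assembly syntax (examples added for exposition, not from the original):

     ldr x0, [x1, #16]            base register plus immediate offset
     ldr x0, [x1, #16]!           base register with writeback (pre-index)
     ldr x0, [x1, x2, lsl #3]     base indexed by (scaled) register
     ldr x0, [x1, w2, uxtw #3]    base indexed by zero-extended register
     ldr x0, [x1, w2, sxtw #3]    base indexed by sign-extended register
     add x0, x1, :lo12:foo        LO_SUM with a "LO12" relocation
     ldr x0, .Lconstant           pc-relative literal-pool reference  */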
94 enum aarch64_address_type {
104 struct aarch64_address_info {
105 enum aarch64_address_type type;
109 enum aarch64_symbol_type symbol_type;
112 struct simd_immediate_info
121 /* The current code model. */
122 enum aarch64_code_model aarch64_cmodel;
125 #undef TARGET_HAVE_TLS
126 #define TARGET_HAVE_TLS 1
129 static bool aarch64_lra_p (void);
130 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
131 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
133 enum machine_mode *, int *,
135 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
136 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
137 static void aarch64_override_options_after_change (void);
138 static bool aarch64_vector_mode_supported_p (enum machine_mode);
139 static unsigned bit_count (unsigned HOST_WIDE_INT);
140 static bool aarch64_const_vec_all_same_int_p (rtx,
141 HOST_WIDE_INT, HOST_WIDE_INT);
143 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
144 const unsigned char *sel);
145 static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
147 /* The processor for which instructions should be scheduled. */
148 enum aarch64_processor aarch64_tune = cortexa53;
150 /* The current tuning set. */
151 const struct tune_params *aarch64_tune_params;
153 /* Mask to specify which instructions we are allowed to generate. */
154 unsigned long aarch64_isa_flags = 0;
156 /* Mask to specify which instruction scheduling options should be used. */
157 unsigned long aarch64_tune_flags = 0;
159 /* Tuning parameters. */
161 #if HAVE_DESIGNATED_INITIALIZERS
162 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
164 #define NAMED_PARAM(NAME, VAL) (VAL)
167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
171 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
174 static const struct cpu_addrcost_table generic_addrcost_table =
176 #if HAVE_DESIGNATED_INITIALIZERS
185 NAMED_PARAM (pre_modify, 0),
186 NAMED_PARAM (post_modify, 0),
187 NAMED_PARAM (register_offset, 0),
188 NAMED_PARAM (register_extend, 0),
189 NAMED_PARAM (imm_offset, 0)
192 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
195 static const struct cpu_addrcost_table cortexa57_addrcost_table =
197 #if HAVE_DESIGNATED_INITIALIZERS
206 NAMED_PARAM (pre_modify, 0),
207 NAMED_PARAM (post_modify, 0),
208 NAMED_PARAM (register_offset, 0),
209 NAMED_PARAM (register_extend, 0),
210 NAMED_PARAM (imm_offset, 0),
213 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
216 static const struct cpu_regmove_cost generic_regmove_cost =
218 NAMED_PARAM (GP2GP, 1),
219 NAMED_PARAM (GP2FP, 2),
220 NAMED_PARAM (FP2GP, 2),
221 /* We currently do not provide direct support for TFmode Q->Q move.
222 Therefore we need to raise the cost above 2 in order to have
223 reload handle the situation. */
224 NAMED_PARAM (FP2FP, 4)
227 /* Generic costs for vector insn classes. */
228 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
231 static const struct cpu_vector_cost generic_vector_cost =
233 NAMED_PARAM (scalar_stmt_cost, 1),
234 NAMED_PARAM (scalar_load_cost, 1),
235 NAMED_PARAM (scalar_store_cost, 1),
236 NAMED_PARAM (vec_stmt_cost, 1),
237 NAMED_PARAM (vec_to_scalar_cost, 1),
238 NAMED_PARAM (scalar_to_vec_cost, 1),
239 NAMED_PARAM (vec_align_load_cost, 1),
240 NAMED_PARAM (vec_unalign_load_cost, 1),
241 NAMED_PARAM (vec_unalign_store_cost, 1),
242 NAMED_PARAM (vec_store_cost, 1),
243 NAMED_PARAM (cond_taken_branch_cost, 3),
244 NAMED_PARAM (cond_not_taken_branch_cost, 1)
247 /* Generic costs for vector insn classes. */
248 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
251 static const struct cpu_vector_cost cortexa57_vector_cost =
253 NAMED_PARAM (scalar_stmt_cost, 1),
254 NAMED_PARAM (scalar_load_cost, 4),
255 NAMED_PARAM (scalar_store_cost, 1),
256 NAMED_PARAM (vec_stmt_cost, 3),
257 NAMED_PARAM (vec_to_scalar_cost, 8),
258 NAMED_PARAM (scalar_to_vec_cost, 8),
259 NAMED_PARAM (vec_align_load_cost, 5),
260 NAMED_PARAM (vec_unalign_load_cost, 5),
261 NAMED_PARAM (vec_unalign_store_cost, 1),
262 NAMED_PARAM (vec_store_cost, 1),
263 NAMED_PARAM (cond_taken_branch_cost, 1),
264 NAMED_PARAM (cond_not_taken_branch_cost, 1)
267 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
270 static const struct tune_params generic_tunings =
272 &cortexa57_extra_costs,
273 &generic_addrcost_table,
274 &generic_regmove_cost,
275 &generic_vector_cost,
276 NAMED_PARAM (memmov_cost, 4),
277 NAMED_PARAM (issue_rate, 2)
280 static const struct tune_params cortexa53_tunings =
282 &cortexa53_extra_costs,
283 &generic_addrcost_table,
284 &generic_regmove_cost,
285 &generic_vector_cost,
286 NAMED_PARAM (memmov_cost, 4),
287 NAMED_PARAM (issue_rate, 2)
290 static const struct tune_params cortexa57_tunings =
292 &cortexa57_extra_costs,
293 &cortexa57_addrcost_table,
294 &generic_regmove_cost,
295 &cortexa57_vector_cost,
296 NAMED_PARAM (memmov_cost, 4),
297 NAMED_PARAM (issue_rate, 3)
300 /* A processor implementing AArch64. */
303 const char *const name;
304 enum aarch64_processor core;
306 const unsigned long flags;
307 const struct tune_params *const tune;
310 /* Processor cores implementing AArch64. */
311 static const struct processor all_cores[] =
313 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
314 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
315 #include "aarch64-cores.def"
317 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
318 {NULL, aarch64_none, NULL, 0, NULL}
321 /* Architectures implementing AArch64. */
322 static const struct processor all_architectures[] =
324 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
325 {NAME, CORE, #ARCH, FLAGS, NULL},
326 #include "aarch64-arches.def"
328 {NULL, aarch64_none, NULL, 0, NULL}
331 /* Target specification. These are populated as command-line arguments
332 are processed, or NULL if not specified. */
333 static const struct processor *selected_arch;
334 static const struct processor *selected_cpu;
335 static const struct processor *selected_tune;
337 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
339 /* An ISA extension in the co-processor and main instruction set space. */
340 struct aarch64_option_extension
342 const char *const name;
343 const unsigned long flags_on;
344 const unsigned long flags_off;
347 /* ISA extensions in AArch64. */
348 static const struct aarch64_option_extension all_extensions[] =
350 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
351 {NAME, FLAGS_ON, FLAGS_OFF},
352 #include "aarch64-option-extensions.def"
353 #undef AARCH64_OPT_EXTENSION
357 /* Used to track the size of an address when generating a pre/post
358 increment address. */
359 static enum machine_mode aarch64_memory_reference_mode;
361 /* Used to force GTY into this file. */
362 static GTY(()) int gty_dummy;
364 /* A table of valid AArch64 "bitmask immediate" values for
365 logical instructions. */
367 #define AARCH64_NUM_BITMASKS 5334
368 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
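/* Illustrative note (added): a "bitmask immediate" is a repetition of
   identical 2, 4, 8, 16, 32 or 64-bit elements, each element being a
   (possibly rotated) run of contiguous ones.  For example,
   0x00ff00ff00ff00ff and 0x0003fffc0003fffc can be encoded directly in
   AND/ORR/EOR, whereas an arbitrary constant such as 0x1234567812345678
   cannot and has to be synthesized with MOVZ/MOVK.  There are 5334
   distinct 64-bit values of this form, hence AARCH64_NUM_BITMASKS.  */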
370 typedef enum aarch64_cond_code
372 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
373 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
374 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
378 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
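/* Illustrative note (added): the enumerators above are laid out so that a
   condition and its logical inverse differ only in the low bit, matching the
   architectural encoding; flipping that bit therefore inverts the condition,
   e.g. AARCH64_INVERSE_CONDITION_CODE (AARCH64_EQ) == AARCH64_NE and
   AARCH64_INVERSE_CONDITION_CODE (AARCH64_GE) == AARCH64_LT.  */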
380 /* The condition codes of the processor, and the inverse function. */
381 static const char * const aarch64_condition_codes[] =
383 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
384 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
387 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
389 aarch64_dbx_register_number (unsigned regno)
391 if (GP_REGNUM_P (regno))
392 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
393 else if (regno == SP_REGNUM)
394 return AARCH64_DWARF_SP;
395 else if (FP_REGNUM_P (regno))
396 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
398 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
399 equivalent DWARF register. */
400 return DWARF_FRAME_REGISTERS;
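/* Illustrative examples for aarch64_dbx_register_number (added): assuming the
   usual AArch64 DWARF numbering (R0 = 0, SP = 31, V0 = 64), x7 maps to 7, the
   stack pointer maps to 31 and v3 maps to 67, while registers with no DWARF
   equivalent (such as the condition flags) return DWARF_FRAME_REGISTERS.  */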
403 /* Return TRUE if MODE is any of the large INT modes. */
405 aarch64_vect_struct_mode_p (enum machine_mode mode)
407 return mode == OImode || mode == CImode || mode == XImode;
410 /* Return TRUE if MODE is any of the vector modes. */
412 aarch64_vector_mode_p (enum machine_mode mode)
414 return aarch64_vector_mode_supported_p (mode)
415 || aarch64_vect_struct_mode_p (mode);
418 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
420 aarch64_array_mode_supported_p (enum machine_mode mode,
421 unsigned HOST_WIDE_INT nelems)
424 && AARCH64_VALID_SIMD_QREG_MODE (mode)
425 && (nelems >= 2 && nelems <= 4))
431 /* Implement HARD_REGNO_NREGS. */
434 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
436 switch (aarch64_regno_regclass (regno))
440 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
442 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
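/* Illustrative note (added): a 16-byte vector mode such as V4SImode fits in
   a single 128-bit V register ((16 + 16 - 1) / 16 == 1), whereas the same
   16 bytes as TImode need two X registers ((16 + 8 - 1) / 8 == 2).  */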
447 /* Implement HARD_REGNO_MODE_OK. */
450 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
452 if (GET_MODE_CLASS (mode) == MODE_CC)
453 return regno == CC_REGNUM;
455 if (regno == SP_REGNUM)
456 /* The purpose of comparing with ptr_mode is to support the
457 global register variable associated with the stack pointer
458 register via the syntax of asm ("wsp") in ILP32. */
459 return mode == Pmode || mode == ptr_mode;
461 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
462 return mode == Pmode;
464 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
467 if (FP_REGNUM_P (regno))
469 if (aarch64_vect_struct_mode_p (mode))
471 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
479 /* Return true if calls to DECL should be treated as
480 long-calls (i.e. called via a register). */
482 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
487 /* Return true if calls to symbol-ref SYM should be treated as
488 long-calls (i.e. called via a register). */
490 aarch64_is_long_call_p (rtx sym)
492 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
495 /* Return true if the offsets to a zero/sign-extract operation
496 represent an expression that matches an extend operation. The
496 operands represent the parameters from
499 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
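/* Illustrative example (added): for DImode, MULT_IMM == 4 and EXTRACT_IMM == 34
   satisfy the test below (34 & ~7 == 32 is a power of two, 34 & 7 == 2 and
   4 == 1 << 2): extracting the low 34 bits of (reg * 4) is the same as
   extending REG from 32 bits and shifting it left by 2, which is what the
   extended-register operand forms of the architecture express.  */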
501 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
504 HOST_WIDE_INT mult_val, extract_val;
506 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
509 mult_val = INTVAL (mult_imm);
510 extract_val = INTVAL (extract_imm);
513 && extract_val < GET_MODE_BITSIZE (mode)
514 && exact_log2 (extract_val & ~7) > 0
515 && (extract_val & 7) <= 4
516 && mult_val == (1 << (extract_val & 7)))
522 /* Emit an insn that's a simple single-set. Both the operands must be
523 known to be valid. */
525 emit_set_insn (rtx x, rtx y)
527 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
530 /* X and Y are two things to compare using CODE. Emit the compare insn and
531 return the rtx for register 0 in the proper mode. */
533 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
535 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
536 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
538 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
542 /* Build the SYMBOL_REF for __tls_get_addr. */
544 static GTY(()) rtx tls_get_addr_libfunc;
547 aarch64_tls_get_addr (void)
549 if (!tls_get_addr_libfunc)
550 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
551 return tls_get_addr_libfunc;
554 /* Return the TLS model to use for ADDR. */
556 static enum tls_model
557 tls_symbolic_operand_type (rtx addr)
559 enum tls_model tls_kind = TLS_MODEL_NONE;
562 if (GET_CODE (addr) == CONST)
564 split_const (addr, &sym, &addend);
565 if (GET_CODE (sym) == SYMBOL_REF)
566 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
568 else if (GET_CODE (addr) == SYMBOL_REF)
569 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
574 /* We allow LO_SUMs in our legitimate addresses
575 so that combine can take care of combining addresses where
576 necessary, but for generation purposes, we generate the address as:
579 tmp = hi (symbol_ref); adrp x1, foo
580 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
584 adrp x1, :got:foo adrp tmp, :tlsgd:foo
585 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
589 Load TLS symbol, depending on TLS mechanism and TLS access model.
591 Global Dynamic - Traditional TLS:
593 add dest, tmp, #:tlsgd_lo12:imm
596 Global Dynamic - TLS Descriptors:
597 adrp dest, :tlsdesc:imm
598 ldr tmp, [dest, #:tlsdesc_lo12:imm]
599 add dest, dest, #:tlsdesc_lo12:imm
606 adrp tmp, :gottprel:imm
607 ldr dest, [tmp, #:gottprel_lo12:imm]
612 add t0, tp, #:tprel_hi12:imm
613 add t0, #:tprel_lo12_nc:imm
617 aarch64_load_symref_appropriately (rtx dest, rtx imm,
618 enum aarch64_symbol_type type)
622 case SYMBOL_SMALL_ABSOLUTE:
624 /* In ILP32, the mode of dest can be either SImode or DImode. */
626 enum machine_mode mode = GET_MODE (dest);
628 gcc_assert (mode == Pmode || mode == ptr_mode);
630 if (can_create_pseudo_p ())
631 tmp_reg = gen_reg_rtx (mode);
633 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
634 emit_insn (gen_add_losym (dest, tmp_reg, imm));
638 case SYMBOL_TINY_ABSOLUTE:
639 emit_insn (gen_rtx_SET (Pmode, dest, imm));
642 case SYMBOL_SMALL_GOT:
644 /* In ILP32, the mode of dest can be either SImode or DImode,
645 while the GOT entry is always of SImode size. The mode of
646 dest depends on how dest is used: if dest is assigned to a
647 pointer (e.g. stored in memory), it has SImode; it may have
648 DImode if dest is dereferenced to access memory.
649 This is why we have to handle three different ldr_got_small
650 patterns here (two patterns for ILP32). */
652 enum machine_mode mode = GET_MODE (dest);
654 if (can_create_pseudo_p ())
655 tmp_reg = gen_reg_rtx (mode);
657 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
658 if (mode == ptr_mode)
661 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
663 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
667 gcc_assert (mode == Pmode);
668 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
674 case SYMBOL_SMALL_TLSGD:
677 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
680 emit_call_insn (gen_tlsgd_small (result, imm));
681 insns = get_insns ();
684 RTL_CONST_CALL_P (insns) = 1;
685 emit_libcall_block (insns, dest, result, imm);
689 case SYMBOL_SMALL_TLSDESC:
691 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
694 emit_insn (gen_tlsdesc_small (imm));
695 tp = aarch64_load_tp (NULL);
696 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
697 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
701 case SYMBOL_SMALL_GOTTPREL:
703 rtx tmp_reg = gen_reg_rtx (Pmode);
704 rtx tp = aarch64_load_tp (NULL);
705 emit_insn (gen_tlsie_small (tmp_reg, imm));
706 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
707 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
711 case SYMBOL_SMALL_TPREL:
713 rtx tp = aarch64_load_tp (NULL);
715 if (GET_MODE (dest) != Pmode)
716 tp = gen_lowpart (GET_MODE (dest), tp);
718 emit_insn (gen_tlsle_small (dest, tp, imm));
719 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
723 case SYMBOL_TINY_GOT:
724 emit_insn (gen_ldr_got_tiny (dest, imm));
732 /* Emit a move from SRC to DEST. Assume that the move expanders can
733 handle all moves if !can_create_pseudo_p (). The distinction is
734 important because, unlike emit_move_insn, the move expanders know
735 how to force Pmode objects into the constant pool even when the
736 constant pool address is not itself legitimate. */
738 aarch64_emit_move (rtx dest, rtx src)
740 return (can_create_pseudo_p ()
741 ? emit_move_insn (dest, src)
742 : emit_move_insn_1 (dest, src));
745 /* Split a 128-bit move operation into two 64-bit move operations,
746 taking care to handle partial overlap of register to register
747 copies. Special cases are needed when moving between GP regs and
748 FP regs. SRC can be a register, constant or memory; DST a register
749 or memory. If either operand is memory it must not have any side
752 aarch64_split_128bit_move (rtx dst, rtx src)
757 enum machine_mode mode = GET_MODE (dst);
759 gcc_assert (mode == TImode || mode == TFmode);
760 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
761 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
763 if (REG_P (dst) && REG_P (src))
765 int src_regno = REGNO (src);
766 int dst_regno = REGNO (dst);
768 /* Handle FP <-> GP regs. */
769 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
771 src_lo = gen_lowpart (word_mode, src);
772 src_hi = gen_highpart (word_mode, src);
776 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
777 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
781 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
782 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
786 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
788 dst_lo = gen_lowpart (word_mode, dst);
789 dst_hi = gen_highpart (word_mode, dst);
793 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
794 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
798 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
799 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
805 dst_lo = gen_lowpart (word_mode, dst);
806 dst_hi = gen_highpart (word_mode, dst);
807 src_lo = gen_lowpart (word_mode, src);
808 src_hi = gen_highpart_mode (word_mode, mode, src);
810 /* At most one pairing may overlap. */
811 if (reg_overlap_mentioned_p (dst_lo, src_hi))
813 aarch64_emit_move (dst_hi, src_hi);
814 aarch64_emit_move (dst_lo, src_lo);
818 aarch64_emit_move (dst_lo, src_lo);
819 aarch64_emit_move (dst_hi, src_hi);
824 aarch64_split_128bit_move_p (rtx dst, rtx src)
826 return (! REG_P (src)
827 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
830 /* Split a complex SIMD combine. */
833 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
835 enum machine_mode src_mode = GET_MODE (src1);
836 enum machine_mode dst_mode = GET_MODE (dst);
838 gcc_assert (VECTOR_MODE_P (dst_mode));
840 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
842 rtx (*gen) (rtx, rtx, rtx);
847 gen = gen_aarch64_simd_combinev8qi;
850 gen = gen_aarch64_simd_combinev4hi;
853 gen = gen_aarch64_simd_combinev2si;
856 gen = gen_aarch64_simd_combinev2sf;
859 gen = gen_aarch64_simd_combinedi;
862 gen = gen_aarch64_simd_combinedf;
868 emit_insn (gen (dst, src1, src2));
873 /* Split a complex SIMD move. */
876 aarch64_split_simd_move (rtx dst, rtx src)
878 enum machine_mode src_mode = GET_MODE (src);
879 enum machine_mode dst_mode = GET_MODE (dst);
881 gcc_assert (VECTOR_MODE_P (dst_mode));
883 if (REG_P (dst) && REG_P (src))
885 rtx (*gen) (rtx, rtx);
887 gcc_assert (VECTOR_MODE_P (src_mode));
892 gen = gen_aarch64_split_simd_movv16qi;
895 gen = gen_aarch64_split_simd_movv8hi;
898 gen = gen_aarch64_split_simd_movv4si;
901 gen = gen_aarch64_split_simd_movv2di;
904 gen = gen_aarch64_split_simd_movv4sf;
907 gen = gen_aarch64_split_simd_movv2df;
913 emit_insn (gen (dst, src));
919 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
921 if (can_create_pseudo_p ())
922 return force_reg (mode, value);
925 x = aarch64_emit_move (x, value);
932 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
934 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
937 /* Load the full offset into a register. This
938 might be improvable in the future. */
939 high = GEN_INT (offset);
941 high = aarch64_force_temporary (mode, temp, high);
942 reg = aarch64_force_temporary (mode, temp,
943 gen_rtx_PLUS (mode, high, reg));
945 return plus_constant (mode, reg, offset);
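/* Illustrative note (added): a small offset such as 16 is simply folded into
   the returned PLUS, whereas an offset like 0x1234567, which the ADD immediate
   forms (12 bits, optionally shifted left by 12) cannot encode, is first
   loaded into a temporary and added as a register operand.  */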
949 aarch64_expand_mov_immediate (rtx dest, rtx imm)
951 enum machine_mode mode = GET_MODE (dest);
952 unsigned HOST_WIDE_INT mask;
955 unsigned HOST_WIDE_INT val;
958 int one_match, zero_match;
960 gcc_assert (mode == SImode || mode == DImode);
962 /* Check what type of symbol it is. */
963 if (GET_CODE (imm) == SYMBOL_REF
964 || GET_CODE (imm) == LABEL_REF
965 || GET_CODE (imm) == CONST)
967 rtx mem, base, offset;
968 enum aarch64_symbol_type sty;
970 /* If we have (const (plus symbol offset)), separate out the offset
971 before we start classifying the symbol. */
972 split_const (imm, &base, &offset);
974 sty = aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR);
977 case SYMBOL_FORCE_TO_MEM:
978 if (offset != const0_rtx
979 && targetm.cannot_force_const_mem (mode, imm))
981 gcc_assert (can_create_pseudo_p ());
982 base = aarch64_force_temporary (mode, dest, base);
983 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
984 aarch64_emit_move (dest, base);
987 mem = force_const_mem (ptr_mode, imm);
989 if (mode != ptr_mode)
990 mem = gen_rtx_ZERO_EXTEND (mode, mem);
991 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
994 case SYMBOL_SMALL_TLSGD:
995 case SYMBOL_SMALL_TLSDESC:
996 case SYMBOL_SMALL_GOTTPREL:
997 case SYMBOL_SMALL_GOT:
998 case SYMBOL_TINY_GOT:
999 if (offset != const0_rtx)
1001 gcc_assert(can_create_pseudo_p ());
1002 base = aarch64_force_temporary (mode, dest, base);
1003 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1004 aarch64_emit_move (dest, base);
1009 case SYMBOL_SMALL_TPREL:
1010 case SYMBOL_SMALL_ABSOLUTE:
1011 case SYMBOL_TINY_ABSOLUTE:
1012 aarch64_load_symref_appropriately (dest, imm, sty);
1020 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1022 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1026 if (!CONST_INT_P (imm))
1028 if (GET_CODE (imm) == HIGH)
1029 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1032 rtx mem = force_const_mem (mode, imm);
1034 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1042 /* We know we can't do this in 1 insn, and we must be able to do it
1043 in two; so don't mess around looking for sequences that don't buy
1045 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1046 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1047 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
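/* Illustrative example (added): for imm == 0x12345678 the two insns above
   would typically assemble to

     mov  w0, #0x5678
     movk w0, #0x1234, lsl #16

   i.e. load the low halfword, then insert the high halfword.  */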
1051 /* Remaining cases are all for DImode. */
1054 subtargets = optimize && can_create_pseudo_p ();
1060 for (i = 0; i < 64; i += 16, mask <<= 16)
1062 if ((val & mask) == 0)
1064 else if ((val & mask) == mask)
1071 for (i = 0; i < 64; i += 16, mask <<= 16)
1073 if ((val & mask) != mask)
1075 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1076 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1077 GEN_INT ((val >> i) & 0xffff)));
1084 if (zero_match == 2)
1085 goto simple_sequence;
1087 mask = 0x0ffff0000UL;
1088 for (i = 16; i < 64; i += 16, mask <<= 16)
1090 HOST_WIDE_INT comp = mask & ~(mask - 1);
1092 if (aarch64_uimm12_shift (val - (val & mask)))
1094 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1096 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1097 emit_insn (gen_adddi3 (dest, subtarget,
1098 GEN_INT (val - (val & mask))));
1101 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1103 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1105 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1106 GEN_INT ((val + comp) & mask)));
1107 emit_insn (gen_adddi3 (dest, subtarget,
1108 GEN_INT (val - ((val + comp) & mask))));
1111 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1113 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1115 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1116 GEN_INT ((val - comp) | ~mask)));
1117 emit_insn (gen_adddi3 (dest, subtarget,
1118 GEN_INT (val - ((val - comp) | ~mask))));
1121 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1123 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1125 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1126 GEN_INT (val | ~mask)));
1127 emit_insn (gen_adddi3 (dest, subtarget,
1128 GEN_INT (val - (val | ~mask))));
1133 /* See if we can do it by arithmetically combining two
1135 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1140 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1141 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1143 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1144 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1145 GEN_INT (aarch64_bitmasks[i])));
1146 emit_insn (gen_adddi3 (dest, subtarget,
1147 GEN_INT (val - aarch64_bitmasks[i])));
1151 for (j = 0; j < 64; j += 16, mask <<= 16)
1153 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1155 emit_insn (gen_rtx_SET (VOIDmode, dest,
1156 GEN_INT (aarch64_bitmasks[i])));
1157 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1158 GEN_INT ((val >> j) & 0xffff)));
1164 /* See if we can do it by logically combining two immediates. */
1165 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1167 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1171 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1172 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1174 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1175 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1176 GEN_INT (aarch64_bitmasks[i])));
1177 emit_insn (gen_iordi3 (dest, subtarget,
1178 GEN_INT (aarch64_bitmasks[j])));
1182 else if ((val & aarch64_bitmasks[i]) == val)
1186 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1187 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1190 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1191 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1192 GEN_INT (aarch64_bitmasks[j])));
1193 emit_insn (gen_anddi3 (dest, subtarget,
1194 GEN_INT (aarch64_bitmasks[i])));
1203 for (i = 0; i < 64; i += 16, mask <<= 16)
1205 if ((val & mask) != 0)
1209 emit_insn (gen_rtx_SET (VOIDmode, dest,
1210 GEN_INT (val & mask)));
1214 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1215 GEN_INT ((val >> i) & 0xffff)));
1221 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1223 /* Indirect calls are not currently supported. */
1227 /* Cannot tail-call to long-calls, since these are outside of the
1228 range of a branch instruction (we could handle this if we added
1229 support for indirect tail-calls). */
1230 if (aarch64_decl_is_long_call_p (decl))
1236 /* Implement TARGET_PASS_BY_REFERENCE. */
1239 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1240 enum machine_mode mode,
1242 bool named ATTRIBUTE_UNUSED)
1245 enum machine_mode dummymode;
1248 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1249 size = (mode == BLKmode && type)
1250 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1252 /* Aggregates are passed by reference based on their size. */
1253 if (type && AGGREGATE_TYPE_P (type))
1255 size = int_size_in_bytes (type);
1258 /* Variable sized arguments are always passed by reference. */
1262 /* Can this be a candidate to be passed in fp/simd register(s)? */
1263 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1268 /* Arguments which are variable sized or larger than 2 registers are
1269 passed by reference unless they are a homogeneous floating-point
1271 return size > 2 * UNITS_PER_WORD;
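/* Illustrative examples of the rule above (added): a 24-byte struct of three
   longs is passed by reference and a 16-byte struct can still travel in two X
   registers, while an HFA of four doubles (32 bytes) is never passed by
   reference, because the fp/simd candidate check above returns first.  */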
1274 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1276 aarch64_return_in_msb (const_tree valtype)
1278 enum machine_mode dummy_mode;
1281 /* Never happens in little-endian mode. */
1282 if (!BYTES_BIG_ENDIAN)
1285 /* Only composite types smaller than or equal to 16 bytes can
1286 be potentially returned in registers. */
1287 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1288 || int_size_in_bytes (valtype) <= 0
1289 || int_size_in_bytes (valtype) > 16)
1292 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1293 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1294 is always passed/returned in the least significant bits of fp/simd
1296 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1297 &dummy_mode, &dummy_int, NULL))
1303 /* Implement TARGET_FUNCTION_VALUE.
1304 Define how to find the value returned by a function. */
1307 aarch64_function_value (const_tree type, const_tree func,
1308 bool outgoing ATTRIBUTE_UNUSED)
1310 enum machine_mode mode;
1313 enum machine_mode ag_mode;
1315 mode = TYPE_MODE (type);
1316 if (INTEGRAL_TYPE_P (type))
1317 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1319 if (aarch64_return_in_msb (type))
1321 HOST_WIDE_INT size = int_size_in_bytes (type);
1323 if (size % UNITS_PER_WORD != 0)
1325 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1326 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1330 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1331 &ag_mode, &count, NULL))
1333 if (!aarch64_composite_type_p (type, mode))
1335 gcc_assert (count == 1 && mode == ag_mode);
1336 return gen_rtx_REG (mode, V0_REGNUM);
1343 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1344 for (i = 0; i < count; i++)
1346 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1347 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1348 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1349 XVECEXP (par, 0, i) = tmp;
1355 return gen_rtx_REG (mode, R0_REGNUM);
1358 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1359 Return true if REGNO is the number of a hard register in which the values
1360 of a called function may come back. */
1363 aarch64_function_value_regno_p (const unsigned int regno)
1365 /* A maximum of 16 bytes can be returned in the general registers. Examples
1366 of 16-byte return values are: 128-bit integers and 16-byte small
1367 structures (excluding homogeneous floating-point aggregates). */
1368 if (regno == R0_REGNUM || regno == R1_REGNUM)
1371 /* Up to four fp/simd registers can return a function value, e.g. a
1372 homogeneous floating-point aggregate having four members. */
1373 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1374 return !TARGET_GENERAL_REGS_ONLY;
1379 /* Implement TARGET_RETURN_IN_MEMORY.
1381 If the type T of the result of a function is such that
1383 would require that arg be passed as a value in a register (or set of
1384 registers) according to the parameter passing rules, then the result
1385 is returned in the same registers as would be used for such an
1389 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1392 enum machine_mode ag_mode;
1395 if (!AGGREGATE_TYPE_P (type)
1396 && TREE_CODE (type) != COMPLEX_TYPE
1397 && TREE_CODE (type) != VECTOR_TYPE)
1398 /* Simple scalar types always returned in registers. */
1401 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1408 /* Types larger than 2 registers returned in memory. */
1409 size = int_size_in_bytes (type);
1410 return (size < 0 || size > 2 * UNITS_PER_WORD);
1414 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1415 const_tree type, int *nregs)
1417 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1418 return aarch64_vfp_is_call_or_return_candidate (mode,
1420 &pcum->aapcs_vfp_rmode,
1425 /* Given MODE and TYPE of a function argument, return the alignment in
1426 bits. The idea is to suppress any stronger alignment requested by
1427 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1428 This is a helper function for local use only. */
1431 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1433 unsigned int alignment;
1437 if (!integer_zerop (TYPE_SIZE (type)))
1439 if (TYPE_MODE (type) == mode)
1440 alignment = TYPE_ALIGN (type);
1442 alignment = GET_MODE_ALIGNMENT (mode);
1448 alignment = GET_MODE_ALIGNMENT (mode);
1453 /* Layout a function argument according to the AAPCS64 rules. The rule
1454 numbers refer to the rule numbers in the AAPCS64. */
1457 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1459 bool named ATTRIBUTE_UNUSED)
1461 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1462 int ncrn, nvrn, nregs;
1463 bool allocate_ncrn, allocate_nvrn;
1466 /* We need to do this once per argument. */
1467 if (pcum->aapcs_arg_processed)
1470 pcum->aapcs_arg_processed = true;
1472 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
1474 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1477 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1478 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1483 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1484 The following code thus handles passing by SIMD/FP registers first. */
1486 nvrn = pcum->aapcs_nvrn;
1488 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1489 and homogeneous short-vector aggregates (HVA). */
1492 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1494 pcum->aapcs_nextnvrn = nvrn + nregs;
1495 if (!aarch64_composite_type_p (type, mode))
1497 gcc_assert (nregs == 1);
1498 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1504 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1505 for (i = 0; i < nregs; i++)
1507 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1508 V0_REGNUM + nvrn + i);
1509 tmp = gen_rtx_EXPR_LIST
1511 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1512 XVECEXP (par, 0, i) = tmp;
1514 pcum->aapcs_reg = par;
1520 /* C.3 NSRN is set to 8. */
1521 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1526 ncrn = pcum->aapcs_ncrn;
1527 nregs = size / UNITS_PER_WORD;
1529 /* C6 - C9, though the sign and zero extension semantics are
1530 handled elsewhere. This is the case where the argument fits
1531 entirely in general registers. */
1532 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1534 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1536 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1538 /* C.8 if the argument has an alignment of 16 then the NGRN is
1539 rounded up to the next even number. */
1540 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1543 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1545 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1546 A reg is still generated for it, but the caller should be smart
1547 enough not to use it. */
1548 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1550 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1557 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1558 for (i = 0; i < nregs; i++)
1560 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1561 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1562 GEN_INT (i * UNITS_PER_WORD));
1563 XVECEXP (par, 0, i) = tmp;
1565 pcum->aapcs_reg = par;
1568 pcum->aapcs_nextncrn = ncrn + nregs;
1573 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1575 /* The argument is passed on the stack; record the needed number of words for
1576 this argument and align the total size if necessary. */
1578 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1579 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1580 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1581 16 / UNITS_PER_WORD);
1585 /* Implement TARGET_FUNCTION_ARG. */
1588 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1589 const_tree type, bool named)
1591 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1592 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1594 if (mode == VOIDmode)
1597 aarch64_layout_arg (pcum_v, mode, type, named);
1598 return pcum->aapcs_reg;
1602 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1603 const_tree fntype ATTRIBUTE_UNUSED,
1604 rtx libname ATTRIBUTE_UNUSED,
1605 const_tree fndecl ATTRIBUTE_UNUSED,
1606 unsigned n_named ATTRIBUTE_UNUSED)
1608 pcum->aapcs_ncrn = 0;
1609 pcum->aapcs_nvrn = 0;
1610 pcum->aapcs_nextncrn = 0;
1611 pcum->aapcs_nextnvrn = 0;
1612 pcum->pcs_variant = ARM_PCS_AAPCS64;
1613 pcum->aapcs_reg = NULL_RTX;
1614 pcum->aapcs_arg_processed = false;
1615 pcum->aapcs_stack_words = 0;
1616 pcum->aapcs_stack_size = 0;
1622 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1623 enum machine_mode mode,
1627 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1628 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1630 aarch64_layout_arg (pcum_v, mode, type, named);
1631 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1632 != (pcum->aapcs_stack_words != 0));
1633 pcum->aapcs_arg_processed = false;
1634 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1635 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1636 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1637 pcum->aapcs_stack_words = 0;
1638 pcum->aapcs_reg = NULL_RTX;
1643 aarch64_function_arg_regno_p (unsigned regno)
1645 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1646 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
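/* Illustrative note (added): with NUM_ARG_REGS and NUM_FP_ARG_REGS both equal
   to 8, this accepts exactly x0-x7 and v0-v7, the AAPCS64 argument passing
   registers.  */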
1649 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1650 PARM_BOUNDARY bits of alignment, but will be given anything up
1651 to STACK_BOUNDARY bits if the type requires it. This makes sure
1652 that both before and after the layout of each argument, the Next
1653 Stacked Argument Address (NSAA) will have a minimum alignment of
1657 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1659 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1661 if (alignment < PARM_BOUNDARY)
1662 alignment = PARM_BOUNDARY;
1663 if (alignment > STACK_BOUNDARY)
1664 alignment = STACK_BOUNDARY;
1668 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1670 Return true if an argument passed on the stack should be padded upwards,
1671 i.e. if the least-significant byte of the stack slot has useful data.
1673 Small aggregate types are placed in the lowest memory address.
1675 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1678 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1680 /* On little-endian targets, the least significant byte of every stack
1681 argument is passed at the lowest byte address of the stack slot. */
1682 if (!BYTES_BIG_ENDIAN)
1685 /* Otherwise, integral, floating-point and pointer types are padded downward:
1686 the least significant byte of a stack argument is passed at the highest
1687 byte address of the stack slot. */
1689 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1690 || POINTER_TYPE_P (type))
1691 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1694 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1698 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1700 It specifies padding for the last (may also be the only)
1701 element of a block move between registers and memory. If
1702 assuming the block is in memory, padding upward means that
1703 the last element is padded after its most significant byte,
1704 while in downward padding, the last element is padded at
1705 its least significant byte side.
1707 Small aggregates and small complex types are always padded
1710 We don't need to worry about homogeneous floating-point or
1711 short-vector aggregates; their move is not affected by the
1712 padding direction determined here. Regardless of endianness,
1713 each element of such an aggregate is put in the least
1714 significant bits of a fp/simd register.
1716 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1717 register has useful data, and return the opposite if the most
1718 significant byte does. */
1721 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1722 bool first ATTRIBUTE_UNUSED)
1725 /* Small composite types are always padded upward. */
1726 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1728 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1729 : GET_MODE_SIZE (mode));
1730 if (size < 2 * UNITS_PER_WORD)
1734 /* Otherwise, use the default padding. */
1735 return !BYTES_BIG_ENDIAN;
1738 static enum machine_mode
1739 aarch64_libgcc_cmp_return_mode (void)
1745 aarch64_frame_pointer_required (void)
1747 /* If the function contains dynamic stack allocations, we need to
1748 use the frame pointer to access the static parts of the frame. */
1749 if (cfun->calls_alloca)
1752 /* In aarch64_override_options_after_change
1753 flag_omit_leaf_frame_pointer turns off the frame pointer by
1754 default. Turn it back on now if we've not got a leaf
1756 if (flag_omit_leaf_frame_pointer
1757 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1763 /* Mark the registers that need to be saved by the callee and calculate
1764 the size of the callee-saved registers area and frame record (both FP
1765 and LR may be omitted). */
1767 aarch64_layout_frame (void)
1769 HOST_WIDE_INT offset = 0;
1772 if (reload_completed && cfun->machine->frame.laid_out)
1775 cfun->machine->frame.fp_lr_offset = 0;
1777 /* First mark all the registers that really need to be saved... */
1778 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1779 cfun->machine->frame.reg_offset[regno] = -1;
1781 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1782 cfun->machine->frame.reg_offset[regno] = -1;
1784 /* ... that includes the eh data registers (if needed)... */
1785 if (crtl->calls_eh_return)
1786 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1787 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1789 /* ... and any callee saved register that dataflow says is live. */
1790 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1791 if (df_regs_ever_live_p (regno)
1792 && !call_used_regs[regno])
1793 cfun->machine->frame.reg_offset[regno] = 0;
1795 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1796 if (df_regs_ever_live_p (regno)
1797 && !call_used_regs[regno])
1798 cfun->machine->frame.reg_offset[regno] = 0;
1800 if (frame_pointer_needed)
1802 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1803 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1804 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1807 /* Now assign stack slots for them. */
1808 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1809 if (cfun->machine->frame.reg_offset[regno] != -1)
1811 cfun->machine->frame.reg_offset[regno] = offset;
1812 offset += UNITS_PER_WORD;
1815 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1816 if (cfun->machine->frame.reg_offset[regno] != -1)
1818 cfun->machine->frame.reg_offset[regno] = offset;
1819 offset += UNITS_PER_WORD;
1822 if (frame_pointer_needed)
1824 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1825 offset += UNITS_PER_WORD;
1826 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1829 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1831 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1832 offset += UNITS_PER_WORD;
1833 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1836 cfun->machine->frame.padding0 =
1837 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1838 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1840 cfun->machine->frame.saved_regs_size = offset;
1841 cfun->machine->frame.laid_out = true;
1844 /* Make the last instruction frame-related and note that it performs
1845 the operation described by FRAME_PATTERN. */
1848 aarch64_set_frame_expr (rtx frame_pattern)
1852 insn = get_last_insn ();
1853 RTX_FRAME_RELATED_P (insn) = 1;
1854 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1855 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1861 aarch64_register_saved_on_entry (int regno)
1863 return cfun->machine->frame.reg_offset[regno] != -1;
1868 aarch64_save_or_restore_fprs (int start_offset, int increment,
1869 bool restore, rtx base_rtx)
1875 rtx (*gen_mem_ref)(enum machine_mode, rtx)
1876 = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1879 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1881 if (aarch64_register_saved_on_entry (regno))
1884 mem = gen_mem_ref (DFmode,
1885 plus_constant (Pmode,
1889 for (regno2 = regno + 1;
1890 regno2 <= V31_REGNUM
1891 && !aarch64_register_saved_on_entry (regno2);
1896 if (regno2 <= V31_REGNUM &&
1897 aarch64_register_saved_on_entry (regno2))
1900 /* Next highest register to be saved. */
1901 mem2 = gen_mem_ref (DFmode,
1905 start_offset + increment));
1906 if (restore == false)
1909 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1910 mem2, gen_rtx_REG (DFmode, regno2)));
1916 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1917 gen_rtx_REG (DFmode, regno2), mem2));
1919 add_reg_note (insn, REG_CFA_RESTORE,
1920 gen_rtx_REG (DFmode, regno));
1921 add_reg_note (insn, REG_CFA_RESTORE,
1922 gen_rtx_REG (DFmode, regno2));
1925 /* The first part of a frame-related parallel insn
1926 is always assumed to be relevant to the frame
1927 calculations; subsequent parts are only
1928 frame-related if explicitly marked. */
1929 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1931 start_offset += increment * 2;
1935 if (restore == false)
1936 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1939 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1940 add_reg_note (insn, REG_CFA_RESTORE,
1941 gen_rtx_REG (DImode, regno));
1943 start_offset += increment;
1945 RTX_FRAME_RELATED_P (insn) = 1;
1952 /* Offset from the stack pointer at which the saves and
1953 restores have to happen. */
1955 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1959 rtx base_rtx = stack_pointer_rtx;
1960 HOST_WIDE_INT start_offset = offset;
1961 HOST_WIDE_INT increment = UNITS_PER_WORD;
1962 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1963 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1967 for (regno = R0_REGNUM; regno <= limit; regno++)
1969 if (aarch64_register_saved_on_entry (regno))
1972 mem = gen_mem_ref (Pmode,
1973 plus_constant (Pmode,
1977 for (regno2 = regno + 1;
1979 && !aarch64_register_saved_on_entry (regno2);
1984 if (regno2 <= limit &&
1985 aarch64_register_saved_on_entry (regno2))
1988 /* Next highest register to be saved. */
1989 mem2 = gen_mem_ref (Pmode,
1993 start_offset + increment));
1994 if (restore == false)
1997 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1998 mem2, gen_rtx_REG (DImode, regno2)));
2004 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
2005 gen_rtx_REG (DImode, regno2), mem2));
2007 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2008 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
2011 /* The first part of a frame-related parallel insn
2012 is always assumed to be relevant to the frame
2013 calculations; subsequent parts are only
2014 frame-related if explicitly marked. */
2015 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
2018 start_offset += increment * 2;
2022 if (restore == false)
2023 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
2026 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
2027 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2029 start_offset += increment;
2031 RTX_FRAME_RELATED_P (insn) = 1;
2035 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
2039 /* AArch64 stack frames generated by this compiler look like:
2041 +-------------------------------+
2043 | incoming stack arguments |
2045 +-------------------------------+ <-- arg_pointer_rtx
2047 | callee-allocated save area |
2048 | for register varargs |
2050 +-------------------------------+ <-- frame_pointer_rtx
2054 +-------------------------------+
2056 +-------------------------------+ |
2059 | callee-saved registers | | frame.saved_regs_size
2061 +-------------------------------+ |
2063 +-------------------------------+ |
2065 P +-------------------------------+ <-- hard_frame_pointer_rtx
2066 | dynamic allocation |
2067 +-------------------------------+
2069 | outgoing stack arguments |
2071 +-------------------------------+ <-- stack_pointer_rtx
2073 Dynamic stack allocations such as alloca insert data at point P.
2074 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2075 hard_frame_pointer_rtx unchanged. */
2077 /* Generate the prologue instructions for entry into a function.
2078 Establish the stack frame by decreasing the stack pointer with a
2079 properly calculated size and, if necessary, create a frame record
2080 filled with the values of LR and previous frame pointer. The
2081 current FP is also set up if it is in use. */
2084 aarch64_expand_prologue (void)
2086 /* sub sp, sp, #<frame_size>
2087 stp {fp, lr}, [sp, #<frame_size> - 16]
2088 add fp, sp, #<frame_size> - hardfp_offset
2089 stp {cs_reg}, [fp, #-16] etc.
2091 sub sp, sp, <final_adjustment_if_any>
2093 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2094 HOST_WIDE_INT frame_size, offset;
2095 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2098 aarch64_layout_frame ();
2099 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2100 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2101 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2102 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2103 + crtl->outgoing_args_size);
2104 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2105 STACK_BOUNDARY / BITS_PER_UNIT);
2107 if (flag_stack_usage_info)
2108 current_function_static_stack_size = frame_size;
2111 - original_frame_size
2112 - cfun->machine->frame.saved_regs_size);
2114 /* Store pairs and load pairs have a range of only -512 to 504. */
2117 /* When the frame has a large size, an initial decrease is done on
2118 the stack pointer to jump over the callee-allocated save area for
2119 register varargs, the local variable area and/or the callee-saved
2120 register area. This will allow the pre-index write-back
2121 store pair instructions to be used for setting up the stack frame
2123 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2125 offset = cfun->machine->frame.saved_regs_size;
2127 frame_size -= (offset + crtl->outgoing_args_size);
2130 if (frame_size >= 0x1000000)
2132 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2133 emit_move_insn (op0, GEN_INT (-frame_size));
2134 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2135 aarch64_set_frame_expr (gen_rtx_SET
2136 (Pmode, stack_pointer_rtx,
2137 plus_constant (Pmode,
2141 else if (frame_size > 0)
2143 if ((frame_size & 0xfff) != frame_size)
2145 insn = emit_insn (gen_add2_insn
2147 GEN_INT (-(frame_size
2148 & ~(HOST_WIDE_INT)0xfff))));
2149 RTX_FRAME_RELATED_P (insn) = 1;
2151 if ((frame_size & 0xfff) != 0)
2153 insn = emit_insn (gen_add2_insn
2155 GEN_INT (-(frame_size
2156 & (HOST_WIDE_INT)0xfff))));
2157 RTX_FRAME_RELATED_P (insn) = 1;
2166 /* Save the frame pointer and lr if the frame pointer is needed
2167 first. Make the frame pointer point to the location of the
2168 old frame pointer on the stack. */
2169 if (frame_pointer_needed)
2175 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2176 GEN_INT (-offset)));
2177 RTX_FRAME_RELATED_P (insn) = 1;
2178 aarch64_set_frame_expr (gen_rtx_SET
2179 (Pmode, stack_pointer_rtx,
2180 gen_rtx_MINUS (Pmode,
2182 GEN_INT (offset))));
2183 mem_fp = gen_frame_mem (DImode,
2184 plus_constant (Pmode,
2187 mem_lr = gen_frame_mem (DImode,
2188 plus_constant (Pmode,
2192 insn = emit_insn (gen_store_pairdi (mem_fp,
2193 hard_frame_pointer_rtx,
2195 gen_rtx_REG (DImode,
2200 insn = emit_insn (gen_storewb_pairdi_di
2201 (stack_pointer_rtx, stack_pointer_rtx,
2202 hard_frame_pointer_rtx,
2203 gen_rtx_REG (DImode, LR_REGNUM),
2205 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2206 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2209 /* The first part of a frame-related parallel insn is always
2210 assumed to be relevant to the frame calculations;
2211 subsequent parts are only frame-related if explicitly
2213 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2214 RTX_FRAME_RELATED_P (insn) = 1;
2216 /* Set up frame pointer to point to the location of the
2217 previous frame pointer on the stack. */
2218 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2220 GEN_INT (fp_offset)));
2221 aarch64_set_frame_expr (gen_rtx_SET
2222 (Pmode, hard_frame_pointer_rtx,
2223 plus_constant (Pmode,
2226 RTX_FRAME_RELATED_P (insn) = 1;
2227 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2228 hard_frame_pointer_rtx));
2232 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2233 GEN_INT (-offset)));
2234 RTX_FRAME_RELATED_P (insn) = 1;
2237 aarch64_save_or_restore_callee_save_registers
2238 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2241 /* When offset >= 512,
2242 sub sp, sp, #<outgoing_args_size> */
2243 if (frame_size > -1)
2245 if (crtl->outgoing_args_size > 0)
2247 insn = emit_insn (gen_add2_insn
2249 GEN_INT (- crtl->outgoing_args_size)));
2250 RTX_FRAME_RELATED_P (insn) = 1;
2255 /* Generate the epilogue instructions for returning from a function. */
2257 aarch64_expand_epilogue (bool for_sibcall)
2259 HOST_WIDE_INT original_frame_size, frame_size, offset;
2260 HOST_WIDE_INT fp_offset;
2264 aarch64_layout_frame ();
2265 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2266 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2267 + crtl->outgoing_args_size);
2268 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2269 STACK_BOUNDARY / BITS_PER_UNIT);
2272 - original_frame_size
2273 - cfun->machine->frame.saved_regs_size);
2275 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2277 /* Store pairs and load pairs have a range of only -512 to 504. */
2280 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2282 offset = cfun->machine->frame.saved_regs_size;
2284 frame_size -= (offset + crtl->outgoing_args_size);
2286 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2288 insn = emit_insn (gen_add2_insn
2290 GEN_INT (crtl->outgoing_args_size)));
2291 RTX_FRAME_RELATED_P (insn) = 1;
2297 /* If there were outgoing arguments or we've done dynamic stack
2298 allocation, then restore the stack pointer from the frame
2299 pointer. This is at most one insn and more efficient than using
2300 GCC's internal mechanism. */
2301 if (frame_pointer_needed
2302 && (crtl->outgoing_args_size || cfun->calls_alloca))
2304 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2305 hard_frame_pointer_rtx,
2306 GEN_INT (- fp_offset)));
2307 RTX_FRAME_RELATED_P (insn) = 1;
2308 /* As SP is set to (FP - fp_offset), according to the rules in
2309 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2310 from the value of SP from now on. */
2311 cfa_reg = stack_pointer_rtx;
2314 aarch64_save_or_restore_callee_save_registers
2315 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2317 /* Restore the frame pointer and lr if the frame pointer is needed. */
2320 if (frame_pointer_needed)
2326 mem_fp = gen_frame_mem (DImode,
2327 plus_constant (Pmode,
2330 mem_lr = gen_frame_mem (DImode,
2331 plus_constant (Pmode,
2335 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2337 gen_rtx_REG (DImode,
2343 insn = emit_insn (gen_loadwb_pairdi_di
2346 hard_frame_pointer_rtx,
2347 gen_rtx_REG (DImode, LR_REGNUM),
2349 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2350 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2351 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2352 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2353 plus_constant (Pmode, cfa_reg,
2357 /* The first part of a frame-related parallel insn
2358 is always assumed to be relevant to the frame
2359 calculations; subsequent parts are only
2360 frame-related if explicitly marked. */
2361 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2362 RTX_FRAME_RELATED_P (insn) = 1;
2363 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2364 add_reg_note (insn, REG_CFA_RESTORE,
2365 gen_rtx_REG (DImode, LR_REGNUM));
2369 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2371 RTX_FRAME_RELATED_P (insn) = 1;
2376 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2378 RTX_FRAME_RELATED_P (insn) = 1;
2382 /* Stack adjustment for exception handler. */
2383 if (crtl->calls_eh_return)
2385 /* We need to unwind the stack by the offset computed by
2386 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2387 based on SP. Ideally we would update the SP and define the
2388 CFA along the lines of:
2390 SP = SP + EH_RETURN_STACKADJ_RTX
2391 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2393 However the dwarf emitter only understands a constant
2396 The solution chosen here is to use the otherwise unused IP0
2397 as a temporary register to hold the current SP value. The
2398 CFA is described using IP0 then SP is modified. */
2400 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2402 insn = emit_move_insn (ip0, stack_pointer_rtx);
2403 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2404 RTX_FRAME_RELATED_P (insn) = 1;
2406 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2408 /* Ensure the assignment to IP0 does not get optimized away. */
2412 if (frame_size > -1)
2414 if (frame_size >= 0x1000000)
2416 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2417 emit_move_insn (op0, GEN_INT (frame_size));
2418 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2419 aarch64_set_frame_expr (gen_rtx_SET
2420 (Pmode, stack_pointer_rtx,
2421 plus_constant (Pmode,
2425 else if (frame_size > 0)
2427 if ((frame_size & 0xfff) != 0)
2429 insn = emit_insn (gen_add2_insn
2431 GEN_INT ((frame_size
2432 & (HOST_WIDE_INT) 0xfff))));
2433 RTX_FRAME_RELATED_P (insn) = 1;
2435 if ((frame_size & 0xfff) != frame_size)
2437 insn = emit_insn (gen_add2_insn
2439 GEN_INT ((frame_size
2440 & ~ (HOST_WIDE_INT) 0xfff))));
2441 RTX_FRAME_RELATED_P (insn) = 1;
2445 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2446 plus_constant (Pmode,
2451 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2453 emit_jump_insn (ret_rtx);
2456 /* Return the place to copy the exception unwinding return address to.
2457 This will probably be a stack slot, but could (in theory) be the
2458 return register). */
2460 aarch64_final_eh_return_addr (void)
2462 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2463 aarch64_layout_frame ();
2464 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2465 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2466 + crtl->outgoing_args_size);
2467 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2468 STACK_BOUNDARY / BITS_PER_UNIT);
2470 - original_frame_size
2471 - cfun->machine->frame.saved_regs_size;
2473 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2474 return gen_rtx_REG (DImode, LR_REGNUM);
2476 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2477 result in a store to save LR introduced by builtin_eh_return () being
2478 incorrectly deleted because the alias is not detected.
2479 So in the calculation of the address to copy the exception unwinding
2480 return address to, we note 2 cases.
2481 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2482 we return a SP-relative location since all the addresses are SP-relative
2483 in this case. This prevents the store from being optimized away.
2484 If the fp_offset is not 0, then the addresses will be FP-relative and
2485 therefore we return a FP-relative location. */
2487 if (frame_pointer_needed)
2490 return gen_frame_mem (DImode,
2491 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2493 return gen_frame_mem (DImode,
2494 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2497 /* If FP is not needed, we calculate the location of LR, which would be
2498 at the top of the saved registers block. */
2500 return gen_frame_mem (DImode,
2501 plus_constant (Pmode,
2504 + cfun->machine->frame.saved_regs_size
2505 - 2 * UNITS_PER_WORD));
2508 /* Possibly output code to build up a constant in a register. For
2509 the benefit of the costs infrastructure, returns the number of
2510 instructions which would be emitted. GENERATE inhibits or
2511 enables code generation. */
2514 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2518 if (aarch64_bitmask_imm (val, DImode))
2521 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2529 HOST_WIDE_INT valp = val >> 16;
2533 for (i = 16; i < 64; i += 16)
2535 valm = (valp & 0xffff);
2546 /* zcount contains the number of additional MOVK instructions
2547 required if the constant is built up with an initial MOVZ instruction,
2548 while ncount is the number of MOVK instructions required if starting
2549 with a MOVN instruction.  Choose the sequence that yields the fewest
2550 instructions, preferring MOVZ instructions when the two counts are equal.  */
2552 if (ncount < zcount)
2555 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2556 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2563 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2564 GEN_INT (val & 0xffff));
2571 for (i = 16; i < 64; i += 16)
2573 if ((val & 0xffff) != tval)
2576 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2578 GEN_INT (val & 0xffff)));
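
/* Editorial illustration, not part of the original file: a minimal,
   self-contained sketch of the zcount/ncount idea above.  Every 16-bit
   chunk that the initial MOVZ (zero background) or MOVN (all-ones
   background) does not already provide costs one additional MOVK.  The
   helper below is hypothetical and uses plain C types rather than
   HOST_WIDE_INT.  */

static int
example_mov_imm_sequence_length (unsigned long long val)
{
  int zero_chunks = 0, ones_chunks = 0;
  int from_movz, from_movn, i;

  for (i = 0; i < 64; i += 16)
    {
      unsigned int chunk = (unsigned int) (val >> i) & 0xffff;
      if (chunk == 0)
        zero_chunks++;          /* Provided for free by an initial MOVZ.  */
      else if (chunk == 0xffff)
        ones_chunks++;          /* Provided for free by an initial MOVN.  */
    }

  /* Four 16-bit chunks in total; at least one instruction is needed.  */
  from_movz = 4 - zero_chunks > 1 ? 4 - zero_chunks : 1;
  from_movn = 4 - ones_chunks > 1 ? 4 - ones_chunks : 1;
  return from_movz <= from_movn ? from_movz : from_movn;
}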
2588 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2590 HOST_WIDE_INT mdelta = delta;
2591 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2592 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2597 if (mdelta >= 4096 * 4096)
2599 (void) aarch64_build_constant (scratchreg, delta, true);
2600 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2602 else if (mdelta > 0)
2606 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2607 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2609 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2610 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2612 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2613 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2615 if (mdelta % 4096 != 0)
2617 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2618 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2619 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
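
/* Editorial illustration, not part of the original file: the split that the
   mid-range case above performs on DELTA.  The multiple-of-4096 part is
   materialised in the scratch register and applied with a 12-bit left
   shift, and the remainder is added (or subtracted) directly as an
   immediate.  Hypothetical helper in plain C.  */

static void
example_split_add_constant (long long delta, long long *pages, long long *rest)
{
  long long mdelta = delta < 0 ? -delta : delta;

  *pages = mdelta / 4096;       /* Applied via scratch register << 12.  */
  *rest = mdelta % 4096;        /* Applied as a plain ADD/SUB immediate.  */
}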
2624 /* Output code to add DELTA to the first argument, and then jump
2625 to FUNCTION. Used for C++ multiple inheritance. */
2627 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2628 HOST_WIDE_INT delta,
2629 HOST_WIDE_INT vcall_offset,
2632 /* The this pointer is always in x0. Note that this differs from
2633 Arm where the this pointer may be bumped to r1 if r0 is required
2634 to return a pointer to an aggregate. On AArch64 a result value
2635 pointer will be in x8. */
2636 int this_regno = R0_REGNUM;
2637 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2639 reload_completed = 1;
2640 emit_note (NOTE_INSN_PROLOGUE_END);
2642 if (vcall_offset == 0)
2643 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2646 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2648 this_rtx = gen_rtx_REG (Pmode, this_regno);
2649 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2650 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2655 if (delta >= -256 && delta < 256)
2656 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2657 plus_constant (Pmode, this_rtx, delta));
2659 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2662 if (Pmode == ptr_mode)
2663 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2665 aarch64_emit_move (temp0,
2666 gen_rtx_ZERO_EXTEND (Pmode,
2667 gen_rtx_MEM (ptr_mode, addr)));
2669 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2670 addr = plus_constant (Pmode, temp0, vcall_offset);
2673 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2674 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2677 if (Pmode == ptr_mode)
2678 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2680 aarch64_emit_move (temp1,
2681 gen_rtx_SIGN_EXTEND (Pmode,
2682 gen_rtx_MEM (ptr_mode, addr)));
2684 emit_insn (gen_add2_insn (this_rtx, temp1));
2687 /* Generate a tail call to the target function. */
2688 if (!TREE_USED (function))
2690 assemble_external (function);
2691 TREE_USED (function) = 1;
2693 funexp = XEXP (DECL_RTL (function), 0);
2694 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2695 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2696 SIBLING_CALL_P (insn) = 1;
2698 insn = get_insns ();
2699 shorten_branches (insn);
2700 final_start_function (insn, file, 1);
2701 final (insn, file, 1);
2702 final_end_function ();
2704 /* Stop pretending to be a post-reload pass. */
2705 reload_completed = 0;
2709 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2711 if (GET_CODE (*x) == SYMBOL_REF)
2712 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2714 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2715 TLS offsets, not real symbol references. */
2716 if (GET_CODE (*x) == UNSPEC
2717 && XINT (*x, 1) == UNSPEC_TLS)
2724 aarch64_tls_referenced_p (rtx x)
2726 if (!TARGET_HAVE_TLS)
2729 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2734 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2736 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2737 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2748 aarch64_build_bitmask_table (void)
2750 unsigned HOST_WIDE_INT mask, imm;
2751 unsigned int log_e, e, s, r;
2752 unsigned int nimms = 0;
2754 for (log_e = 1; log_e <= 6; log_e++)
2758 mask = ~(HOST_WIDE_INT) 0;
2760 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2761 for (s = 1; s < e; s++)
2763 for (r = 0; r < e; r++)
2765 /* Set S consecutive bits to 1 (S < 64).  */
2766 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2767 /* Rotate right by R.  */
2769 imm = ((imm >> r) | (imm << (e - r))) & mask;
2770 /* Replicate the constant depending on SIMD size.  */
2772 case 1: imm |= (imm << 2);
2773 case 2: imm |= (imm << 4);
2774 case 3: imm |= (imm << 8);
2775 case 4: imm |= (imm << 16);
2776 case 5: imm |= (imm << 32);
2782 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2783 aarch64_bitmasks[nimms++] = imm;
2788 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2789 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2790 aarch64_bitmasks_cmp);
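
/* Editorial illustration, not part of the original file: how one entry of
   the table above is formed.  A bitmask immediate is S consecutive ones
   inside an element of E bits (E a power of two up to 64), rotated right
   by R and replicated to fill 64 bits; e.g. E = 8, S = 3, R = 0 gives
   0x0707070707070707.  Hypothetical helper in plain C.  */

static unsigned long long
example_bitmask_immediate (unsigned int e, unsigned int s, unsigned int r)
{
  unsigned long long mask = (e == 64) ? ~0ULL : (1ULL << e) - 1;
  unsigned long long imm = (1ULL << s) - 1;     /* S consecutive set bits.  */

  if (r != 0)
    imm = ((imm >> r) | (imm << (e - r))) & mask;   /* Rotate right by R.  */

  while (e < 64)
    {
      imm |= imm << e;          /* Replicate the element across 64 bits.  */
      e *= 2;
    }
  return imm;
}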
2794 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2795 a left shift of 0 or 12 bits. */
2797 aarch64_uimm12_shift (HOST_WIDE_INT val)
2799 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2800 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
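
/* Editorial illustration, not part of the original file: with the test
   above, 0xabc and 0xabc000 are valid ADD/SUB immediates (a 12-bit value
   shifted by 0 or 12), while 0xabc00 is not.  Hypothetical plain-C
   version of the same check:  */

static int
example_uimm12_shift_p (unsigned long long val)
{
  return (val & 0xfffULL) == val
         || (val & (0xfffULL << 12)) == val;
}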
2805 /* Return true if val is an immediate that can be loaded into a
2806 register by a MOVZ instruction. */
2808 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2810 if (GET_MODE_SIZE (mode) > 4)
2812 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2813 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2818 /* Ignore sign extension. */
2819 val &= (HOST_WIDE_INT) 0xffffffff;
2821 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2822 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
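
/* Editorial illustration, not part of the original file: for 64-bit modes
   the test above accepts a single 16-bit chunk placed at bit 0, 16, 32 or
   48; exactly the values one MOVZ can produce.  Hypothetical plain-C
   equivalent:  */

static int
example_movz_imm_p (unsigned long long val)
{
  int i;

  for (i = 0; i < 64; i += 16)
    if ((val & (0xffffULL << i)) == val)
      return 1;
  return 0;
}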
2826 /* Return true if val is a valid bitmask immediate. */
2828 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2830 if (GET_MODE_SIZE (mode) < 8)
2832 /* Replicate bit pattern. */
2833 val &= (HOST_WIDE_INT) 0xffffffff;
2836 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2837 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2841 /* Return true if val is an immediate that can be loaded into a
2842 register in a single instruction. */
2844 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2846 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2848 return aarch64_bitmask_imm (val, mode);
2852 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2856 if (GET_CODE (x) == HIGH)
2859 split_const (x, &base, &offset);
2860 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2862 if (aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR)
2863 != SYMBOL_FORCE_TO_MEM)
2866 /* Avoid generating a 64-bit relocation in ILP32; leave it
2867 to aarch64_expand_mov_immediate to handle it properly.  */
2868 return mode != ptr_mode;
2871 return aarch64_tls_referenced_p (x);
2874 /* Return true if register REGNO is a valid index register.
2875 STRICT_P is true if REG_OK_STRICT is in effect. */
2878 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2880 if (!HARD_REGISTER_NUM_P (regno))
2888 regno = reg_renumber[regno];
2890 return GP_REGNUM_P (regno);
2893 /* Return true if register REGNO is a valid base register for mode MODE.
2894 STRICT_P is true if REG_OK_STRICT is in effect. */
2897 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2899 if (!HARD_REGISTER_NUM_P (regno))
2907 regno = reg_renumber[regno];
2910 /* The fake registers will be eliminated to either the stack or
2911 hard frame pointer, both of which are usually valid base registers.
2912 Reload deals with the cases where the eliminated form isn't valid. */
2913 return (GP_REGNUM_P (regno)
2914 || regno == SP_REGNUM
2915 || regno == FRAME_POINTER_REGNUM
2916 || regno == ARG_POINTER_REGNUM);
2919 /* Return true if X is a valid base register for mode MODE.
2920 STRICT_P is true if REG_OK_STRICT is in effect. */
2923 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2925 if (!strict_p && GET_CODE (x) == SUBREG)
2928 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2931 /* Return true if address offset is a valid index. If it is, fill in INFO
2932 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2935 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2936 enum machine_mode mode, bool strict_p)
2938 enum aarch64_address_type type;
2943 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2944 && GET_MODE (x) == Pmode)
2946 type = ADDRESS_REG_REG;
2950 /* (sign_extend:DI (reg:SI)) */
2951 else if ((GET_CODE (x) == SIGN_EXTEND
2952 || GET_CODE (x) == ZERO_EXTEND)
2953 && GET_MODE (x) == DImode
2954 && GET_MODE (XEXP (x, 0)) == SImode)
2956 type = (GET_CODE (x) == SIGN_EXTEND)
2957 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2958 index = XEXP (x, 0);
2961 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2962 else if (GET_CODE (x) == MULT
2963 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2964 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2965 && GET_MODE (XEXP (x, 0)) == DImode
2966 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2967 && CONST_INT_P (XEXP (x, 1)))
2969 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2970 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2971 index = XEXP (XEXP (x, 0), 0);
2972 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2974 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2975 else if (GET_CODE (x) == ASHIFT
2976 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2977 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2978 && GET_MODE (XEXP (x, 0)) == DImode
2979 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2980 && CONST_INT_P (XEXP (x, 1)))
2982 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2983 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2984 index = XEXP (XEXP (x, 0), 0);
2985 shift = INTVAL (XEXP (x, 1));
2987 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2988 else if ((GET_CODE (x) == SIGN_EXTRACT
2989 || GET_CODE (x) == ZERO_EXTRACT)
2990 && GET_MODE (x) == DImode
2991 && GET_CODE (XEXP (x, 0)) == MULT
2992 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2993 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2995 type = (GET_CODE (x) == SIGN_EXTRACT)
2996 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2997 index = XEXP (XEXP (x, 0), 0);
2998 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2999 if (INTVAL (XEXP (x, 1)) != 32 + shift
3000 || INTVAL (XEXP (x, 2)) != 0)
3003 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3004 (const_int 0xffffffff<<shift)) */
3005 else if (GET_CODE (x) == AND
3006 && GET_MODE (x) == DImode
3007 && GET_CODE (XEXP (x, 0)) == MULT
3008 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3009 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3010 && CONST_INT_P (XEXP (x, 1)))
3012 type = ADDRESS_REG_UXTW;
3013 index = XEXP (XEXP (x, 0), 0);
3014 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3015 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3018 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3019 else if ((GET_CODE (x) == SIGN_EXTRACT
3020 || GET_CODE (x) == ZERO_EXTRACT)
3021 && GET_MODE (x) == DImode
3022 && GET_CODE (XEXP (x, 0)) == ASHIFT
3023 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3024 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3026 type = (GET_CODE (x) == SIGN_EXTRACT)
3027 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3028 index = XEXP (XEXP (x, 0), 0);
3029 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3030 if (INTVAL (XEXP (x, 1)) != 32 + shift
3031 || INTVAL (XEXP (x, 2)) != 0)
3034 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3035 (const_int 0xffffffff<<shift)) */
3036 else if (GET_CODE (x) == AND
3037 && GET_MODE (x) == DImode
3038 && GET_CODE (XEXP (x, 0)) == ASHIFT
3039 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3040 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3041 && CONST_INT_P (XEXP (x, 1)))
3043 type = ADDRESS_REG_UXTW;
3044 index = XEXP (XEXP (x, 0), 0);
3045 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3046 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3049 /* (mult:P (reg:P) (const_int scale)) */
3050 else if (GET_CODE (x) == MULT
3051 && GET_MODE (x) == Pmode
3052 && GET_MODE (XEXP (x, 0)) == Pmode
3053 && CONST_INT_P (XEXP (x, 1)))
3055 type = ADDRESS_REG_REG;
3056 index = XEXP (x, 0);
3057 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3059 /* (ashift:P (reg:P) (const_int shift)) */
3060 else if (GET_CODE (x) == ASHIFT
3061 && GET_MODE (x) == Pmode
3062 && GET_MODE (XEXP (x, 0)) == Pmode
3063 && CONST_INT_P (XEXP (x, 1)))
3065 type = ADDRESS_REG_REG;
3066 index = XEXP (x, 0);
3067 shift = INTVAL (XEXP (x, 1));
3072 if (GET_CODE (index) == SUBREG)
3073 index = SUBREG_REG (index);
3076 (shift > 0 && shift <= 3
3077 && (1 << shift) == GET_MODE_SIZE (mode)))
3079 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3082 info->offset = index;
3083 info->shift = shift;
3091 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3093 return (offset >= -64 * GET_MODE_SIZE (mode)
3094 && offset < 64 * GET_MODE_SIZE (mode)
3095 && offset % GET_MODE_SIZE (mode) == 0);
3099 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3100 HOST_WIDE_INT offset)
3102 return offset >= -256 && offset < 256;
3106 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3109 && offset < 4096 * GET_MODE_SIZE (mode)
3110 && offset % GET_MODE_SIZE (mode) == 0);
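
/* Editorial illustration, not part of the original file: for an 8-byte
   (DImode) access the three predicates above accept, respectively,
   -512..504 in steps of 8 (load/store pair), -256..255 (LDUR/STUR) and
   0..32760 in steps of 8 (LDR/STR with an unsigned scaled offset).  A
   hypothetical combined check for a plain DImode LDR/STR:  */

static int
example_dimode_ldr_offset_ok (long long offset)
{
  /* 9-bit signed unscaled range, or 12-bit unsigned scaled range.  */
  return (offset >= -256 && offset < 256)
         || (offset >= 0 && offset < 4096 * 8 && offset % 8 == 0);
}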
3113 /* Return true if X is a valid address for machine mode MODE. If it is,
3114 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3115 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3118 aarch64_classify_address (struct aarch64_address_info *info,
3119 rtx x, enum machine_mode mode,
3120 RTX_CODE outer_code, bool strict_p)
3122 enum rtx_code code = GET_CODE (x);
3124 bool allow_reg_index_p =
3125 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3127 /* Don't support anything other than POST_INC or REG addressing for
3129 if (aarch64_vector_mode_p (mode)
3130 && (code != POST_INC && code != REG))
3137 info->type = ADDRESS_REG_IMM;
3139 info->offset = const0_rtx;
3140 return aarch64_base_register_rtx_p (x, strict_p);
3145 if (GET_MODE_SIZE (mode) != 0
3146 && CONST_INT_P (op1)
3147 && aarch64_base_register_rtx_p (op0, strict_p))
3149 HOST_WIDE_INT offset = INTVAL (op1);
3151 info->type = ADDRESS_REG_IMM;
3155 /* TImode and TFmode values are allowed in both pairs of X
3156 registers and individual Q registers.  The available address modes are:
3158 X,X: 7-bit signed scaled offset
3159 Q: 9-bit signed offset
3160 We conservatively require an offset representable in either mode.
3162 if (mode == TImode || mode == TFmode)
3163 return (offset_7bit_signed_scaled_p (mode, offset)
3164 && offset_9bit_signed_unscaled_p (mode, offset));
3166 if (outer_code == PARALLEL)
3167 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3168 && offset_7bit_signed_scaled_p (mode, offset));
3170 return (offset_9bit_signed_unscaled_p (mode, offset)
3171 || offset_12bit_unsigned_scaled_p (mode, offset));
3174 if (allow_reg_index_p)
3176 /* Look for base + (scaled/extended) index register. */
3177 if (aarch64_base_register_rtx_p (op0, strict_p)
3178 && aarch64_classify_index (info, op1, mode, strict_p))
3183 if (aarch64_base_register_rtx_p (op1, strict_p)
3184 && aarch64_classify_index (info, op0, mode, strict_p))
3197 info->type = ADDRESS_REG_WB;
3198 info->base = XEXP (x, 0);
3199 info->offset = NULL_RTX;
3200 return aarch64_base_register_rtx_p (info->base, strict_p);
3204 info->type = ADDRESS_REG_WB;
3205 info->base = XEXP (x, 0);
3206 if (GET_CODE (XEXP (x, 1)) == PLUS
3207 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3208 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3209 && aarch64_base_register_rtx_p (info->base, strict_p))
3211 HOST_WIDE_INT offset;
3212 info->offset = XEXP (XEXP (x, 1), 1);
3213 offset = INTVAL (info->offset);
3215 /* TImode and TFmode values are allowed in both pairs of X
3216 registers and individual Q registers.  The available address modes are:
3218 X,X: 7-bit signed scaled offset
3219 Q: 9-bit signed offset
3220 We conservatively require an offset representable in either mode.
3222 if (mode == TImode || mode == TFmode)
3223 return (offset_7bit_signed_scaled_p (mode, offset)
3224 && offset_9bit_signed_unscaled_p (mode, offset));
3226 if (outer_code == PARALLEL)
3227 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3228 && offset_7bit_signed_scaled_p (mode, offset));
3230 return offset_9bit_signed_unscaled_p (mode, offset);
3237 /* load literal: pc-relative constant pool entry. Only supported
3238 for SI mode or larger. */
3239 info->type = ADDRESS_SYMBOLIC;
3240 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3244 split_const (x, &sym, &addend);
3245 return (GET_CODE (sym) == LABEL_REF
3246 || (GET_CODE (sym) == SYMBOL_REF
3247 && CONSTANT_POOL_ADDRESS_P (sym)));
3252 info->type = ADDRESS_LO_SUM;
3253 info->base = XEXP (x, 0);
3254 info->offset = XEXP (x, 1);
3255 if (allow_reg_index_p
3256 && aarch64_base_register_rtx_p (info->base, strict_p))
3259 split_const (info->offset, &sym, &offs);
3260 if (GET_CODE (sym) == SYMBOL_REF
3261 && (aarch64_classify_symbol (sym, offs, SYMBOL_CONTEXT_MEM)
3262 == SYMBOL_SMALL_ABSOLUTE))
3264 /* The symbol and offset must be aligned to the access size. */
3266 unsigned int ref_size;
3268 if (CONSTANT_POOL_ADDRESS_P (sym))
3269 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3270 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3272 tree exp = SYMBOL_REF_DECL (sym);
3273 align = TYPE_ALIGN (TREE_TYPE (exp));
3274 align = CONSTANT_ALIGNMENT (exp, align);
3276 else if (SYMBOL_REF_DECL (sym))
3277 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3278 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3279 && SYMBOL_REF_BLOCK (sym) != NULL)
3280 align = SYMBOL_REF_BLOCK (sym)->alignment;
3282 align = BITS_PER_UNIT;
3284 ref_size = GET_MODE_SIZE (mode);
3286 ref_size = GET_MODE_SIZE (DImode);
3288 return ((INTVAL (offs) & (ref_size - 1)) == 0
3289 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3300 aarch64_symbolic_address_p (rtx x)
3304 split_const (x, &x, &offset);
3305 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3308 /* Classify the base of symbolic expression X, given that X appears in
3311 enum aarch64_symbol_type
3312 aarch64_classify_symbolic_expression (rtx x,
3313 enum aarch64_symbol_context context)
3317 split_const (x, &x, &offset);
3318 return aarch64_classify_symbol (x, offset, context);
3322 /* Return TRUE if X is a legitimate address for accessing memory in
3325 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3327 struct aarch64_address_info addr;
3329 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3332 /* Return TRUE if X is a legitimate address for accessing memory in
3333 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3336 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3337 RTX_CODE outer_code, bool strict_p)
3339 struct aarch64_address_info addr;
3341 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3344 /* Return TRUE if rtx X is immediate constant 0.0.  */
3346 aarch64_float_const_zero_rtx_p (rtx x)
3350 if (GET_MODE (x) == VOIDmode)
3353 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3354 if (REAL_VALUE_MINUS_ZERO (r))
3355 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3356 return REAL_VALUES_EQUAL (r, dconst0);
3359 /* Return the fixed registers used for condition codes. */
3362 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3365 *p2 = INVALID_REGNUM;
3370 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3372 /* All floating point compares return CCFP if it is an equality
3373 comparison, and CCFPE otherwise. */
3374 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3401 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3403 && (code == EQ || code == NE || code == LT || code == GE)
3404 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3405 || GET_CODE (x) == NEG))
3408 /* A compare with a shifted operand. Because of canonicalization,
3409 the comparison will have to be swapped when we emit the assembly
3411 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3412 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3413 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3414 || GET_CODE (x) == LSHIFTRT
3415 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3418 /* Similarly for a negated operand, but we can only do this for
3420 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3421 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3422 && (code == EQ || code == NE)
3423 && GET_CODE (x) == NEG)
3426 /* A compare of a mode narrower than SI mode against zero can be done
3427 by extending the value in the comparison. */
3428 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3430 /* Only use sign-extension if we really need it. */
3431 return ((code == GT || code == GE || code == LE || code == LT)
3432 ? CC_SESWPmode : CC_ZESWPmode);
3434 /* For everything else, return CCmode. */
3439 aarch64_get_condition_code (rtx x)
3441 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3442 enum rtx_code comp_code = GET_CODE (x);
3444 if (GET_MODE_CLASS (mode) != MODE_CC)
3445 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3453 case GE: return AARCH64_GE;
3454 case GT: return AARCH64_GT;
3455 case LE: return AARCH64_LS;
3456 case LT: return AARCH64_MI;
3457 case NE: return AARCH64_NE;
3458 case EQ: return AARCH64_EQ;
3459 case ORDERED: return AARCH64_VC;
3460 case UNORDERED: return AARCH64_VS;
3461 case UNLT: return AARCH64_LT;
3462 case UNLE: return AARCH64_LE;
3463 case UNGT: return AARCH64_HI;
3464 case UNGE: return AARCH64_PL;
3465 default: gcc_unreachable ();
3472 case NE: return AARCH64_NE;
3473 case EQ: return AARCH64_EQ;
3474 case GE: return AARCH64_GE;
3475 case GT: return AARCH64_GT;
3476 case LE: return AARCH64_LE;
3477 case LT: return AARCH64_LT;
3478 case GEU: return AARCH64_CS;
3479 case GTU: return AARCH64_HI;
3480 case LEU: return AARCH64_LS;
3481 case LTU: return AARCH64_CC;
3482 default: gcc_unreachable ();
3491 case NE: return AARCH64_NE;
3492 case EQ: return AARCH64_EQ;
3493 case GE: return AARCH64_LE;
3494 case GT: return AARCH64_LT;
3495 case LE: return AARCH64_GE;
3496 case LT: return AARCH64_GT;
3497 case GEU: return AARCH64_LS;
3498 case GTU: return AARCH64_CC;
3499 case LEU: return AARCH64_CS;
3500 case LTU: return AARCH64_HI;
3501 default: gcc_unreachable ();
3508 case NE: return AARCH64_NE;
3509 case EQ: return AARCH64_EQ;
3510 case GE: return AARCH64_PL;
3511 case LT: return AARCH64_MI;
3512 default: gcc_unreachable ();
3519 case NE: return AARCH64_NE;
3520 case EQ: return AARCH64_EQ;
3521 default: gcc_unreachable ();
3532 bit_count (unsigned HOST_WIDE_INT value)
3546 aarch64_print_operand (FILE *f, rtx x, char code)
3550 /* An integer or symbol address without a preceding # sign. */
3552 switch (GET_CODE (x))
3555 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3559 output_addr_const (f, x);
3563 if (GET_CODE (XEXP (x, 0)) == PLUS
3564 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3566 output_addr_const (f, x);
3572 output_operand_lossage ("Unsupported operand for code '%c'", code);
3577 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3581 if (GET_CODE (x) != CONST_INT
3582 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3584 output_operand_lossage ("invalid operand for '%%%c'", code);
3600 output_operand_lossage ("invalid operand for '%%%c'", code);
3610 /* Print N such that 2^N == X. */
3611 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3613 output_operand_lossage ("invalid operand for '%%%c'", code);
3617 asm_fprintf (f, "%d", n);
3622 /* Print the number of non-zero bits in X (a const_int). */
3623 if (GET_CODE (x) != CONST_INT)
3625 output_operand_lossage ("invalid operand for '%%%c'", code);
3629 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3633 /* Print the higher numbered register of a pair (TImode) of regs. */
3634 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3636 output_operand_lossage ("invalid operand for '%%%c'", code);
3640 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3644 /* Print a condition (eq, ne, etc). */
3646 /* CONST_TRUE_RTX means always -- that's the default. */
3647 if (x == const_true_rtx)
3650 if (!COMPARISON_P (x))
3652 output_operand_lossage ("invalid operand for '%%%c'", code);
3656 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3660 /* Print the inverse of a condition (eq <-> ne, etc). */
3662 /* CONST_TRUE_RTX means never -- that's the default. */
3663 if (x == const_true_rtx)
3669 if (!COMPARISON_P (x))
3671 output_operand_lossage ("invalid operand for '%%%c'", code);
3675 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3676 (aarch64_get_condition_code (x))], f);
3684 /* Print a scalar FP/SIMD register name. */
3685 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3687 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3690 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3697 /* Print the first FP/SIMD register name in a list. */
3698 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3700 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3703 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3707 /* Print bottom 16 bits of integer constant in hex. */
3708 if (GET_CODE (x) != CONST_INT)
3710 output_operand_lossage ("invalid operand for '%%%c'", code);
3713 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3718 /* Print a general register name or the zero register (32-bit or
3721 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3723 asm_fprintf (f, "%czr", code);
3727 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3729 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3733 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3735 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3742 /* Print a normal operand; if it's a general register, then we
3746 output_operand_lossage ("missing operand");
3750 switch (GET_CODE (x))
3753 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3757 aarch64_memory_reference_mode = GET_MODE (x);
3758 output_address (XEXP (x, 0));
3763 output_addr_const (asm_out_file, x);
3767 asm_fprintf (f, "%wd", INTVAL (x));
3771 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3773 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3775 HOST_WIDE_INT_MAX));
3776 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3778 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3787 /* CONST_DOUBLE can represent a double-width integer.
3788 In this case, the mode of x is VOIDmode. */
3789 if (GET_MODE (x) == VOIDmode)
3791 else if (aarch64_float_const_zero_rtx_p (x))
3796 else if (aarch64_float_const_representable_p (x))
3799 char float_buf[buf_size] = {'\0'};
3801 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3802 real_to_decimal_for_mode (float_buf, &r,
3805 asm_fprintf (asm_out_file, "%s", float_buf);
3809 output_operand_lossage ("invalid constant");
3812 output_operand_lossage ("invalid operand");
3818 if (GET_CODE (x) == HIGH)
3821 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3823 case SYMBOL_SMALL_GOT:
3824 asm_fprintf (asm_out_file, ":got:");
3827 case SYMBOL_SMALL_TLSGD:
3828 asm_fprintf (asm_out_file, ":tlsgd:");
3831 case SYMBOL_SMALL_TLSDESC:
3832 asm_fprintf (asm_out_file, ":tlsdesc:");
3835 case SYMBOL_SMALL_GOTTPREL:
3836 asm_fprintf (asm_out_file, ":gottprel:");
3839 case SYMBOL_SMALL_TPREL:
3840 asm_fprintf (asm_out_file, ":tprel:");
3843 case SYMBOL_TINY_GOT:
3850 output_addr_const (asm_out_file, x);
3854 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3856 case SYMBOL_SMALL_GOT:
3857 asm_fprintf (asm_out_file, ":lo12:");
3860 case SYMBOL_SMALL_TLSGD:
3861 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3864 case SYMBOL_SMALL_TLSDESC:
3865 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3868 case SYMBOL_SMALL_GOTTPREL:
3869 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3872 case SYMBOL_SMALL_TPREL:
3873 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3876 case SYMBOL_TINY_GOT:
3877 asm_fprintf (asm_out_file, ":got:");
3883 output_addr_const (asm_out_file, x);
3888 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3890 case SYMBOL_SMALL_TPREL:
3891 asm_fprintf (asm_out_file, ":tprel_hi12:");
3896 output_addr_const (asm_out_file, x);
3900 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3906 aarch64_print_operand_address (FILE *f, rtx x)
3908 struct aarch64_address_info addr;
3910 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3914 case ADDRESS_REG_IMM:
3915 if (addr.offset == const0_rtx)
3916 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3918 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3919 INTVAL (addr.offset));
3922 case ADDRESS_REG_REG:
3923 if (addr.shift == 0)
3924 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3925 reg_names [REGNO (addr.offset)]);
3927 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3928 reg_names [REGNO (addr.offset)], addr.shift);
3931 case ADDRESS_REG_UXTW:
3932 if (addr.shift == 0)
3933 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3934 REGNO (addr.offset) - R0_REGNUM);
3936 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3937 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3940 case ADDRESS_REG_SXTW:
3941 if (addr.shift == 0)
3942 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3943 REGNO (addr.offset) - R0_REGNUM);
3945 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3946 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3949 case ADDRESS_REG_WB:
3950 switch (GET_CODE (x))
3953 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3954 GET_MODE_SIZE (aarch64_memory_reference_mode));
3957 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3958 GET_MODE_SIZE (aarch64_memory_reference_mode));
3961 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3962 GET_MODE_SIZE (aarch64_memory_reference_mode));
3965 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3966 GET_MODE_SIZE (aarch64_memory_reference_mode));
3969 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3970 INTVAL (addr.offset));
3973 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3974 INTVAL (addr.offset));
3981 case ADDRESS_LO_SUM:
3982 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3983 output_addr_const (f, addr.offset);
3984 asm_fprintf (f, "]");
3987 case ADDRESS_SYMBOLIC:
3991 output_addr_const (f, x);
3995 aarch64_label_mentioned_p (rtx x)
4000 if (GET_CODE (x) == LABEL_REF)
4003 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4004 referencing instruction, but they are constant offsets, not
4006 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4009 fmt = GET_RTX_FORMAT (GET_CODE (x));
4010 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4016 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4017 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4020 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4027 /* Implement REGNO_REG_CLASS. */
4030 aarch64_regno_regclass (unsigned regno)
4032 if (GP_REGNUM_P (regno))
4035 if (regno == SP_REGNUM)
4038 if (regno == FRAME_POINTER_REGNUM
4039 || regno == ARG_POINTER_REGNUM)
4040 return POINTER_REGS;
4042 if (FP_REGNUM_P (regno))
4043 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4048 /* Try a machine-dependent way of reloading an illegitimate address
4049 operand. If we find one, push the reload and return the new rtx. */
4052 aarch64_legitimize_reload_address (rtx *x_p,
4053 enum machine_mode mode,
4054 int opnum, int type,
4055 int ind_levels ATTRIBUTE_UNUSED)
4059 /* Do not allow mem (plus (reg, const)) if vector mode. */
4060 if (aarch64_vector_mode_p (mode)
4061 && GET_CODE (x) == PLUS
4062 && REG_P (XEXP (x, 0))
4063 && CONST_INT_P (XEXP (x, 1)))
4067 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4068 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4069 opnum, (enum reload_type) type);
4073 /* We must recognize output that we have already generated ourselves. */
4074 if (GET_CODE (x) == PLUS
4075 && GET_CODE (XEXP (x, 0)) == PLUS
4076 && REG_P (XEXP (XEXP (x, 0), 0))
4077 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4078 && CONST_INT_P (XEXP (x, 1)))
4080 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4081 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4082 opnum, (enum reload_type) type);
4086 /* We wish to handle large displacements off a base register by splitting
4087 the addend across an add and the mem insn. This can cut the number of
4088 extra insns needed from 3 to 1. It is only useful for load/store of a
4089 single register with a 12-bit offset field.  */
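/* Editorial worked example, not part of the original file: for a DImode
   access at base + 0x12340, low = 0x340 stays in the LDR/STR offset field
   while high = 0x12000, which fits a shifted 12-bit ADD immediate, is
   reloaded into the base register, so a single extra ADD suffices.  */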
4090 if (GET_CODE (x) == PLUS
4091 && REG_P (XEXP (x, 0))
4092 && CONST_INT_P (XEXP (x, 1))
4093 && HARD_REGISTER_P (XEXP (x, 0))
4096 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4098 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4099 HOST_WIDE_INT low = val & 0xfff;
4100 HOST_WIDE_INT high = val - low;
4103 enum machine_mode xmode = GET_MODE (x);
4105 /* In ILP32, xmode can be either DImode or SImode. */
4106 gcc_assert (xmode == DImode || xmode == SImode);
4108 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4109 BLKmode alignment. */
4110 if (GET_MODE_SIZE (mode) == 0)
4113 offs = low % GET_MODE_SIZE (mode);
4115 /* Align misaligned offset by adjusting high part to compensate. */
4118 if (aarch64_uimm12_shift (high + offs))
4127 offs = GET_MODE_SIZE (mode) - offs;
4129 high = high + (low & 0x1000) - offs;
4134 /* Check for overflow. */
4135 if (high + low != val)
4138 cst = GEN_INT (high);
4139 if (!aarch64_uimm12_shift (high))
4140 cst = force_const_mem (xmode, cst);
4142 /* Reload high part into base reg, leaving the low part
4143 in the mem instruction.
4144 Note that replacing this gen_rtx_PLUS with plus_constant is
4145 wrong in this case because we rely on the
4146 (plus (plus reg c1) c2) structure being preserved so that
4147 XEXP (*p, 0) in push_reload below uses the correct term. */
4148 x = gen_rtx_PLUS (xmode,
4149 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4152 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4153 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4154 opnum, (enum reload_type) type);
4163 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4165 enum machine_mode mode,
4166 secondary_reload_info *sri)
4168 /* Without the TARGET_SIMD instructions we cannot move a Q register
4169 to a Q register directly. We need a scratch. */
4170 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4171 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4172 && reg_class_subset_p (rclass, FP_REGS))
4175 sri->icode = CODE_FOR_aarch64_reload_movtf;
4176 else if (mode == TImode)
4177 sri->icode = CODE_FOR_aarch64_reload_movti;
4181 /* A TFmode or TImode memory access should be handled via FP_REGS
4182 because AArch64 has richer addressing modes for LDR/STR instructions
4183 than LDP/STP instructions. */
4184 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4185 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4188 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4195 aarch64_can_eliminate (const int from, const int to)
4197 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4198 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4200 if (frame_pointer_needed)
4202 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4204 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4206 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4207 && !cfun->calls_alloca)
4209 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4219 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4221 HOST_WIDE_INT frame_size;
4222 HOST_WIDE_INT offset;
4224 aarch64_layout_frame ();
4225 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4226 + crtl->outgoing_args_size
4227 + cfun->machine->saved_varargs_size);
4229 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4230 offset = frame_size;
4232 if (to == HARD_FRAME_POINTER_REGNUM)
4234 if (from == ARG_POINTER_REGNUM)
4235 return offset - crtl->outgoing_args_size;
4237 if (from == FRAME_POINTER_REGNUM)
4238 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4241 if (to == STACK_POINTER_REGNUM)
4243 if (from == FRAME_POINTER_REGNUM)
4245 HOST_WIDE_INT elim = crtl->outgoing_args_size
4246 + cfun->machine->frame.saved_regs_size
4248 - cfun->machine->frame.fp_lr_offset;
4249 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4258 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4262 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4266 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4271 aarch64_asm_trampoline_template (FILE *f)
4275 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4276 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4280 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4281 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4283 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4284 assemble_aligned_integer (4, const0_rtx);
4285 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4286 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4290 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4292 rtx fnaddr, mem, a_tramp;
4293 const int tramp_code_sz = 16;
4295 /* Don't need to copy the trailing D-words, we fill those in below. */
4296 emit_block_move (m_tramp, assemble_trampoline_template (),
4297 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4298 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4299 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4300 if (GET_MODE (fnaddr) != ptr_mode)
4301 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4302 emit_move_insn (mem, fnaddr);
4304 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4305 emit_move_insn (mem, chain_value);
4307 /* XXX We should really define a "clear_cache" pattern and use
4308 gen_clear_cache(). */
4309 a_tramp = XEXP (m_tramp, 0);
4310 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4311 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4312 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4316 static unsigned char
4317 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4328 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4329 (GET_MODE_SIZE (mode) + 7) / 8;
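
/* Editorial note, not part of the original file: under the rule above a
   128-bit vector such as V4SImode fits in a single FP/SIMD register,
   while a 16-byte TImode value held in the general registers needs two
   X registers.  */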
4343 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4345 if (regclass == POINTER_REGS)
4346 return GENERAL_REGS;
4348 if (regclass == STACK_REG)
4351 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4357 /* If it's an integer immediate that MOVI can't handle, then
4358 FP_REGS is not an option, so we return NO_REGS instead. */
4359 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4360 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4363 /* Register elimination can result in a request for
4364 SP+constant->FP_REGS. We cannot support such operations which
4365 use SP as source and an FP_REG as destination, so reject out
4367 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4369 rtx lhs = XEXP (x, 0);
4371 /* Look through a possible SUBREG introduced by ILP32. */
4372 if (GET_CODE (lhs) == SUBREG)
4373 lhs = SUBREG_REG (lhs);
4375 gcc_assert (REG_P (lhs));
4376 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4385 aarch64_asm_output_labelref (FILE* f, const char *name)
4387 asm_fprintf (f, "%U%s", name);
4391 aarch64_elf_asm_constructor (rtx symbol, int priority)
4393 if (priority == DEFAULT_INIT_PRIORITY)
4394 default_ctor_section_asm_out_constructor (symbol, priority);
4399 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4400 s = get_section (buf, SECTION_WRITE, NULL);
4401 switch_to_section (s);
4402 assemble_align (POINTER_SIZE);
4403 assemble_aligned_integer (POINTER_BYTES, symbol);
4408 aarch64_elf_asm_destructor (rtx symbol, int priority)
4410 if (priority == DEFAULT_INIT_PRIORITY)
4411 default_dtor_section_asm_out_destructor (symbol, priority);
4416 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4417 s = get_section (buf, SECTION_WRITE, NULL);
4418 switch_to_section (s);
4419 assemble_align (POINTER_SIZE);
4420 assemble_aligned_integer (POINTER_BYTES, symbol);
4425 aarch64_output_casesi (rtx *operands)
4429 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4431 static const char *const patterns[4][2] =
4434 "ldrb\t%w3, [%0,%w1,uxtw]",
4435 "add\t%3, %4, %w3, sxtb #2"
4438 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4439 "add\t%3, %4, %w3, sxth #2"
4442 "ldr\t%w3, [%0,%w1,uxtw #2]",
4443 "add\t%3, %4, %w3, sxtw #2"
4445 /* We assume that DImode is only generated when not optimizing and
4446 that we don't really need 64-bit address offsets. That would
4447 imply an object file with 8GB of code in a single function! */
4449 "ldr\t%w3, [%0,%w1,uxtw #2]",
4450 "add\t%3, %4, %w3, sxtw #2"
4454 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4456 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4458 gcc_assert (index >= 0 && index <= 3);
4460 /* Need to implement table size reduction by changing the code below.  */
4461 output_asm_insn (patterns[index][0], operands);
4462 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4463 snprintf (buf, sizeof (buf),
4464 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4465 output_asm_insn (buf, operands);
4466 output_asm_insn (patterns[index][1], operands);
4467 output_asm_insn ("br\t%3", operands);
4468 assemble_label (asm_out_file, label);
4473 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4474 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4478 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4480 if (shift >= 0 && shift <= 3)
4483 for (size = 8; size <= 32; size *= 2)
4485 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4486 if (mask == bits << shift)
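
/* Editorial worked example, not part of the original file: a shift of 1
   with mask 0x1fe matches an 8-bit operand (UXTB) and a shift of 2 with
   mask 0x3fffffffc matches a 32-bit operand (UXTW); the loop above would
   return 8 and 32 respectively.  */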
4494 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4495 const_rtx x ATTRIBUTE_UNUSED)
4497 /* We can't use blocks for constants when we're using a per-function
4503 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4504 rtx x ATTRIBUTE_UNUSED,
4505 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4507 /* Force all constant pool entries into the current function section. */
4508 return function_section (current_function_decl);
4514 /* Helper function for rtx cost calculation. Strip a shift expression
4515 from X. Returns the inner operand if successful, or the original
4516 expression on failure. */
4518 aarch64_strip_shift (rtx x)
4522 if ((GET_CODE (op) == ASHIFT
4523 || GET_CODE (op) == ASHIFTRT
4524 || GET_CODE (op) == LSHIFTRT)
4525 && CONST_INT_P (XEXP (op, 1)))
4526 return XEXP (op, 0);
4528 if (GET_CODE (op) == MULT
4529 && CONST_INT_P (XEXP (op, 1))
4530 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4531 return XEXP (op, 0);
4536 /* Helper function for rtx cost calculation. Strip an extend
4537 expression from X. Returns the inner operand if successful, or the
4538 original expression on failure. We deal with a number of possible
4539 canonicalization variations here. */
4541 aarch64_strip_extend (rtx x)
4545 /* Zero and sign extraction of a widened value. */
4546 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4547 && XEXP (op, 2) == const0_rtx
4548 && GET_CODE (XEXP (op, 0)) == MULT
4549 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4551 return XEXP (XEXP (op, 0), 0);
4553 /* It can also be represented (for zero-extend) as an AND with an
4555 if (GET_CODE (op) == AND
4556 && GET_CODE (XEXP (op, 0)) == MULT
4557 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4558 && CONST_INT_P (XEXP (op, 1))
4559 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4560 INTVAL (XEXP (op, 1))) != 0)
4561 return XEXP (XEXP (op, 0), 0);
4563 /* Now handle extended register, as this may also have an optional
4564 left shift by 1..4. */
4565 if (GET_CODE (op) == ASHIFT
4566 && CONST_INT_P (XEXP (op, 1))
4567 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4570 if (GET_CODE (op) == ZERO_EXTEND
4571 || GET_CODE (op) == SIGN_EXTEND)
4580 /* Helper function for rtx cost calculation. Calculate the cost of
4581 a MULT, which may be part of a multiply-accumulate rtx. Return
4582 the calculated cost of the expression, recursing manually into
4583 operands where needed. */
4586 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4589 const struct cpu_cost_table *extra_cost
4590 = aarch64_tune_params->insn_extra_cost;
4592 bool maybe_fma = (outer == PLUS || outer == MINUS);
4593 enum machine_mode mode = GET_MODE (x);
4595 gcc_checking_assert (code == MULT);
4600 if (VECTOR_MODE_P (mode))
4601 mode = GET_MODE_INNER (mode);
4603 /* Integer multiply/fma. */
4604 if (GET_MODE_CLASS (mode) == MODE_INT)
4606 /* The multiply will be canonicalized as a shift, cost it as such. */
4607 if (CONST_INT_P (op1)
4608 && exact_log2 (INTVAL (op1)) > 0)
4613 /* ADD (shifted register). */
4614 cost += extra_cost->alu.arith_shift;
4616 /* LSL (immediate). */
4617 cost += extra_cost->alu.shift;
4620 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4625 /* Integer multiplies or FMAs have zero/sign extending variants. */
4626 if ((GET_CODE (op0) == ZERO_EXTEND
4627 && GET_CODE (op1) == ZERO_EXTEND)
4628 || (GET_CODE (op0) == SIGN_EXTEND
4629 && GET_CODE (op1) == SIGN_EXTEND))
4631 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4632 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4637 /* MADD/SMADDL/UMADDL. */
4638 cost += extra_cost->mult[0].extend_add;
4640 /* MUL/SMULL/UMULL. */
4641 cost += extra_cost->mult[0].extend;
4647 /* This is either an integer multiply or an FMA. In both cases
4648 we want to recurse and cost the operands. */
4649 cost += rtx_cost (op0, MULT, 0, speed)
4650 + rtx_cost (op1, MULT, 1, speed);
4656 cost += extra_cost->mult[mode == DImode].add;
4659 cost += extra_cost->mult[mode == DImode].simple;
4668 /* Floating-point FMA can also support negations of the
4670 if (GET_CODE (op0) == NEG)
4673 op0 = XEXP (op0, 0);
4675 if (GET_CODE (op1) == NEG)
4678 op1 = XEXP (op1, 0);
4682 /* FMADD/FNMADD/FNMSUB/FMSUB. */
4683 cost += extra_cost->fp[mode == DFmode].fma;
4686 cost += extra_cost->fp[mode == DFmode].mult;
4689 cost += rtx_cost (op0, MULT, 0, speed)
4690 + rtx_cost (op1, MULT, 1, speed);
4696 aarch64_address_cost (rtx x,
4697 enum machine_mode mode,
4698 addr_space_t as ATTRIBUTE_UNUSED,
4701 enum rtx_code c = GET_CODE (x);
4702 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4703 struct aarch64_address_info info;
4707 if (!aarch64_classify_address (&info, x, mode, c, false))
4709 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4711 /* This is a CONST or SYMBOL ref which will be split
4712 in a different way depending on the code model in use.
4713 Cost it through the generic infrastructure. */
4714 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4715 /* Divide through by the cost of one instruction to
4716 bring it to the same units as the address costs. */
4717 cost_symbol_ref /= COSTS_N_INSNS (1);
4718 /* The cost is then the cost of preparing the address,
4719 followed by an immediate (possibly 0) offset. */
4720 return cost_symbol_ref + addr_cost->imm_offset;
4724 /* This is most likely a jump table from a case statement.  */
4726 return addr_cost->register_offset;
4732 case ADDRESS_LO_SUM:
4733 case ADDRESS_SYMBOLIC:
4734 case ADDRESS_REG_IMM:
4735 cost += addr_cost->imm_offset;
4738 case ADDRESS_REG_WB:
4739 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4740 cost += addr_cost->pre_modify;
4741 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4742 cost += addr_cost->post_modify;
4748 case ADDRESS_REG_REG:
4749 cost += addr_cost->register_offset;
4752 case ADDRESS_REG_UXTW:
4753 case ADDRESS_REG_SXTW:
4754 cost += addr_cost->register_extend;
4764 /* For the sake of calculating the cost of the shifted register
4765 component, we can treat same sized modes in the same way. */
4766 switch (GET_MODE_BITSIZE (mode))
4769 cost += addr_cost->addr_scale_costs.hi;
4773 cost += addr_cost->addr_scale_costs.si;
4777 cost += addr_cost->addr_scale_costs.di;
4780 /* We can't tell, or this is a 128-bit vector. */
4782 cost += addr_cost->addr_scale_costs.ti;
4790 /* Calculate the cost of calculating X, storing it in *COST. Result
4791 is true if the total cost of the operation has now been calculated. */
4793 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4794 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4797 const struct cpu_cost_table *extra_cost
4798 = aarch64_tune_params->insn_extra_cost;
4799 enum machine_mode mode = GET_MODE (x);
4801 /* By default, assume that everything has equivalent cost to the
4802 cheapest instruction. Any additional costs are applied as a delta
4803 above this default. */
4804 *cost = COSTS_N_INSNS (1);
4806 /* TODO: The cost infrastructure currently does not handle
4807 vector operations. Assume that all vector operations
4808 are equally expensive. */
4809 if (VECTOR_MODE_P (mode))
4812 *cost += extra_cost->vect.alu;
4819 /* The cost depends entirely on the operands to SET. */
4824 switch (GET_CODE (op0))
4829 rtx address = XEXP (op0, 0);
4830 if (GET_MODE_CLASS (mode) == MODE_INT)
4831 *cost += extra_cost->ldst.store;
4832 else if (mode == SFmode)
4833 *cost += extra_cost->ldst.storef;
4834 else if (mode == DFmode)
4835 *cost += extra_cost->ldst.stored;
4838 COSTS_N_INSNS (aarch64_address_cost (address, mode,
4842 *cost += rtx_cost (op1, SET, 1, speed);
4846 if (! REG_P (SUBREG_REG (op0)))
4847 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4851 /* const0_rtx is in general free, but we will use an
4852 instruction to set a register to 0. */
4853 if (REG_P (op1) || op1 == const0_rtx)
4855 /* The cost is 1 per register copied. */
4856 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
4858 *cost = COSTS_N_INSNS (n_minus_1 + 1);
4861 /* Cost is just the cost of the RHS of the set. */
4862 *cost += rtx_cost (op1, SET, 1, speed);
4867 /* Bit-field insertion. Strip any redundant widening of
4868 the RHS to meet the width of the target. */
4869 if (GET_CODE (op1) == SUBREG)
4870 op1 = SUBREG_REG (op1);
4871 if ((GET_CODE (op1) == ZERO_EXTEND
4872 || GET_CODE (op1) == SIGN_EXTEND)
4873 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4874 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4875 >= INTVAL (XEXP (op0, 1))))
4876 op1 = XEXP (op1, 0);
4878 if (CONST_INT_P (op1))
4880 /* MOV immediate is assumed to always be cheap. */
4881 *cost = COSTS_N_INSNS (1);
4887 *cost += extra_cost->alu.bfi;
4888 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
4894 /* We can't make sense of this; assume default cost. */
4895 *cost = COSTS_N_INSNS (1);
4901 /* If an instruction can incorporate a constant within the
4902 instruction, the instruction's expression avoids calling
4903 rtx_cost() on the constant. If rtx_cost() is called on a
4904 constant, then it is usually because the constant must be
4905 moved into a register by one or more instructions.
4907 The exception is constant 0, which can be expressed
4908 as XZR/WZR and is therefore free. The exception to this is
4909 if we have (set (reg) (const0_rtx)) in which case we must cost
4910 the move. However, we can catch that when we cost the SET, so
4911 we don't need to consider that here. */
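/* Worked example (illustrative): the 64-bit constant 0xffff can be
   built with a single MOVZ, so the cost below is COSTS_N_INSNS (1),
   whereas a constant with three distinct non-zero 16-bit chunks, such
   as 0x123456789abc, can be built with a MOVZ plus two MOVKs and so
   costs COSTS_N_INSNS (3).  */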
4912 if (x == const0_rtx)
4916 /* To an approximation, building any other constant is
4917 proportionally expensive to the number of instructions
4918 required to build that constant. This is true whether we
4919 are compiling for SPEED or otherwise. */
4920 *cost = COSTS_N_INSNS (aarch64_build_constant (0,
4929 /* mov[df,sf]_aarch64. */
4930 if (aarch64_float_const_representable_p (x))
4931 /* FMOV (scalar immediate). */
4932 *cost += extra_cost->fp[mode == DFmode].fpconst;
4933 else if (!aarch64_float_const_zero_rtx_p (x))
4935 /* This will be a load from memory. */
4937 *cost += extra_cost->ldst.loadd;
4939 *cost += extra_cost->ldst.loadf;
4942 /* Otherwise this is +0.0. We get this using MOVI d0, #0
4943 or MOV v0.s[0], wzr - neither of which is modeled by the
4944 cost tables. Just use the default cost. */
4954 /* For loads we want the base cost of a load, plus an
4955 approximation for the additional cost of the addressing mode. */
4957 rtx address = XEXP (x, 0);
4958 if (GET_MODE_CLASS (mode) == MODE_INT)
4959 *cost += extra_cost->ldst.load;
4960 else if (mode == SFmode)
4961 *cost += extra_cost->ldst.loadf;
4962 else if (mode == DFmode)
4963 *cost += extra_cost->ldst.loadd;
4966 COSTS_N_INSNS (aarch64_address_cost (address, mode,
4975 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4977 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
4978 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
4981 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
4985 /* Cost this as SUB wzr, X. */
4986 op0 = CONST0_RTX (GET_MODE (x));
4991 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
4993 /* Support (neg(fma...)) as a single instruction only if
4994 sign of zeros is unimportant. This matches the decision
4995 making in aarch64.md. */
4996 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
4999 *cost = rtx_cost (op0, NEG, 0, speed);
5004 *cost += extra_cost->fp[mode == DFmode].neg;
5014 if (op1 == const0_rtx
5015 && GET_CODE (op0) == AND)
5021 /* Comparisons can work if the order is swapped.
5022 Canonicalization puts the more complex operation first, but
5023 we want it in op1. */
5025 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5038 /* Detect valid immediates. */
5039 if ((GET_MODE_CLASS (mode) == MODE_INT
5040 || (GET_MODE_CLASS (mode) == MODE_CC
5041 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5042 && CONST_INT_P (op1)
5043 && aarch64_uimm12_shift (INTVAL (op1)))
5045 *cost += rtx_cost (op0, MINUS, 0, speed);
5048 /* SUB(S) (immediate). */
5049 *cost += extra_cost->alu.arith;
5054 rtx new_op1 = aarch64_strip_extend (op1);
5056 /* Cost this as an FMA-alike operation. */
5057 if ((GET_CODE (new_op1) == MULT
5058 || GET_CODE (new_op1) == ASHIFT)
5061 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5062 (enum rtx_code) code,
5064 *cost += rtx_cost (op0, MINUS, 0, speed);
5068 *cost += rtx_cost (new_op1, MINUS, 1, speed);
5072 if (GET_MODE_CLASS (mode) == MODE_INT)
5074 *cost += extra_cost->alu.arith;
5075 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5077 *cost += extra_cost->fp[mode == DFmode].addsub;
5089 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5090 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5093 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5094 *cost += rtx_cost (op1, PLUS, 1, speed);
5098 if (GET_MODE_CLASS (mode) == MODE_INT
5099 && CONST_INT_P (op1)
5100 && aarch64_uimm12_shift (INTVAL (op1)))
5102 *cost += rtx_cost (op0, PLUS, 0, speed);
5105 /* ADD (immediate). */
5106 *cost += extra_cost->alu.arith;
5110 /* Strip any extend, leave shifts behind as we will
5111 cost them through mult_cost. */
5112 new_op0 = aarch64_strip_extend (op0);
5114 if (GET_CODE (new_op0) == MULT
5115 || GET_CODE (new_op0) == ASHIFT)
5117 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5119 *cost += rtx_cost (op1, PLUS, 1, speed);
5123 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5124 + rtx_cost (op1, PLUS, 1, speed));
5128 if (GET_MODE_CLASS (mode) == MODE_INT)
5130 *cost += extra_cost->alu.arith;
5131 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5133 *cost += extra_cost->fp[mode == DFmode].addsub;
5146 && GET_CODE (op0) == MULT
5147 && CONST_INT_P (XEXP (op0, 1))
5148 && CONST_INT_P (op1)
5149 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5152 /* This is a UBFM/SBFM. */
5153 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5155 *cost += extra_cost->alu.bfx;
5159 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5161 /* We possibly get the immediate for free; this is not
5163 if (CONST_INT_P (op1)
5164 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5166 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5169 *cost += extra_cost->alu.logical;
5177 /* Handle ORN, EON, or BIC. */
5178 if (GET_CODE (op0) == NOT)
5179 op0 = XEXP (op0, 0);
5181 new_op0 = aarch64_strip_shift (op0);
5183 /* If we had a shift on op0 then this is a logical-shift-
5184 by-register/immediate operation. Otherwise, this is just
5185 a logical operation. */
5190 /* Shift by immediate. */
5191 if (CONST_INT_P (XEXP (op0, 1)))
5192 *cost += extra_cost->alu.log_shift;
5194 *cost += extra_cost->alu.log_shift_reg;
5197 *cost += extra_cost->alu.logical;
5200 /* In both cases we want to cost both operands. */
5201 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5202 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5212 *cost += extra_cost->alu.logical;
5214 /* The logical instruction could have the shifted register form,
5215 but the cost is the same if the shift is processed as a separate
5216 instruction, so we don't bother with it here. */
5222 /* If a value is written in SI mode, then zero extended to DI
5223 mode, the operation will in general be free, as a write to
5224 a 'w' register implicitly zeroes the upper bits of an 'x'
5225 register. However, if this is
5227 (set (reg) (zero_extend (reg)))
5229 we must cost the explicit register move. */
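/* For example, (set (reg:DI x0) (zero_extend:DI (plus:SI ...))) is
   costed as just the SImode PLUS, since writing the 32-bit result to
   w0 already clears bits 63:32 of x0; only the bare
   (set (reg:DI) (zero_extend:DI (reg:SI))) form pays for an explicit
   MOV/UXTW (illustrative RTL forms).  */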
5231 && GET_MODE (op0) == SImode
5234 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5236 if (!op_cost && speed)
5238 *cost += extra_cost->alu.extend;
5240 /* Free, the cost is that of the SI mode operation. */
5245 else if (MEM_P (XEXP (x, 0)))
5247 /* All loads can zero extend to any size for free. */
5248 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
5254 *cost += extra_cost->alu.extend;
5259 if (MEM_P (XEXP (x, 0)))
5264 rtx address = XEXP (XEXP (x, 0), 0);
5265 *cost += extra_cost->ldst.load_sign_extend;
5268 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5275 *cost += extra_cost->alu.extend;
5279 if (!CONST_INT_P (XEXP (x, 1)))
5280 *cost += COSTS_N_INSNS (2);
5287 /* Shifting by a register often takes an extra cycle. */
5288 if (speed && !CONST_INT_P (XEXP (x, 1)))
5289 *cost += extra_cost->alu.arith_shift_reg;
5291 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
5295 if (!CONSTANT_P (XEXP (x, 0)))
5296 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
5300 if (!CONSTANT_P (XEXP (x, 1)))
5301 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
5302 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
5307 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
5311 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5312 /* aarch64_rtx_mult_cost always handles recursion to its operands. */
5318 *cost = COSTS_N_INSNS (2);
5321 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5322 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5323 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
5324 else if (GET_MODE (x) == DFmode)
5325 *cost += (extra_cost->fp[1].mult
5326 + extra_cost->fp[1].div);
5327 else if (GET_MODE (x) == SFmode)
5328 *cost += (extra_cost->fp[0].mult
5329 + extra_cost->fp[0].div);
5331 return false; /* All arguments need to be in registers. */
5335 *cost = COSTS_N_INSNS (1);
5338 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5339 *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
5340 else if (GET_MODE (x) == DFmode)
5341 *cost += extra_cost->fp[1].div;
5342 else if (GET_MODE (x) == SFmode)
5343 *cost += extra_cost->fp[0].div;
5345 return false; /* All arguments need to be in registers. */
5353 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
5354 calculated for X. This cost is stored in *COST. Returns true
5355 if the total cost of X was calculated. */
5357 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
5358 int param, int *cost, bool speed)
5360 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
5362 if (dump_file && (dump_flags & TDF_DETAILS))
5364 print_rtl_single (dump_file, x);
5365 fprintf (dump_file, "\n%s cost: %d (%s)\n",
5366 speed ? "Hot" : "Cold",
5367 *cost, result ? "final" : "partial");
5374 aarch64_register_move_cost (enum machine_mode mode,
5375 reg_class_t from_i, reg_class_t to_i)
5377 enum reg_class from = (enum reg_class) from_i;
5378 enum reg_class to = (enum reg_class) to_i;
5379 const struct cpu_regmove_cost *regmove_cost
5380 = aarch64_tune_params->regmove_cost;
5382 /* Moving between GPR and stack cost is the same as GP2GP. */
5383 if ((from == GENERAL_REGS && to == STACK_REG)
5384 || (to == GENERAL_REGS && from == STACK_REG))
5385 return regmove_cost->GP2GP;
5387 /* To/From the stack register, we move via the gprs. */
5388 if (to == STACK_REG || from == STACK_REG)
5389 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
5390 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
5392 if (from == GENERAL_REGS && to == GENERAL_REGS)
5393 return regmove_cost->GP2GP;
5394 else if (from == GENERAL_REGS)
5395 return regmove_cost->GP2FP;
5396 else if (to == GENERAL_REGS)
5397 return regmove_cost->FP2GP;
5399 /* When AdvSIMD instructions are disabled it is not possible to move
5400 a 128-bit value directly between Q registers. This is handled in
5401 secondary reload. A general register is used as a scratch to move
5402 the upper DI value and the lower DI value is moved directly,
5403 hence the cost is the sum of three moves. */
5404 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 128)
5405 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
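/* Worked example: without TARGET_SIMD a 128-bit FP-to-FP copy is split
   via a general register, so with illustrative per-CPU costs
   GP2FP = 5, FP2GP = 5 and FP2FP = 2 it is costed as 5 + 5 + 2 = 12.  */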
5407 return regmove_cost->FP2FP;
5411 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5412 reg_class_t rclass ATTRIBUTE_UNUSED,
5413 bool in ATTRIBUTE_UNUSED)
5415 return aarch64_tune_params->memmov_cost;
5418 /* Return the number of instructions that can be issued per cycle. */
5420 aarch64_sched_issue_rate (void)
5422 return aarch64_tune_params->issue_rate;
5425 /* Vectorizer cost model target hooks. */
5427 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5429 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5431 int misalign ATTRIBUTE_UNUSED)
5435 switch (type_of_cost)
5438 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
5441 return aarch64_tune_params->vec_costs->scalar_load_cost;
5444 return aarch64_tune_params->vec_costs->scalar_store_cost;
5447 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5450 return aarch64_tune_params->vec_costs->vec_align_load_cost;
5453 return aarch64_tune_params->vec_costs->vec_store_cost;
5456 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
5459 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
5461 case unaligned_load:
5462 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
5464 case unaligned_store:
5465 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
5467 case cond_branch_taken:
5468 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
5470 case cond_branch_not_taken:
5471 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
5474 case vec_promote_demote:
5475 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5478 elements = TYPE_VECTOR_SUBPARTS (vectype);
5479 return elements / 2 + 1;
5486 /* Implement targetm.vectorize.add_stmt_cost. */
5488 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5489 struct _stmt_vec_info *stmt_info, int misalign,
5490 enum vect_cost_model_location where)
5492 unsigned *cost = (unsigned *) data;
5493 unsigned retval = 0;
5495 if (flag_vect_cost_model)
5497 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5499 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
5501 /* Statements in an inner loop relative to the loop being
5502 vectorized are weighted more heavily. The value here is
5503 a function (linear for now) of the loop nest level. */
5504 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5506 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5507 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
5508 unsigned nest_level = loop_depth (loop);
5510 count *= nest_level;
5513 retval = (unsigned) (count * stmt_cost);
5514 cost[where] += retval;
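/* Worked example: a statement in an inner loop with per-statement cost
   C, counted COUNT times at loop depth D, contributes C * COUNT * D to
   cost[vect_body]; a cost-2 statement counted once at depth 3 adds 6.  */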
5520 static void initialize_aarch64_code_model (void);
5522 /* Parse the architecture extension string. */
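/* For example (illustrative), a modifier string such as "+crc+nocrypto"
   is handled one "+name" at a time: "crc" ORs that extension's flags_on
   into aarch64_isa_flags, while the "no" prefix on "crypto" clears its
   flags_off bits instead.  */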
5525 aarch64_parse_extension (char *str)
5527 /* The extension string is parsed left to right. */
5528 const struct aarch64_option_extension *opt = NULL;
5530 /* Flag to say whether we are adding or removing an extension. */
5531 int adding_ext = -1;
5533 while (str != NULL && *str != 0)
5539 ext = strchr (str, '+');
5546 if (len >= 2 && strncmp (str, "no", 2) == 0)
5557 error ("missing feature modifier after %qs", "+no");
5561 /* Scan over the extensions table trying to find an exact match. */
5562 for (opt = all_extensions; opt->name != NULL; opt++)
5564 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5566 /* Add or remove the extension. */
5568 aarch64_isa_flags |= opt->flags_on;
5570 aarch64_isa_flags &= ~(opt->flags_off);
5575 if (opt->name == NULL)
5577 /* Extension not found in list. */
5578 error ("unknown feature modifier %qs", str);
5588 /* Parse the ARCH string. */
5591 aarch64_parse_arch (void)
5594 const struct processor *arch;
5595 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5598 strcpy (str, aarch64_arch_string);
5600 ext = strchr (str, '+');
5609 error ("missing arch name in -march=%qs", str);
5613 /* Loop through the list of supported ARCHs to find a match. */
5614 for (arch = all_architectures; arch->name != NULL; arch++)
5616 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5618 selected_arch = arch;
5619 aarch64_isa_flags = selected_arch->flags;
5622 selected_cpu = &all_cores[selected_arch->core];
5626 /* ARCH string contains at least one extension. */
5627 aarch64_parse_extension (ext);
5630 if (strcmp (selected_arch->arch, selected_cpu->arch))
5632 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
5633 selected_cpu->name, selected_arch->name);
5640 /* ARCH name not found in list. */
5641 error ("unknown value %qs for -march", str);
5645 /* Parse the CPU string. */
5648 aarch64_parse_cpu (void)
5651 const struct processor *cpu;
5652 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5655 strcpy (str, aarch64_cpu_string);
5657 ext = strchr (str, '+');
5666 error ("missing cpu name in -mcpu=%qs", str);
5670 /* Loop through the list of supported CPUs to find a match. */
5671 for (cpu = all_cores; cpu->name != NULL; cpu++)
5673 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5676 aarch64_isa_flags = selected_cpu->flags;
5680 /* CPU string contains at least one extension. */
5681 aarch64_parse_extension (ext);
5688 /* CPU name not found in list. */
5689 error ("unknown value %qs for -mcpu", str);
5693 /* Parse the TUNE string. */
5696 aarch64_parse_tune (void)
5698 const struct processor *cpu;
5699 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5700 strcpy (str, aarch64_tune_string);
5702 /* Loop through the list of supported CPUs to find a match. */
5703 for (cpu = all_cores; cpu->name != NULL; cpu++)
5705 if (strcmp (cpu->name, str) == 0)
5707 selected_tune = cpu;
5712 /* CPU name not found in list. */
5713 error ("unknown value %qs for -mtune", str);
5718 /* Implement TARGET_OPTION_OVERRIDE. */
5721 aarch64_override_options (void)
5723 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
5724 If either of -march or -mtune is given, they override their
5725 respective component of -mcpu.
5727 So, first parse AARCH64_CPU_STRING, then the others, be careful
5728 with -march as, if -mcpu is not present on the command line, march
5729 must set a sensible default CPU. */
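/* For example, -mcpu=cortex-a53 behaves roughly like
   -march=armv8-a -mtune=cortex-a53, and an explicit -march or -mtune
   on the command line replaces only that component.  */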
5730 if (aarch64_cpu_string)
5732 aarch64_parse_cpu ();
5735 if (aarch64_arch_string)
5737 aarch64_parse_arch ();
5740 if (aarch64_tune_string)
5742 aarch64_parse_tune ();
5745 #ifndef HAVE_AS_MABI_OPTION
5746 /* The compiler may have been configured with 2.23.* binutils, which does
5747 not have support for ILP32. */
5749 error ("Assembler does not support -mabi=ilp32");
5752 initialize_aarch64_code_model ();
5754 aarch64_build_bitmask_table ();
5756 /* This target defaults to strict volatile bitfields. */
5757 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5758 flag_strict_volatile_bitfields = 1;
5760 /* If the user did not specify a processor, choose the default
5761 one for them. This will be the CPU set during configuration using
5762 --with-cpu, otherwise it is "generic". */
5765 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5766 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5769 gcc_assert (selected_cpu);
5772 selected_tune = selected_cpu;
5774 aarch64_tune_flags = selected_tune->flags;
5775 aarch64_tune = selected_tune->core;
5776 aarch64_tune_params = selected_tune->tune;
5778 if (aarch64_fix_a53_err835769 == 2)
5780 #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
5781 aarch64_fix_a53_err835769 = 1;
5783 aarch64_fix_a53_err835769 = 0;
5787 aarch64_override_options_after_change ();
5790 /* Implement targetm.override_options_after_change. */
5793 aarch64_override_options_after_change (void)
5795 if (flag_omit_frame_pointer)
5796 flag_omit_leaf_frame_pointer = false;
5797 else if (flag_omit_leaf_frame_pointer)
5798 flag_omit_frame_pointer = true;
5801 static struct machine_function *
5802 aarch64_init_machine_status (void)
5804 struct machine_function *machine;
5805 machine = ggc_alloc_cleared_machine_function ();
5810 aarch64_init_expanders (void)
5812 init_machine_status = aarch64_init_machine_status;
5815 /* A checking mechanism for the implementation of the various code models. */
5817 initialize_aarch64_code_model (void)
5821 switch (aarch64_cmodel_var)
5823 case AARCH64_CMODEL_TINY:
5824 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5826 case AARCH64_CMODEL_SMALL:
5827 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5829 case AARCH64_CMODEL_LARGE:
5830 sorry ("code model %qs with -f%s", "large",
5831 flag_pic > 1 ? "PIC" : "pic");
5837 aarch64_cmodel = aarch64_cmodel_var;
5840 /* Return true if SYMBOL_REF X binds locally. */
5843 aarch64_symbol_binds_local_p (const_rtx x)
5845 return (SYMBOL_REF_DECL (x)
5846 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5847 : SYMBOL_REF_LOCAL_P (x));
5850 /* Return true if SYMBOL_REF X is thread local */
5852 aarch64_tls_symbol_p (rtx x)
5854 if (! TARGET_HAVE_TLS)
5857 if (GET_CODE (x) != SYMBOL_REF)
5860 return SYMBOL_REF_TLS_MODEL (x) != 0;
5863 /* Classify a TLS symbol into one of the TLS kinds. */
5864 enum aarch64_symbol_type
5865 aarch64_classify_tls_symbol (rtx x)
5867 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5871 case TLS_MODEL_GLOBAL_DYNAMIC:
5872 case TLS_MODEL_LOCAL_DYNAMIC:
5873 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5875 case TLS_MODEL_INITIAL_EXEC:
5876 return SYMBOL_SMALL_GOTTPREL;
5878 case TLS_MODEL_LOCAL_EXEC:
5879 return SYMBOL_SMALL_TPREL;
5881 case TLS_MODEL_EMULATED:
5882 case TLS_MODEL_NONE:
5883 return SYMBOL_FORCE_TO_MEM;
5890 /* Return the method that should be used to access SYMBOL_REF or
5891 LABEL_REF X in context CONTEXT. */
5893 enum aarch64_symbol_type
5894 aarch64_classify_symbol (rtx x, rtx offset,
5895 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5897 if (GET_CODE (x) == LABEL_REF)
5899 switch (aarch64_cmodel)
5901 case AARCH64_CMODEL_LARGE:
5902 return SYMBOL_FORCE_TO_MEM;
5904 case AARCH64_CMODEL_TINY_PIC:
5905 case AARCH64_CMODEL_TINY:
5906 return SYMBOL_TINY_ABSOLUTE;
5908 case AARCH64_CMODEL_SMALL_PIC:
5909 case AARCH64_CMODEL_SMALL:
5910 return SYMBOL_SMALL_ABSOLUTE;
5917 if (GET_CODE (x) == SYMBOL_REF)
5919 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5920 return SYMBOL_FORCE_TO_MEM;
5922 if (aarch64_tls_symbol_p (x))
5923 return aarch64_classify_tls_symbol (x);
5925 switch (aarch64_cmodel)
5927 case AARCH64_CMODEL_TINY:
5928 /* When we retrieve a symbol + offset address, we have to make sure
5929 the offset does not cause overflow of the final address. But
5930 we have no way of knowing the address of the symbol at compile time,
5931 so we can't accurately say if the distance between the PC and
5932 symbol + offset is outside the addressable range of +/-1M in the
5933 TINY code model. So we rely on images not being greater than
5934 1M, cap the offset at 1M, and load anything beyond 1M using an
5935 alternative mechanism. */
5936 if (SYMBOL_REF_WEAK (x)
5937 || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
5938 return SYMBOL_FORCE_TO_MEM;
5939 return SYMBOL_TINY_ABSOLUTE;
5941 case AARCH64_CMODEL_SMALL:
5942 /* Same reasoning as the tiny code model, but the offset cap here is 4G. */
5944 if (SYMBOL_REF_WEAK (x)
5945 || INTVAL (offset) < (HOST_WIDE_INT) -4294967263
5946 || INTVAL (offset) > (HOST_WIDE_INT) 4294967264)
5947 return SYMBOL_FORCE_TO_MEM;
5948 return SYMBOL_SMALL_ABSOLUTE;
5950 case AARCH64_CMODEL_TINY_PIC:
5951 if (!aarch64_symbol_binds_local_p (x))
5952 return SYMBOL_TINY_GOT;
5953 return SYMBOL_TINY_ABSOLUTE;
5955 case AARCH64_CMODEL_SMALL_PIC:
5956 if (!aarch64_symbol_binds_local_p (x))
5957 return SYMBOL_SMALL_GOT;
5958 return SYMBOL_SMALL_ABSOLUTE;
5965 /* By default push everything into the constant pool. */
5966 return SYMBOL_FORCE_TO_MEM;
5970 aarch64_constant_address_p (rtx x)
5972 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5976 aarch64_legitimate_pic_operand_p (rtx x)
5978 if (GET_CODE (x) == SYMBOL_REF
5979 || (GET_CODE (x) == CONST
5980 && GET_CODE (XEXP (x, 0)) == PLUS
5981 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5987 /* Return true if X holds either a quarter-precision or
5988 floating-point +0.0 constant. */
5990 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5992 if (!CONST_DOUBLE_P (x))
5995 /* TODO: We could handle moving 0.0 to a TFmode register,
5996 but first we would like to refactor the movtf_aarch64
5997 pattern to be more amenable to splitting moves properly and
5998 to correctly gate on TARGET_SIMD. For now, reject all
5999 constants which are not destined for SFmode or DFmode registers. */
6000 if (!(mode == SFmode || mode == DFmode))
6003 if (aarch64_float_const_zero_rtx_p (x))
6005 return aarch64_float_const_representable_p (x);
6009 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
6011 /* Do not allow vector struct mode constants. We could support
6012 0 and -1 easily, but they need support in aarch64-simd.md. */
6013 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
6016 /* This could probably go away because
6017 we now decompose CONST_INTs according to expand_mov_immediate. */
6018 if ((GET_CODE (x) == CONST_VECTOR
6019 && aarch64_simd_valid_immediate (x, mode, false, NULL))
6020 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
6021 return !targetm.cannot_force_const_mem (mode, x);
6023 if (GET_CODE (x) == HIGH
6024 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6027 return aarch64_constant_address_p (x);
6031 aarch64_load_tp (rtx target)
6034 || GET_MODE (target) != Pmode
6035 || !register_operand (target, Pmode))
6036 target = gen_reg_rtx (Pmode);
6038 /* Can return in any reg. */
6039 emit_insn (gen_aarch64_load_tp_hard (target));
6043 /* On AAPCS systems, this is the "struct __va_list". */
6044 static GTY(()) tree va_list_type;
6046 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6047 Return the type to use as __builtin_va_list.
6049 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
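     struct __va_list
     {
       void *__stack;
       void *__gr_top;
       void *__vr_top;
       int   __gr_offs;
       int   __vr_offs;
     };  */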
6061 aarch64_build_builtin_va_list (void)
6064 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6066 /* Create the type. */
6067 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6068 /* Give it the required name. */
6069 va_list_name = build_decl (BUILTINS_LOCATION,
6071 get_identifier ("__va_list"),
6073 DECL_ARTIFICIAL (va_list_name) = 1;
6074 TYPE_NAME (va_list_type) = va_list_name;
6075 TYPE_STUB_DECL (va_list_type) = va_list_name;
6077 /* Create the fields. */
6078 f_stack = build_decl (BUILTINS_LOCATION,
6079 FIELD_DECL, get_identifier ("__stack"),
6081 f_grtop = build_decl (BUILTINS_LOCATION,
6082 FIELD_DECL, get_identifier ("__gr_top"),
6084 f_vrtop = build_decl (BUILTINS_LOCATION,
6085 FIELD_DECL, get_identifier ("__vr_top"),
6087 f_groff = build_decl (BUILTINS_LOCATION,
6088 FIELD_DECL, get_identifier ("__gr_offs"),
6090 f_vroff = build_decl (BUILTINS_LOCATION,
6091 FIELD_DECL, get_identifier ("__vr_offs"),
6094 DECL_ARTIFICIAL (f_stack) = 1;
6095 DECL_ARTIFICIAL (f_grtop) = 1;
6096 DECL_ARTIFICIAL (f_vrtop) = 1;
6097 DECL_ARTIFICIAL (f_groff) = 1;
6098 DECL_ARTIFICIAL (f_vroff) = 1;
6100 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6101 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6102 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6103 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6104 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6106 TYPE_FIELDS (va_list_type) = f_stack;
6107 DECL_CHAIN (f_stack) = f_grtop;
6108 DECL_CHAIN (f_grtop) = f_vrtop;
6109 DECL_CHAIN (f_vrtop) = f_groff;
6110 DECL_CHAIN (f_groff) = f_vroff;
6112 /* Compute its layout. */
6113 layout_type (va_list_type);
6115 return va_list_type;
6118 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6120 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6122 const CUMULATIVE_ARGS *cum;
6123 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6124 tree stack, grtop, vrtop, groff, vroff;
6126 int gr_save_area_size;
6127 int vr_save_area_size;
6130 cum = &crtl->args.info;
6132 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6134 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6136 if (TARGET_GENERAL_REGS_ONLY)
6138 if (cum->aapcs_nvrn > 0)
6139 sorry ("%qs and floating point or vector arguments",
6140 "-mgeneral-regs-only");
6141 vr_save_area_size = 0;
6144 f_stack = TYPE_FIELDS (va_list_type_node);
6145 f_grtop = DECL_CHAIN (f_stack);
6146 f_vrtop = DECL_CHAIN (f_grtop);
6147 f_groff = DECL_CHAIN (f_vrtop);
6148 f_vroff = DECL_CHAIN (f_groff);
6150 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6152 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6154 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6156 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6158 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6161 /* Emit code to initialize STACK, which points to the next varargs stack
6162 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6163 by named arguments. STACK is 8-byte aligned. */
6164 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6165 if (cum->aapcs_stack_size > 0)
6166 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6167 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6168 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6170 /* Emit code to initialize GRTOP, the top of the GR save area.
6171 virtual_incoming_args_rtx should have been 16-byte aligned. */
6172 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6173 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6174 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6176 /* Emit code to initialize VRTOP, the top of the VR save area.
6177 This address is gr_save_area_bytes below GRTOP, rounded
6178 down to the next 16-byte boundary. */
6179 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6180 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6181 STACK_BOUNDARY / BITS_PER_UNIT);
6184 t = fold_build_pointer_plus_hwi (t, -vr_offset);
6185 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6186 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6188 /* Emit code to initialize GROFF, the offset from GRTOP of the
6189 next GPR argument. */
6190 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6191 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6192 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6194 /* Likewise emit code to initialize VROFF, the offset from VRTOP
6195 of the next VR argument. */
6196 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6197 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6198 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
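/* Worked example (illustrative): for a variadic function with one named
   integer argument, cum->aapcs_ncrn is 1 and cum->aapcs_nvrn is 0, so
   gr_save_area_size = 7 * 8 = 56 and vr_save_area_size = 8 * 16 = 128.
   After va_start, __gr_top points at the incoming-argument base,
   __vr_top sits ROUND_UP (56, 16) = 64 bytes below it, and
   __gr_offs/__vr_offs start at -56 and -128 respectively.  */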
6201 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6204 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6205 gimple_seq *post_p ATTRIBUTE_UNUSED)
6209 bool is_ha; /* is HFA or HVA. */
6210 bool dw_align; /* double-word align. */
6211 enum machine_mode ag_mode = VOIDmode;
6213 enum machine_mode mode;
6215 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6216 tree stack, f_top, f_off, off, arg, roundup, on_stack;
6217 HOST_WIDE_INT size, rsize, adjust, align;
6218 tree t, u, cond1, cond2;
6220 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6222 type = build_pointer_type (type);
6224 mode = TYPE_MODE (type);
6226 f_stack = TYPE_FIELDS (va_list_type_node);
6227 f_grtop = DECL_CHAIN (f_stack);
6228 f_vrtop = DECL_CHAIN (f_grtop);
6229 f_groff = DECL_CHAIN (f_vrtop);
6230 f_vroff = DECL_CHAIN (f_groff);
6232 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6233 f_stack, NULL_TREE);
6234 size = int_size_in_bytes (type);
6235 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6239 if (aarch64_vfp_is_call_or_return_candidate (mode,
6245 /* TYPE passed in fp/simd registers. */
6246 if (TARGET_GENERAL_REGS_ONLY)
6247 sorry ("%qs and floating point or vector arguments",
6248 "-mgeneral-regs-only");
6250 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6251 unshare_expr (valist), f_vrtop, NULL_TREE);
6252 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6253 unshare_expr (valist), f_vroff, NULL_TREE);
6255 rsize = nregs * UNITS_PER_VREG;
6259 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6260 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6262 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6263 && size < UNITS_PER_VREG)
6265 adjust = UNITS_PER_VREG - size;
6270 /* TYPE passed in general registers. */
6271 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6272 unshare_expr (valist), f_grtop, NULL_TREE);
6273 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6274 unshare_expr (valist), f_groff, NULL_TREE);
6275 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6276 nregs = rsize / UNITS_PER_WORD;
6281 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6282 && size < UNITS_PER_WORD)
6284 adjust = UNITS_PER_WORD - size;
6288 /* Get a local temporary for the field value. */
6289 off = get_initialized_tmp_var (f_off, pre_p, NULL);
6291 /* Emit code to branch if off >= 0. */
6292 t = build2 (GE_EXPR, boolean_type_node, off,
6293 build_int_cst (TREE_TYPE (off), 0));
6294 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6298 /* Emit: offs = (offs + 15) & -16. */
6299 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6300 build_int_cst (TREE_TYPE (off), 15));
6301 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6302 build_int_cst (TREE_TYPE (off), -16));
6303 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6308 /* Update ap.__[g|v]r_offs */
6309 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6310 build_int_cst (TREE_TYPE (off), rsize));
6311 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6315 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6317 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6318 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6319 build_int_cst (TREE_TYPE (f_off), 0));
6320 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6322 /* String up: make sure the assignment happens before the use. */
6323 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6324 COND_EXPR_ELSE (cond1) = t;
6326 /* Prepare the trees handling the argument that is passed on the stack;
6327 the top level node will store in ON_STACK. */
6328 arg = get_initialized_tmp_var (stack, pre_p, NULL);
6331 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
6332 t = fold_convert (intDI_type_node, arg);
6333 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6334 build_int_cst (TREE_TYPE (t), 15));
6335 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6336 build_int_cst (TREE_TYPE (t), -16));
6337 t = fold_convert (TREE_TYPE (arg), t);
6338 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
6342 /* Advance ap.__stack */
6343 t = fold_convert (intDI_type_node, arg);
6344 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6345 build_int_cst (TREE_TYPE (t), size + 7));
6346 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6347 build_int_cst (TREE_TYPE (t), -8));
6348 t = fold_convert (TREE_TYPE (arg), t);
6349 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
6350 /* String up roundup and advance. */
6352 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6353 /* String up with arg */
6354 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
6355 /* Big-endianness related address adjustment. */
6356 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6357 && size < UNITS_PER_WORD)
6359 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
6360 size_int (UNITS_PER_WORD - size));
6361 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
6364 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
6365 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
6367 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
6370 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
6371 build_int_cst (TREE_TYPE (off), adjust));
6373 t = fold_convert (sizetype, t);
6374 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
6378 /* type ha; // treat as "struct {ftype field[n];}"
6379 ... [computing offs]
6380 for (i = 0; i <nregs; ++i, offs += 16)
6381 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6384 tree tmp_ha, field_t, field_ptr_t;
6386 /* Declare a local variable. */
6387 tmp_ha = create_tmp_var_raw (type, "ha");
6388 gimple_add_tmp_var (tmp_ha);
6390 /* Establish the base type. */
6394 field_t = float_type_node;
6395 field_ptr_t = float_ptr_type_node;
6398 field_t = double_type_node;
6399 field_ptr_t = double_ptr_type_node;
6402 field_t = long_double_type_node;
6403 field_ptr_t = long_double_ptr_type_node;
6405 /* Half precision and quad precision are not fully supported yet. Enable
6406 the following code once that support is complete; we still need to find the
6407 correct type node for __fp16 *. */
6410 field_t = float_type_node;
6411 field_ptr_t = float_ptr_type_node;
6417 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
6418 field_t = build_vector_type_for_mode (innertype, ag_mode);
6419 field_ptr_t = build_pointer_type (field_t);
6426 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
6427 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
6429 t = fold_convert (field_ptr_t, addr);
6430 t = build2 (MODIFY_EXPR, field_t,
6431 build1 (INDIRECT_REF, field_t, tmp_ha),
6432 build1 (INDIRECT_REF, field_t, t));
6434 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
6435 for (i = 1; i < nregs; ++i)
6437 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
6438 u = fold_convert (field_ptr_t, addr);
6439 u = build2 (MODIFY_EXPR, field_t,
6440 build2 (MEM_REF, field_t, tmp_ha,
6441 build_int_cst (field_ptr_t,
6443 int_size_in_bytes (field_t)))),
6444 build1 (INDIRECT_REF, field_t, u));
6445 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
6448 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
6449 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
6452 COND_EXPR_ELSE (cond2) = t;
6453 addr = fold_convert (build_pointer_type (type), cond1);
6454 addr = build_va_arg_indirect_ref (addr);
6457 addr = build_va_arg_indirect_ref (addr);
6462 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
6465 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
6466 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6469 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6470 CUMULATIVE_ARGS local_cum;
6471 int gr_saved, vr_saved;
6473 /* The caller has advanced CUM up to, but not beyond, the last named
6474 argument. Advance a local copy of CUM past the last "real" named
6475 argument, to find out how many registers are left over. */
6477 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
6479 /* Find out how many registers we need to save. */
6480 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
6481 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
6483 if (TARGET_GENERAL_REGS_ONLY)
6485 if (local_cum.aapcs_nvrn > 0)
6486 sorry ("%qs and floating point or vector arguments",
6487 "-mgeneral-regs-only");
6497 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
6498 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
6499 - gr_saved * UNITS_PER_WORD);
6500 mem = gen_frame_mem (BLKmode, ptr);
6501 set_mem_alias_set (mem, get_varargs_alias_set ());
6503 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
6508 /* We can't use move_block_from_reg, because it will use
6509 the wrong mode, storing D regs only. */
6510 enum machine_mode mode = TImode;
6513 /* Set OFF to the offset from virtual_incoming_args_rtx of
6514 the first vector register. The VR save area lies below
6515 the GR one, and is aligned to 16 bytes. */
6516 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6517 STACK_BOUNDARY / BITS_PER_UNIT);
6518 off -= vr_saved * UNITS_PER_VREG;
6520 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
6524 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
6525 mem = gen_frame_mem (mode, ptr);
6526 set_mem_alias_set (mem, get_varargs_alias_set ());
6527 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
6528 off += UNITS_PER_VREG;
6533 /* We don't save the size into *PRETEND_SIZE because we want to avoid
6534 any complication of having crtl->args.pretend_args_size changed. */
6535 cfun->machine->saved_varargs_size
6536 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6537 STACK_BOUNDARY / BITS_PER_UNIT)
6538 + vr_saved * UNITS_PER_VREG);
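/* Worked example: with gr_saved = 3 and vr_saved = 2 this records
   ROUND_UP (3 * 8, 16) + 2 * 16 = 32 + 32 = 64 bytes of va_list
   save area.  */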
6542 aarch64_conditional_register_usage (void)
6547 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
6550 call_used_regs[i] = 1;
6555 /* Walk down the type tree of TYPE counting consecutive base elements.
6556 If *MODEP is VOIDmode, then set it to the first valid floating point
6557 type. If a non-floating point type is found, or if a floating point
6558 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6559 otherwise return the count in the sub-tree. */
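/* For example, struct { double x, y, z; } yields a count of 3 with
   *MODEP set to DFmode, while struct { float f; double d; } mixes
   element types and yields -1.  */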
6561 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6563 enum machine_mode mode;
6566 switch (TREE_CODE (type))
6569 mode = TYPE_MODE (type);
6570 if (mode != DFmode && mode != SFmode && mode != TFmode)
6573 if (*modep == VOIDmode)
6582 mode = TYPE_MODE (TREE_TYPE (type));
6583 if (mode != DFmode && mode != SFmode && mode != TFmode)
6586 if (*modep == VOIDmode)
6595 /* Use V2SImode and V4SImode as representatives of all 64-bit
6596 and 128-bit vector types. */
6597 size = int_size_in_bytes (type);
6610 if (*modep == VOIDmode)
6613 /* Vector modes are considered to be opaque: two vectors are
6614 equivalent for the purposes of being homogeneous aggregates
6615 if they are the same size. */
6624 tree index = TYPE_DOMAIN (type);
6626 /* Can't handle incomplete types. */
6627 if (!COMPLETE_TYPE_P (type))
6630 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6633 || !TYPE_MAX_VALUE (index)
6634 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6635 || !TYPE_MIN_VALUE (index)
6636 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6640 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6641 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6643 /* There must be no padding. */
6644 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6645 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6646 != count * GET_MODE_BITSIZE (*modep)))
6658 /* Can't handle incomplete types. */
6659 if (!COMPLETE_TYPE_P (type))
6662 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6664 if (TREE_CODE (field) != FIELD_DECL)
6667 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6673 /* There must be no padding. */
6674 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6675 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6676 != count * GET_MODE_BITSIZE (*modep)))
6683 case QUAL_UNION_TYPE:
6685 /* These aren't very interesting except in a degenerate case. */
6690 /* Can't handle incomplete types. */
6691 if (!COMPLETE_TYPE_P (type))
6694 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6696 if (TREE_CODE (field) != FIELD_DECL)
6699 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6702 count = count > sub_count ? count : sub_count;
6705 /* There must be no padding. */
6706 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6707 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6708 != count * GET_MODE_BITSIZE (*modep)))
6721 /* Return true if we use LRA instead of reload pass. */
6723 aarch64_lra_p (void)
6725 return aarch64_lra_flag;
6728 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6729 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6730 array types. The C99 floating-point complex types are also considered
6731 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6732 types, which are GCC extensions and out of the scope of AAPCS64, are
6733 treated as composite types here as well.
6735 Note that MODE itself is not sufficient in determining whether a type
6736 is such a composite type or not. This is because
6737 stor-layout.c:compute_record_mode may have already changed the MODE
6738 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6739 structure with only one field may have its MODE set to the mode of the
6740 field. Also an integer mode whose size matches the size of the
6741 RECORD_TYPE type may be used to substitute the original mode
6742 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6743 solely relied on. */
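/* For example, struct { float f; } is composite even though
   compute_record_mode may have given it SFmode, and _Complex double is
   composite by the rules above, but a plain double is not.  */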
6746 aarch64_composite_type_p (const_tree type,
6747 enum machine_mode mode)
6749 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6753 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6754 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6760 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6761 type as described in AAPCS64 \S 4.1.2.
6763 See the comment above aarch64_composite_type_p for the notes on MODE. */
6766 aarch64_short_vector_p (const_tree type,
6767 enum machine_mode mode)
6769 HOST_WIDE_INT size = -1;
6771 if (type && TREE_CODE (type) == VECTOR_TYPE)
6772 size = int_size_in_bytes (type);
6773 else if (!aarch64_composite_type_p (type, mode)
6774 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6775 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6776 size = GET_MODE_SIZE (mode);
6778 return (size == 8 || size == 16) ? true : false;
6781 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6782 shall be passed or returned in simd/fp register(s) (providing these
6783 parameter passing registers are available).
6785 Upon successful return, *COUNT returns the number of needed registers,
6786 *BASE_MODE returns the mode of the individual register and when IS_HA
6787 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6788 floating-point aggregate or a homogeneous short-vector aggregate. */
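/* For example (illustrative of the cases handled below), a plain double
   is a candidate with *COUNT = 1, *BASE_MODE = DFmode and *IS_HA false,
   whereas struct { float x, y; } is a homogeneous aggregate with
   *COUNT = 2, *BASE_MODE = SFmode and *IS_HA true.  */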
6791 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6793 enum machine_mode *base_mode,
6797 enum machine_mode new_mode = VOIDmode;
6798 bool composite_p = aarch64_composite_type_p (type, mode);
6800 if (is_ha != NULL) *is_ha = false;
6802 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6803 || aarch64_short_vector_p (type, mode))
6808 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6810 if (is_ha != NULL) *is_ha = true;
6812 new_mode = GET_MODE_INNER (mode);
6814 else if (type && composite_p)
6816 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6818 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6820 if (is_ha != NULL) *is_ha = true;
6829 *base_mode = new_mode;
6833 /* Implement TARGET_STRUCT_VALUE_RTX. */
6836 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6837 int incoming ATTRIBUTE_UNUSED)
6839 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6842 /* Implements target hook vector_mode_supported_p. */
6844 aarch64_vector_mode_supported_p (enum machine_mode mode)
6847 && (mode == V4SImode || mode == V8HImode
6848 || mode == V16QImode || mode == V2DImode
6849 || mode == V2SImode || mode == V4HImode
6850 || mode == V8QImode || mode == V2SFmode
6851 || mode == V4SFmode || mode == V2DFmode
6852 || mode == V1DFmode))
6858 /* Return appropriate SIMD container
6859 for MODE within a vector of WIDTH bits. */
6860 static enum machine_mode
6861 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6863 gcc_assert (width == 64 || width == 128);
6902 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6903 static enum machine_mode
6904 aarch64_preferred_simd_mode (enum machine_mode mode)
6906 return aarch64_simd_container_mode (mode, 128);
6909 /* Return the bitmask of possible vector sizes for the vectorizer
6912 aarch64_autovectorize_vector_sizes (void)
6917 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6918 vector types in order to conform to the AAPCS64 (see "Procedure
6919 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6920 qualify for emission with the mangled names defined in that document,
6921 a vector type must not only be of the correct mode but also be
6922 composed of AdvSIMD vector element types (e.g.
6923 __builtin_aarch64_simd_qi); these types are registered by
6924 aarch64_init_simd_builtins (). In other words, vector types defined
6925 in other ways e.g. via vector_size attribute will get default
6929 enum machine_mode mode;
6930 const char *element_type_name;
6931 const char *mangled_name;
6932 } aarch64_simd_mangle_map_entry;
6934 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6935 /* 64-bit containerized types. */
6936 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6937 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6938 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6939 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6940 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6941 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6942 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6943 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6944 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6945 /* 128-bit containerized types. */
6946 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6947 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6948 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6949 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6950 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6951 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6952 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6953 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6954 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6955 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6956 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6957 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6958 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
6959 { VOIDmode, NULL, NULL }
6962 /* Implement TARGET_MANGLE_TYPE. */
6965 aarch64_mangle_type (const_tree type)
6967 /* The AArch64 ABI documents say that "__va_list" has to be
6968 mangled as if it were in the "std" namespace. */
6969 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6970 return "St9__va_list";
6972 /* Check the mode of the vector type, and the name of the vector
6973 element type, against the table. */
6974 if (TREE_CODE (type) == VECTOR_TYPE)
6976 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6978 while (pos->mode != VOIDmode)
6980 tree elt_type = TREE_TYPE (type);
6982 if (pos->mode == TYPE_MODE (type)
6983 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6984 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6985 pos->element_type_name))
6986 return pos->mangled_name;
6992 /* Use the default mangling. */
6997 is_mem_p (rtx *x, void *data ATTRIBUTE_UNUSED)
7003 is_memory_op (rtx mem_insn)
7005 rtx pattern = PATTERN (mem_insn);
7006 return for_each_rtx (&pattern, is_mem_p, NULL);
7009 /* Find the first rtx before insn that will generate an assembly
7013 aarch64_prev_real_insn (rtx insn)
7020 insn = prev_real_insn (insn);
7022 while (insn && recog_memoized (insn) < 0);
7028 is_madd_op (enum attr_type t1)
7031 /* A number of these may be AArch32 only. */
7032 enum attr_type mlatypes[] = {
7033 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
7034 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
7035 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS,TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
7038 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
7040 if (t1 == mlatypes[i])
7047 /* Check if there is a register dependency between a load and the insn
7048 for which we hold recog_data. */
7051 dep_between_memop_and_curr (rtx memop)
7059 if (!REG_P (SET_DEST (memop)))
7062 load_reg = SET_DEST (memop);
7063 for (opno = 0; opno < recog_data.n_operands; opno++)
7065 rtx operand = recog_data.operand[opno];
7067 && reg_overlap_mentioned_p (load_reg, operand))
7075 aarch64_madd_needs_nop (rtx insn)
7077 enum attr_type attr_type;
7081 if (!aarch64_fix_a53_err835769)
7084 if (recog_memoized (insn) < 0)
7087 attr_type = get_attr_type (insn);
7088 if (!is_madd_op (attr_type))
7091 prev = aarch64_prev_real_insn (insn);
7092 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
7093 Restore recog state to INSN to avoid state corruption. */
7094 extract_constrain_insn_cached (insn);
7099 body = single_set (prev);
7101 /* If the previous insn is a memory op and there is no dependency between
7102 it and the madd, emit a nop between them. If we know the previous insn is
7103 a memory op but body is NULL, emit the nop to be safe; it is probably a
7104 load/store pair insn. */
7105 if (is_memory_op (prev)
7106 && GET_MODE (recog_data.operand[0]) == DImode
7107 && (!dep_between_memop_and_curr (body)))
7115 aarch64_final_prescan_insn (rtx insn)
7117 if (aarch64_madd_needs_nop (insn))
7118 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
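/* The emitted workaround sequence then looks like, e.g. (illustrative
   registers):
	ldr	x1, [x2]
	nop	// between mem op and mult-accumulate
	madd	x0, x3, x4, x5  */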
7122 /* Return the equivalent letter for size. */
7124 sizetochar (int size)
7128 case 64: return 'd';
7129 case 32: return 's';
7130 case 16: return 'h';
7131 case 8 : return 'b';
7132 default: gcc_unreachable ();
7136 /* Return true iff x is a uniform vector of floating-point
7137 constants, and the constant can be represented in
7138 quarter-precision form. Note, as aarch64_float_const_representable
7139 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
7141 aarch64_vect_float_const_representable_p (rtx x)
7144 REAL_VALUE_TYPE r0, ri;
7147 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7150 x0 = CONST_VECTOR_ELT (x, 0);
7151 if (!CONST_DOUBLE_P (x0))
7154 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7156 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7158 xi = CONST_VECTOR_ELT (x, i);
7159 if (!CONST_DOUBLE_P (xi))
7162 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7163 if (!REAL_VALUES_EQUAL (r0, ri))
7167 return aarch64_float_const_representable_p (x0);
7170 /* Return true for valid and false for invalid. */
7172 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
7173 struct simd_immediate_info *info)
7175 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
7177 for (i = 0; i < idx; i += (STRIDE)) \
7182 immtype = (CLASS); \
7183 elsize = (ELSIZE); \
7189 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7190 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7191 unsigned char bytes[16];
7192 int immtype = -1, matches;
7193 unsigned int invmask = inverse ? 0xff : 0;
7196 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7198 if (! (aarch64_simd_imm_zero_p (op, mode)
7199 || aarch64_vect_float_const_representable_p (op)))
7204 info->value = CONST_VECTOR_ELT (op, 0);
7205 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
7213 /* Splat vector constant out into a byte vector. */
7214 for (i = 0; i < n_elts; i++)
7216 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
7217 it must be laid out in the vector register in reverse order. */
7218 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
7219 unsigned HOST_WIDE_INT elpart;
7220 unsigned int part, parts;
7222 if (GET_CODE (el) == CONST_INT)
7224 elpart = INTVAL (el);
7227 else if (GET_CODE (el) == CONST_DOUBLE)
7229 elpart = CONST_DOUBLE_LOW (el);
7235 for (part = 0; part < parts; part++)
7238 for (byte = 0; byte < innersize; byte++)
7240 bytes[idx++] = (elpart & 0xff) ^ invmask;
7241 elpart >>= BITS_PER_UNIT;
7243 if (GET_CODE (el) == CONST_DOUBLE)
7244 elpart = CONST_DOUBLE_HIGH (el);
7249 gcc_assert (idx == GET_MODE_SIZE (mode));
7253 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7254 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7256 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7257 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7259 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7260 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7262 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7263 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7265 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7267 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7269 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7270 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7272 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7273 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7275 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7276 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7278 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7279 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7281 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7283 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7285 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7286 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7288 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7289 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7291 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7292 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7294 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7295 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7297 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7299 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7300 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
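/* Illustrative note (not from the original source): the CHECK cases above
   roughly correspond to the AdvSIMD immediate forms -- 0-3 are 32-bit MOVI
   with LSL #0/8/16/24, 4-5 are 16-bit MOVI with LSL #0/8, 6-9 and 10-11 are
   the corresponding MVNI (inverted) forms, 12-15 are the MOVI/MVNI "MSL"
   (shifting-ones) forms, 16 is the 8-bit replicated-byte MOVI, and 17 is the
   64-bit immediate whose bytes are each 0x00 or 0xff.  */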
7309 info->element_width = elsize;
7310 info->mvn = emvn != 0;
7311 info->shift = eshift;
7313 unsigned HOST_WIDE_INT imm = 0;
7315 if (immtype >= 12 && immtype <= 15)
7318 /* Un-invert bytes of recognized vector, if necessary. */
7320 for (i = 0; i < idx; i++)
7321 bytes[i] ^= invmask;
7325 /* FIXME: Broken on 32-bit H_W_I hosts. */
7326 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7328 for (i = 0; i < 8; i++)
7329 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7330 << (i * BITS_PER_UNIT);
7333 info->value = GEN_INT (imm);
7337 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7338 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7340 /* Construct 'abcdefgh' because the assembler cannot handle
7341 generic constants. */
7344 imm = (imm >> info->shift) & 0xff;
7345 info->value = GEN_INT (imm);
7354 aarch64_const_vec_all_same_int_p (rtx x,
7355 HOST_WIDE_INT minval,
7356 HOST_WIDE_INT maxval)
7358 HOST_WIDE_INT firstval;
7361 if (GET_CODE (x) != CONST_VECTOR
7362 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
7365 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
7366 if (firstval < minval || firstval > maxval)
7369 count = CONST_VECTOR_NUNITS (x);
7370 for (i = 1; i < count; i++)
7371 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
7377 /* Check if immediate shift constants are within range. */
7379 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
7381 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
7383 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
7385 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
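/* For example, with 32-bit elements (e.g. V4SImode) a left-shift immediate
   must lie in [0, 31] while a right-shift immediate must lie in [1, 32],
   matching the ranges checked above.  */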
7388 /* Return true if X is a uniform vector where all elements
7389 are either the floating-point constant 0.0 or the
7390 integer constant 0. */
7392 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
7394 return x == CONST0_RTX (mode);
7398 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
7400 HOST_WIDE_INT imm = INTVAL (x);
7403 for (i = 0; i < 8; i++)
7405 unsigned int byte = imm & 0xff;
7406 if (byte != 0xff && byte != 0)
7415 aarch64_mov_operand_p (rtx x,
7416 enum aarch64_symbol_context context,
7417 enum machine_mode mode)
7419 if (GET_CODE (x) == HIGH
7420 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7423 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
7426 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
7429 return aarch64_classify_symbolic_expression (x, context)
7430 == SYMBOL_TINY_ABSOLUTE;
7433 /* Return a const_int vector of VAL. */
7435 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
7437 int nunits = GET_MODE_NUNITS (mode);
7438 rtvec v = rtvec_alloc (nunits);
7441 for (i = 0; i < nunits; i++)
7442 RTVEC_ELT (v, i) = GEN_INT (val);
7444 return gen_rtx_CONST_VECTOR (mode, v);
7447 /* Check OP is a legal scalar immediate for the MOVI instruction. */
7450 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
7452 enum machine_mode vmode;
7454 gcc_assert (!VECTOR_MODE_P (mode));
7455 vmode = aarch64_preferred_simd_mode (mode);
7456 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
7457 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
7460 /* Construct and return a PARALLEL RTX vector. */
7462 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
7464 int nunits = GET_MODE_NUNITS (mode);
7465 rtvec v = rtvec_alloc (nunits / 2);
7466 int base = high ? nunits / 2 : 0;
7470 for (i = 0; i < nunits / 2; i++)
7471 RTVEC_ELT (v, i) = GEN_INT (base + i);
7473 t1 = gen_rtx_PARALLEL (mode, v);
7477 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
7478 HIGH (exclusive). */
7480 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7483 gcc_assert (GET_CODE (operand) == CONST_INT);
7484 lane = INTVAL (operand);
7486 if (lane < low || lane >= high)
7487 error ("lane out of range");
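/* For instance, a lane index for a V4SFmode vector would be checked with
   LOW == 0 and HIGH == 4, accepting only lanes 0-3.  */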
7491 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7493 gcc_assert (GET_CODE (operand) == CONST_INT);
7494 HOST_WIDE_INT lane = INTVAL (operand);
7496 if (lane < low || lane >= high)
7497 error ("constant out of range");
7500 /* Emit code to reinterpret one AdvSIMD type as another,
7501 without altering bits. */
7503 aarch64_simd_reinterpret (rtx dest, rtx src)
7505 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
7508 /* Emit code to place an AdvSIMD pair result in memory locations (with equal registers). */
7511 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
7512 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
7515 rtx mem = gen_rtx_MEM (mode, destaddr);
7516 rtx tmp1 = gen_reg_rtx (mode);
7517 rtx tmp2 = gen_reg_rtx (mode);
7519 emit_insn (intfn (tmp1, op1, tmp2));
7521 emit_move_insn (mem, tmp1);
7522 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
7523 emit_move_insn (mem, tmp2);
7526 /* Return TRUE if OP is a valid vector addressing mode. */
7528 aarch64_simd_mem_operand_p (rtx op)
7530 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
7531 || GET_CODE (XEXP (op, 0)) == REG);
7534 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
7535 not to early-clobber SRC registers in the process.
7537 We assume that the operands described by SRC and DEST represent a
7538 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
7539 number of components into which the copy has been decomposed. */
7541 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
7542 rtx *src, unsigned int count)
7546 if (!reg_overlap_mentioned_p (operands[0], operands[1])
7547 || REGNO (operands[0]) < REGNO (operands[1]))
7549 for (i = 0; i < count; i++)
7551 operands[2 * i] = dest[i];
7552 operands[2 * i + 1] = src[i];
7557 for (i = 0; i < count; i++)
7559 operands[2 * i] = dest[count - i - 1];
7560 operands[2 * i + 1] = src[count - i - 1];
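/* For example, decomposing a copy of {d1,d2} into {d2,d3}: moving d1 into d2
   first would clobber d2 before it has been read, so the components are
   emitted in reverse order (d3 = d2, then d2 = d1) as above.  */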
7565 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
7566 one of the VSTRUCT modes: OI, CI or XI. */
7568 aarch64_simd_attr_length_move (rtx insn)
7570 enum machine_mode mode;
7572 extract_insn_cached (insn);
7574 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
7576 mode = GET_MODE (recog_data.operand[0]);
7592 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
7593 alignment of a vector to 128 bits. */
7594 static HOST_WIDE_INT
7595 aarch64_simd_vector_alignment (const_tree type)
7597 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
7598 return MIN (align, 128);
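/* For example, a 64-bit vector type gets 64-bit alignment here, while
   128-bit and larger vector types are capped at 128-bit alignment, in line
   with the AAPCS64 limit noted above.  */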
7601 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
7603 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
7608 /* We guarantee alignment for vectors up to 128-bits. */
7609 if (tree_int_cst_compare (TYPE_SIZE (type),
7610 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
7613 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
7617 /* If VALS is a vector constant that can be loaded into a register
7618 using DUP, generate instructions to do so and return an RTX to
7619 assign to the register. Otherwise return NULL_RTX. */
7621 aarch64_simd_dup_constant (rtx vals)
7623 enum machine_mode mode = GET_MODE (vals);
7624 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7625 int n_elts = GET_MODE_NUNITS (mode);
7626 bool all_same = true;
7630 if (GET_CODE (vals) != CONST_VECTOR)
7633 for (i = 1; i < n_elts; ++i)
7635 x = CONST_VECTOR_ELT (vals, i);
7636 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
7643 /* We can load this constant by using DUP and a constant in a
7644 single ARM register. This will be cheaper than a vector load. */
7646 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
7647 return gen_rtx_VEC_DUPLICATE (mode, x);
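/* E.g. a V4SImode constant with all elements equal to 5 can be loaded by
   moving 5 into a general register and emitting a DUP of that register,
   rather than loading the whole vector from the constant pool.  */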
7651 /* Generate code to load VALS, which is a PARALLEL containing only
7652 constants (for vec_init) or CONST_VECTOR, efficiently into a
7653 register. Returns an RTX to copy into the register, or NULL_RTX
7654 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
7656 aarch64_simd_make_constant (rtx vals)
7658 enum machine_mode mode = GET_MODE (vals);
7660 rtx const_vec = NULL_RTX;
7661 int n_elts = GET_MODE_NUNITS (mode);
7665 if (GET_CODE (vals) == CONST_VECTOR)
7667 else if (GET_CODE (vals) == PARALLEL)
7669 /* A CONST_VECTOR must contain only CONST_INTs and
7670 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
7671 Only store valid constants in a CONST_VECTOR. */
7672 for (i = 0; i < n_elts; ++i)
7674 rtx x = XVECEXP (vals, 0, i);
7675 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
7678 if (n_const == n_elts)
7679 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7684 if (const_vec != NULL_RTX
7685 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
7686 /* Load using MOVI/MVNI. */
7688 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
7689 /* Loaded using DUP. */
7691 else if (const_vec != NULL_RTX)
7692 /* Load from constant pool. We cannot take advantage of single-cycle
7693 LD1 because we need a PC-relative addressing mode. */
7696 /* A PARALLEL containing something not valid inside CONST_VECTOR.
7697 We cannot construct an initializer. */
7702 aarch64_expand_vector_init (rtx target, rtx vals)
7704 enum machine_mode mode = GET_MODE (target);
7705 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7706 int n_elts = GET_MODE_NUNITS (mode);
7707 int n_var = 0, one_var = -1;
7708 bool all_same = true;
7712 x = XVECEXP (vals, 0, 0);
7713 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7714 n_var = 1, one_var = 0;
7716 for (i = 1; i < n_elts; ++i)
7718 x = XVECEXP (vals, 0, i);
7719 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7720 ++n_var, one_var = i;
7722 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7728 rtx constant = aarch64_simd_make_constant (vals);
7729 if (constant != NULL_RTX)
7731 emit_move_insn (target, constant);
7736 /* Splat a single non-constant element if we can. */
7739 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7740 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7744 /* One field is non-constant. Load constant then overwrite varying
7745 field. This is more efficient than using the stack. */
7748 rtx copy = copy_rtx (vals);
7749 rtx index = GEN_INT (one_var);
7750 enum insn_code icode;
7752 /* Load constant part of vector, substitute neighboring value for the varying element. */
7754 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7755 aarch64_expand_vector_init (target, copy);
7757 /* Insert variable. */
7758 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7759 icode = optab_handler (vec_set_optab, mode);
7760 gcc_assert (icode != CODE_FOR_nothing);
7761 emit_insn (GEN_FCN (icode) (target, x, index));
7765 /* Construct the vector in memory one field at a time
7766 and load the whole vector. */
7767 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7768 for (i = 0; i < n_elts; i++)
7769 emit_move_insn (adjust_address_nv (mem, inner_mode,
7770 i * GET_MODE_SIZE (inner_mode)),
7771 XVECEXP (vals, 0, i));
7772 emit_move_insn (target, mem);
7776 static unsigned HOST_WIDE_INT
7777 aarch64_shift_truncation_mask (enum machine_mode mode)
7780 (aarch64_vector_mode_supported_p (mode)
7781 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
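/* That is, scalar shifts truncate their count modulo the mode width (a mask
   of GET_MODE_BITSIZE (mode) - 1, e.g. 63 for DImode), while AdvSIMD vector
   shifts do not, so 0 is returned for vector modes.  */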
7784 #ifndef TLS_SECTION_ASM_FLAG
7785 #define TLS_SECTION_ASM_FLAG 'T'
7789 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7790 tree decl ATTRIBUTE_UNUSED)
7792 char flagchars[10], *f = flagchars;
7794 /* If we have already declared this section, we can use an
7795 abbreviated form to switch back to it -- unless this section is
7796 part of a COMDAT group, in which case GAS requires the full
7797 declaration every time. */
7798 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7799 && (flags & SECTION_DECLARED))
7801 fprintf (asm_out_file, "\t.section\t%s\n", name);
7805 if (!(flags & SECTION_DEBUG))
7807 if (flags & SECTION_WRITE)
7809 if (flags & SECTION_CODE)
7811 if (flags & SECTION_SMALL)
7813 if (flags & SECTION_MERGE)
7815 if (flags & SECTION_STRINGS)
7817 if (flags & SECTION_TLS)
7818 *f++ = TLS_SECTION_ASM_FLAG;
7819 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7823 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7825 if (!(flags & SECTION_NOTYPE))
7830 if (flags & SECTION_BSS)
7835 #ifdef TYPE_OPERAND_FMT
7836 format = "," TYPE_OPERAND_FMT;
7841 fprintf (asm_out_file, format, type);
7843 if (flags & SECTION_ENTSIZE)
7844 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7845 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7847 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7848 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7850 fprintf (asm_out_file, ",%s,comdat",
7851 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7855 putc ('\n', asm_out_file);
7858 /* Select a format to encode pointers in exception handling data. */
7860 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7863 switch (aarch64_cmodel)
7865 case AARCH64_CMODEL_TINY:
7866 case AARCH64_CMODEL_TINY_PIC:
7867 case AARCH64_CMODEL_SMALL:
7868 case AARCH64_CMODEL_SMALL_PIC:
7869 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7871 type = DW_EH_PE_sdata4;
7874 /* No assumptions here. 8-byte relocs required. */
7875 type = DW_EH_PE_sdata8;
7878 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7881 /* Emit load exclusive. */
7884 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7885 rtx mem, rtx model_rtx)
7887 rtx (*gen) (rtx, rtx, rtx);
7891 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7892 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7893 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7894 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7899 emit_insn (gen (rval, mem, model_rtx));
7902 /* Emit store exclusive. */
7905 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7906 rtx rval, rtx mem, rtx model_rtx)
7908 rtx (*gen) (rtx, rtx, rtx, rtx);
7912 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7913 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7914 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7915 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7920 emit_insn (gen (bval, rval, mem, model_rtx));
7923 /* Mark the previous jump instruction as unlikely. */
7926 aarch64_emit_unlikely_jump (rtx insn)
7928 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
7930 insn = emit_jump_insn (insn);
7931 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
7934 /* Expand a compare and swap pattern. */
7937 aarch64_expand_compare_and_swap (rtx operands[])
7939 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7940 enum machine_mode mode, cmp_mode;
7941 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7946 oldval = operands[3];
7947 newval = operands[4];
7948 is_weak = operands[5];
7949 mod_s = operands[6];
7950 mod_f = operands[7];
7951 mode = GET_MODE (mem);
7954 /* Normally the succ memory model must be stronger than fail, but in the
7955 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7956 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7958 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7959 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7960 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
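/* Illustrative example: a call such as
     __atomic_compare_exchange_n (p, &expected, desired, 0,
				  __ATOMIC_RELEASE, __ATOMIC_ACQUIRE);
   reaches this point with mod_s == RELEASE and mod_f == ACQUIRE, and the
   success ordering is promoted to ACQ_REL as above.  */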
7966 /* For short modes, we're going to perform the comparison in SImode,
7967 so do the zero-extension now. */
7969 rval = gen_reg_rtx (SImode);
7970 oldval = convert_modes (SImode, mode, oldval, true);
7975 /* Force the value into a register if needed. */
7976 if (!aarch64_plus_operand (oldval, mode))
7977 oldval = force_reg (cmp_mode, oldval);
7986 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7987 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7988 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7989 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7994 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7996 if (mode == QImode || mode == HImode)
7997 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7999 x = gen_rtx_REG (CCmode, CC_REGNUM);
8000 x = gen_rtx_EQ (SImode, x, const0_rtx);
8001 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
8004 /* Split a compare and swap pattern. */
8007 aarch64_split_compare_and_swap (rtx operands[])
8009 rtx rval, mem, oldval, newval, scratch;
8010 enum machine_mode mode;
8012 rtx label1, label2, x, cond;
8016 oldval = operands[2];
8017 newval = operands[3];
8018 is_weak = (operands[4] != const0_rtx);
8019 scratch = operands[7];
8020 mode = GET_MODE (mem);
8025 label1 = gen_label_rtx ();
8026 emit_label (label1);
8028 label2 = gen_label_rtx ();
8030 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
8032 cond = aarch64_gen_compare_reg (NE, rval, oldval);
8033 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8034 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8035 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
8036 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8038 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
8042 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
8043 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8044 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
8045 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8049 cond = gen_rtx_REG (CCmode, CC_REGNUM);
8050 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
8051 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
8054 emit_label (label2);
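/* Rough sketch of the split sequence (not a literal listing of the RTL):
       .L1:  ldxr   rval, [mem]
	     cmp    rval, oldval
	     b.ne   .L2
	     stxr   scratch, newval, [mem]
	     cbnz   scratch, .L1	; retry loop for the strong variant
       .L2:
   i.e. the usual load-exclusive/store-exclusive compare-and-swap loop.  */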
8057 /* Split an atomic operation. */
8060 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
8061 rtx value, rtx model_rtx, rtx cond)
8063 enum machine_mode mode = GET_MODE (mem);
8064 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
8067 label = gen_label_rtx ();
8071 new_out = gen_lowpart (wmode, new_out);
8073 old_out = gen_lowpart (wmode, old_out);
8076 value = simplify_gen_subreg (wmode, value, mode, 0);
8078 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
8087 x = gen_rtx_AND (wmode, old_out, value);
8088 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8089 x = gen_rtx_NOT (wmode, new_out);
8090 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8094 if (CONST_INT_P (value))
8096 value = GEN_INT (-INTVAL (value));
8102 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8103 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8107 aarch64_emit_store_exclusive (mode, cond, mem,
8108 gen_lowpart (mode, new_out), model_rtx);
8110 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8111 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8112 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8113 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
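/* Rough sketch of the resulting sequence for, e.g., an atomic add:
       .L1:  ldxr   old, [mem]
	     add    new, old, value
	     stxr   cond, new, [mem]
	     cbnz   cond, .L1
   i.e. a standard load-exclusive/store-exclusive retry loop.  */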
8117 aarch64_print_extension (void)
8119 const struct aarch64_option_extension *opt = NULL;
8121 for (opt = all_extensions; opt->name != NULL; opt++)
8122 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8123 asm_fprintf (asm_out_file, "+%s", opt->name);
8125 asm_fprintf (asm_out_file, "\n");
8129 aarch64_start_file (void)
8133 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8134 aarch64_print_extension ();
8136 else if (selected_cpu)
8138 const char *truncated_name
8139 = aarch64_rewrite_selected_cpu (selected_cpu->name);
8140 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
8141 aarch64_print_extension ();
8143 default_file_start();
8146 /* Target hook for c_mode_for_suffix. */
8147 static enum machine_mode
8148 aarch64_c_mode_for_suffix (char suffix)
8156 /* We can only represent floating point constants which will fit in
8157 "quarter-precision" values. These values are characterised by
8158 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
8161 (-1)^s * (n/16) * 2^r
8164 where 's' is the sign bit.
8165 'n' is an integer in the range 16 <= n <= 31.
8166 'r' is an integer in the range -3 <= r <= 4. */
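/* Worked example (illustrative): 1.25 = (-1)^0 * (20/16) * 2^0, so it is
   representable with s = 0, n = 20, r = 0.  The largest representable value
   is (31/16) * 2^4 = 31.0 and the smallest positive value is
   (16/16) * 2^-3 = 0.125.  */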
8168 /* Return true iff X can be represented as a quarter-precision
8169 floating point immediate operand. Note, we cannot represent 0.0. */
8171 aarch64_float_const_representable_p (rtx x)
8173 /* This represents our current view of how many bits
8174 make up the mantissa. */
8175 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8177 unsigned HOST_WIDE_INT mantissa, mask;
8178 HOST_WIDE_INT m1, m2;
8179 REAL_VALUE_TYPE r, m;
8181 if (!CONST_DOUBLE_P (x))
8184 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8186 /* We cannot represent infinities, NaNs or +/-zero. We won't
8187 know if we have +zero until we analyse the mantissa, but we
8188 can reject the other invalid values. */
8189 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8190 || REAL_VALUE_MINUS_ZERO (r))
8193 /* Extract exponent. */
8194 r = real_value_abs (&r);
8195 exponent = REAL_EXP (&r);
8197 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8198 highest (sign) bit, with a fixed binary point at bit point_pos.
8199 m1 holds the low part of the mantissa, m2 the high part.
8200 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8201 bits for the mantissa, this can fail (low bits will be lost). */
8202 real_ldexp (&m, &r, point_pos - exponent);
8203 REAL_VALUE_TO_INT (&m1, &m2, m);
8205 /* If the low part of the mantissa has bits set we cannot represent the value. */
8209 /* We have rejected the lower HOST_WIDE_INT, so update our
8210 understanding of how many bits lie in the mantissa and
8211 look only at the high HOST_WIDE_INT. */
8213 point_pos -= HOST_BITS_PER_WIDE_INT;
8215 /* We can only represent values with a mantissa of the form 1.xxxx. */
8216 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8217 if ((mantissa & mask) != 0)
8220 /* Having filtered unrepresentable values, we may now remove all
8221 but the highest 5 bits. */
8222 mantissa >>= point_pos - 5;
8224 /* We cannot represent the value 0.0, so reject it. This is handled elsewhere. */
8229 /* Then, as bit 4 is always set, we can mask it off, leaving
8230 the mantissa in the range [0, 15]. */
8231 mantissa &= ~(1 << 4);
8232 gcc_assert (mantissa <= 15);
8234 /* GCC internally does not use IEEE754-like encoding (where normalized
8235 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
8236 Our mantissa values are shifted 4 places to the left relative to
8237 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8238 by 5 places to correct for GCC's representation. */
8239 exponent = 5 - exponent;
8241 return (exponent >= 0 && exponent <= 7);
8245 aarch64_output_simd_mov_immediate (rtx const_vector,
8246 enum machine_mode mode,
8250 static char templ[40];
8251 const char *mnemonic;
8252 const char *shift_op;
8253 unsigned int lane_count = 0;
8256 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
8258 /* This will return true to show const_vector is legal for use as either
8259 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
8260 also update INFO to show how the immediate should be generated. */
8261 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
8262 gcc_assert (is_valid);
8264 element_char = sizetochar (info.element_width);
8265 lane_count = width / info.element_width;
8267 mode = GET_MODE_INNER (mode);
8268 if (mode == SFmode || mode == DFmode)
8270 gcc_assert (info.shift == 0 && ! info.mvn);
8271 if (aarch64_float_const_zero_rtx_p (info.value))
8272 info.value = GEN_INT (0);
8277 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8278 char float_buf[buf_size] = {'\0'};
8279 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8282 if (lane_count == 1)
8283 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8285 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
8286 lane_count, element_char, float_buf);
8291 mnemonic = info.mvn ? "mvni" : "movi";
8292 shift_op = info.msl ? "msl" : "lsl";
8294 if (lane_count == 1)
8295 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8296 mnemonic, UINTVAL (info.value));
8297 else if (info.shift)
8298 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8299 ", %s %d", mnemonic, lane_count, element_char,
8300 UINTVAL (info.value), shift_op, info.shift);
8302 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
8303 mnemonic, lane_count, element_char, UINTVAL (info.value));
8308 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
8309 enum machine_mode mode)
8311 enum machine_mode vmode;
8313 gcc_assert (!VECTOR_MODE_P (mode));
8314 vmode = aarch64_simd_container_mode (mode, 64);
8315 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
8316 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
8319 /* Split operands into moves from op[1] + op[2] into op[0]. */
8322 aarch64_split_combinev16qi (rtx operands[3])
8324 unsigned int dest = REGNO (operands[0]);
8325 unsigned int src1 = REGNO (operands[1]);
8326 unsigned int src2 = REGNO (operands[2]);
8327 enum machine_mode halfmode = GET_MODE (operands[1]);
8328 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
8331 gcc_assert (halfmode == V16QImode);
8333 if (src1 == dest && src2 == dest + halfregs)
8335 /* No-op move. Can't split to nothing; emit something. */
8336 emit_note (NOTE_INSN_DELETED);
8340 /* Preserve register attributes for variable tracking. */
8341 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
8342 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
8343 GET_MODE_SIZE (halfmode));
8345 /* Special case of reversed high/low parts. */
8346 if (reg_overlap_mentioned_p (operands[2], destlo)
8347 && reg_overlap_mentioned_p (operands[1], desthi))
8349 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8350 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
8351 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
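/* The three XORs above swap the contents of operands[1] and operands[2]
   in place, avoiding the need for a scratch vector register.  */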
8353 else if (!reg_overlap_mentioned_p (operands[2], destlo))
8355 /* Try to avoid unnecessary moves if part of the result
8356 is in the right place already. */
8358 emit_move_insn (destlo, operands[1]);
8359 if (src2 != dest + halfregs)
8360 emit_move_insn (desthi, operands[2]);
8364 if (src2 != dest + halfregs)
8365 emit_move_insn (desthi, operands[2]);
8367 emit_move_insn (destlo, operands[1]);
8371 /* vec_perm support. */
8373 #define MAX_VECT_LEN 16
8375 struct expand_vec_perm_d
8377 rtx target, op0, op1;
8378 unsigned char perm[MAX_VECT_LEN];
8379 enum machine_mode vmode;
8385 /* Generate a variable permutation. */
8388 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
8390 enum machine_mode vmode = GET_MODE (target);
8391 bool one_vector_p = rtx_equal_p (op0, op1);
8393 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
8394 gcc_checking_assert (GET_MODE (op0) == vmode);
8395 gcc_checking_assert (GET_MODE (op1) == vmode);
8396 gcc_checking_assert (GET_MODE (sel) == vmode);
8397 gcc_checking_assert (TARGET_SIMD);
8401 if (vmode == V8QImode)
8403 /* Expand the argument to a V16QI mode by duplicating it. */
8404 rtx pair = gen_reg_rtx (V16QImode);
8405 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
8406 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8410 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
8417 if (vmode == V8QImode)
8419 pair = gen_reg_rtx (V16QImode);
8420 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
8421 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8425 pair = gen_reg_rtx (OImode);
8426 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
8427 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
8433 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
8435 enum machine_mode vmode = GET_MODE (target);
8436 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
8437 bool one_vector_p = rtx_equal_p (op0, op1);
8438 rtx rmask[MAX_VECT_LEN], mask;
8440 gcc_checking_assert (!BYTES_BIG_ENDIAN);
8442 /* The TBL instruction does not use a modulo index, so we must take care
8443 of that ourselves. */
8444 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
8445 for (i = 0; i < nelt; ++i)
8447 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
8448 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
8450 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
8453 /* Recognize patterns suitable for the TRN instructions. */
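/* For example, for V4SImode (little-endian element numbering) TRN1
   corresponds to the permutation {0, 4, 2, 6} and TRN2 to {1, 5, 3, 7},
   i.e. interleaving the even- or odd-numbered elements of the two inputs.  */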
8455 aarch64_evpc_trn (struct expand_vec_perm_d *d)
8457 unsigned int i, odd, mask, nelt = d->nelt;
8458 rtx out, in0, in1, x;
8459 rtx (*gen) (rtx, rtx, rtx);
8460 enum machine_mode vmode = d->vmode;
8462 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8465 /* Note that these are little-endian tests.
8466 We correct for big-endian later. */
8467 if (d->perm[0] == 0)
8469 else if (d->perm[0] == 1)
8473 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8475 for (i = 0; i < nelt; i += 2)
8477 if (d->perm[i] != i + odd)
8479 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
8489 if (BYTES_BIG_ENDIAN)
8491 x = in0, in0 = in1, in1 = x;
8500 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
8501 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
8502 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
8503 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
8504 case V4SImode: gen = gen_aarch64_trn2v4si; break;
8505 case V2SImode: gen = gen_aarch64_trn2v2si; break;
8506 case V2DImode: gen = gen_aarch64_trn2v2di; break;
8507 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
8508 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
8509 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
8518 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
8519 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
8520 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
8521 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
8522 case V4SImode: gen = gen_aarch64_trn1v4si; break;
8523 case V2SImode: gen = gen_aarch64_trn1v2si; break;
8524 case V2DImode: gen = gen_aarch64_trn1v2di; break;
8525 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
8526 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
8527 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
8533 emit_insn (gen (out, in0, in1));
8537 /* Recognize patterns suitable for the UZP instructions. */
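/* For example, for V4SImode UZP1 corresponds to the permutation
   {0, 2, 4, 6} (the even-numbered elements of the concatenated inputs)
   and UZP2 to {1, 3, 5, 7} (the odd-numbered elements).  */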
8539 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
8541 unsigned int i, odd, mask, nelt = d->nelt;
8542 rtx out, in0, in1, x;
8543 rtx (*gen) (rtx, rtx, rtx);
8544 enum machine_mode vmode = d->vmode;
8546 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8549 /* Note that these are little-endian tests.
8550 We correct for big-endian later. */
8551 if (d->perm[0] == 0)
8553 else if (d->perm[0] == 1)
8557 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8559 for (i = 0; i < nelt; i++)
8561 unsigned elt = (i * 2 + odd) & mask;
8562 if (d->perm[i] != elt)
8572 if (BYTES_BIG_ENDIAN)
8574 x = in0, in0 = in1, in1 = x;
8583 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
8584 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
8585 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
8586 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
8587 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
8588 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
8589 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
8590 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
8591 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
8592 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
8601 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
8602 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
8603 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
8604 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
8605 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
8606 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
8607 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
8608 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
8609 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
8610 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
8616 emit_insn (gen (out, in0, in1));
8620 /* Recognize patterns suitable for the ZIP instructions. */
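/* For example, for V4SImode ZIP1 corresponds to the permutation
   {0, 4, 1, 5} (interleaving the low halves of the two inputs) and ZIP2
   to {2, 6, 3, 7} (the high halves).  */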
8622 aarch64_evpc_zip (struct expand_vec_perm_d *d)
8624 unsigned int i, high, mask, nelt = d->nelt;
8625 rtx out, in0, in1, x;
8626 rtx (*gen) (rtx, rtx, rtx);
8627 enum machine_mode vmode = d->vmode;
8629 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8632 /* Note that these are little-endian tests.
8633 We correct for big-endian later. */
8635 if (d->perm[0] == high)
8638 else if (d->perm[0] == 0)
8642 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8644 for (i = 0; i < nelt / 2; i++)
8646 unsigned elt = (i + high) & mask;
8647 if (d->perm[i * 2] != elt)
8649 elt = (elt + nelt) & mask;
8650 if (d->perm[i * 2 + 1] != elt)
8660 if (BYTES_BIG_ENDIAN)
8662 x = in0, in0 = in1, in1 = x;
8671 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
8672 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
8673 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
8674 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
8675 case V4SImode: gen = gen_aarch64_zip2v4si; break;
8676 case V2SImode: gen = gen_aarch64_zip2v2si; break;
8677 case V2DImode: gen = gen_aarch64_zip2v2di; break;
8678 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
8679 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
8680 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
8689 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
8690 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
8691 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
8692 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
8693 case V4SImode: gen = gen_aarch64_zip1v4si; break;
8694 case V2SImode: gen = gen_aarch64_zip1v2si; break;
8695 case V2DImode: gen = gen_aarch64_zip1v2di; break;
8696 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
8697 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
8698 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
8704 emit_insn (gen (out, in0, in1));
8709 aarch64_evpc_dup (struct expand_vec_perm_d *d)
8711 rtx (*gen) (rtx, rtx, rtx);
8712 rtx out = d->target;
8714 enum machine_mode vmode = d->vmode;
8715 unsigned int i, elt, nelt = d->nelt;
8718 /* TODO: This may not be big-endian safe. */
8719 if (BYTES_BIG_ENDIAN)
8723 for (i = 1; i < nelt; i++)
8725 if (elt != d->perm[i])
8729 /* The generic preparation in aarch64_expand_vec_perm_const_1
8730 swaps the operand order and the permute indices if it finds
8731 d->perm[0] to be in the second operand. Thus, we can always
8732 use d->op0 and need not do any extra arithmetic to get the
8733 correct lane number. */
8735 lane = GEN_INT (elt);
8739 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8740 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8741 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8742 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8743 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8744 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8745 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8746 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8747 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8748 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8753 emit_insn (gen (out, in0, lane));
8758 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8760 rtx rperm[MAX_VECT_LEN], sel;
8761 enum machine_mode vmode = d->vmode;
8762 unsigned int i, nelt = d->nelt;
8764 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
8765 numbering of elements for big-endian, we must reverse the order. */
8766 if (BYTES_BIG_ENDIAN)
8772 /* Generic code will try constant permutation twice: once with the
8773 original mode and again with the elements lowered to QImode.
8774 So wait and don't do the selector expansion ourselves. */
8775 if (vmode != V8QImode && vmode != V16QImode)
8778 for (i = 0; i < nelt; ++i)
8779 rperm[i] = GEN_INT (d->perm[i]);
8780 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8781 sel = force_reg (vmode, sel);
8783 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8788 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8790 /* The pattern matching functions above are written to look for a small
8791 number to begin the sequence (0, 1, N/2). If we begin with an index
8792 from the second operand, we can swap the operands. */
8793 if (d->perm[0] >= d->nelt)
8795 unsigned i, nelt = d->nelt;
8798 for (i = 0; i < nelt; ++i)
8799 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
8808 if (aarch64_evpc_zip (d))
8810 else if (aarch64_evpc_uzp (d))
8812 else if (aarch64_evpc_trn (d))
8814 else if (aarch64_evpc_dup (d))
8816 return aarch64_evpc_tbl (d);
8821 /* Expand a vec_perm_const pattern. */
8824 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8826 struct expand_vec_perm_d d;
8833 d.vmode = GET_MODE (target);
8834 gcc_assert (VECTOR_MODE_P (d.vmode));
8835 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8836 d.testing_p = false;
8838 for (i = which = 0; i < nelt; ++i)
8840 rtx e = XVECEXP (sel, 0, i);
8841 int ei = INTVAL (e) & (2 * nelt - 1);
8842 which |= (ei < nelt ? 1 : 2);
8852 d.one_vector_p = false;
8853 if (!rtx_equal_p (op0, op1))
8856 /* The elements of PERM do not suggest that only the first operand
8857 is used, but both operands are identical. Allow easier matching
8858 of the permutation by folding the permutation into the single input vector. */
8862 for (i = 0; i < nelt; ++i)
8863 d.perm[i] &= nelt - 1;
8865 d.one_vector_p = true;
8870 d.one_vector_p = true;
8874 return aarch64_expand_vec_perm_const_1 (&d);
8878 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8879 const unsigned char *sel)
8881 struct expand_vec_perm_d d;
8882 unsigned int i, nelt, which;
8886 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8888 memcpy (d.perm, sel, nelt);
8890 /* Calculate whether all elements are in one vector. */
8891 for (i = which = 0; i < nelt; ++i)
8893 unsigned char e = d.perm[i];
8894 gcc_assert (e < 2 * nelt);
8895 which |= (e < nelt ? 1 : 2);
8898 /* If all elements are from the second vector, reindex as if from the first vector. */
8901 for (i = 0; i < nelt; ++i)
8904 /* Check whether the mask can be applied to a single vector. */
8905 d.one_vector_p = (which != 3);
8907 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8908 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8909 if (!d.one_vector_p)
8910 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8913 ret = aarch64_expand_vec_perm_const_1 (&d);
8919 /* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
8921 aarch64_cannot_change_mode_class (enum machine_mode from,
8922 enum machine_mode to,
8923 enum reg_class rclass)
8925 /* Full-reg subregs are allowed on general regs or any class if they are the same size. */
8927 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
8928 || !reg_classes_intersect_p (FP_REGS, rclass))
8931 /* Limited combinations of subregs are safe on FPREGs. Particularly,
8932 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
8933 2. Scalar to Scalar for integer modes or same size float modes.
8934 3. Vector to Vector modes. */
8935 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
8937 if (aarch64_vector_mode_supported_p (from)
8938 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
8941 if (GET_MODE_NUNITS (from) == 1
8942 && GET_MODE_NUNITS (to) == 1
8943 && (GET_MODE_CLASS (from) == MODE_INT
8947 if (aarch64_vector_mode_supported_p (from)
8948 && aarch64_vector_mode_supported_p (to))
8955 #undef TARGET_ADDRESS_COST
8956 #define TARGET_ADDRESS_COST aarch64_address_cost
8958 /* This hook determines whether unnamed bitfields affect the alignment
8959 of the containing structure. The hook returns true if the structure
8960 should inherit the alignment requirements of an unnamed bitfield's type. */
8962 #undef TARGET_ALIGN_ANON_BITFIELD
8963 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8965 #undef TARGET_ASM_ALIGNED_DI_OP
8966 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8968 #undef TARGET_ASM_ALIGNED_HI_OP
8969 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8971 #undef TARGET_ASM_ALIGNED_SI_OP
8972 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8974 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8975 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8976 hook_bool_const_tree_hwi_hwi_const_tree_true
8978 #undef TARGET_ASM_FILE_START
8979 #define TARGET_ASM_FILE_START aarch64_start_file
8981 #undef TARGET_ASM_OUTPUT_MI_THUNK
8982 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8984 #undef TARGET_ASM_SELECT_RTX_SECTION
8985 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8987 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8988 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8990 #undef TARGET_BUILD_BUILTIN_VA_LIST
8991 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8993 #undef TARGET_CALLEE_COPIES
8994 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8996 #undef TARGET_CAN_ELIMINATE
8997 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8999 #undef TARGET_CANNOT_FORCE_CONST_MEM
9000 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
9002 #undef TARGET_CONDITIONAL_REGISTER_USAGE
9003 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
9005 /* Only the least significant bit is used for initialization guard variables. */
9007 #undef TARGET_CXX_GUARD_MASK_BIT
9008 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
9010 #undef TARGET_C_MODE_FOR_SUFFIX
9011 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
9013 #ifdef TARGET_BIG_ENDIAN_DEFAULT
9014 #undef TARGET_DEFAULT_TARGET_FLAGS
9015 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
9018 #undef TARGET_CLASS_MAX_NREGS
9019 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
9021 #undef TARGET_BUILTIN_DECL
9022 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
9024 #undef TARGET_EXPAND_BUILTIN
9025 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
9027 #undef TARGET_EXPAND_BUILTIN_VA_START
9028 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
9030 #undef TARGET_FOLD_BUILTIN
9031 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
9033 #undef TARGET_FUNCTION_ARG
9034 #define TARGET_FUNCTION_ARG aarch64_function_arg
9036 #undef TARGET_FUNCTION_ARG_ADVANCE
9037 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
9039 #undef TARGET_FUNCTION_ARG_BOUNDARY
9040 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
9042 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
9043 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
9045 #undef TARGET_FUNCTION_VALUE
9046 #define TARGET_FUNCTION_VALUE aarch64_function_value
9048 #undef TARGET_FUNCTION_VALUE_REGNO_P
9049 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
9051 #undef TARGET_FRAME_POINTER_REQUIRED
9052 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
9054 #undef TARGET_GIMPLE_FOLD_BUILTIN
9055 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
9057 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
9058 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
9060 #undef TARGET_INIT_BUILTINS
9061 #define TARGET_INIT_BUILTINS aarch64_init_builtins
9063 #undef TARGET_LEGITIMATE_ADDRESS_P
9064 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
9066 #undef TARGET_LEGITIMATE_CONSTANT_P
9067 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
9069 #undef TARGET_LIBGCC_CMP_RETURN_MODE
9070 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
9073 #define TARGET_LRA_P aarch64_lra_p
9075 #undef TARGET_MANGLE_TYPE
9076 #define TARGET_MANGLE_TYPE aarch64_mangle_type
9078 #undef TARGET_MEMORY_MOVE_COST
9079 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
9081 #undef TARGET_MUST_PASS_IN_STACK
9082 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
9084 /* This target hook should return true if accesses to volatile bitfields
9085 should use the narrowest mode possible. It should return false if these
9086 accesses should use the bitfield container type. */
9087 #undef TARGET_NARROW_VOLATILE_BITFIELD
9088 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
9090 #undef TARGET_OPTION_OVERRIDE
9091 #define TARGET_OPTION_OVERRIDE aarch64_override_options
9093 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
9094 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
9095 aarch64_override_options_after_change
9097 #undef TARGET_PASS_BY_REFERENCE
9098 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
9100 #undef TARGET_PREFERRED_RELOAD_CLASS
9101 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
9103 #undef TARGET_SECONDARY_RELOAD
9104 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
9106 #undef TARGET_SHIFT_TRUNCATION_MASK
9107 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
9109 #undef TARGET_SETUP_INCOMING_VARARGS
9110 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
9112 #undef TARGET_STRUCT_VALUE_RTX
9113 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
9115 #undef TARGET_REGISTER_MOVE_COST
9116 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
9118 #undef TARGET_RETURN_IN_MEMORY
9119 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
9121 #undef TARGET_RETURN_IN_MSB
9122 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
9124 #undef TARGET_RTX_COSTS
9125 #define TARGET_RTX_COSTS aarch64_rtx_costs
9127 #undef TARGET_SCHED_ISSUE_RATE
9128 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
9130 #undef TARGET_TRAMPOLINE_INIT
9131 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
9133 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9134 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
9136 #undef TARGET_VECTOR_MODE_SUPPORTED_P
9137 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
9139 #undef TARGET_ARRAY_MODE_SUPPORTED_P
9140 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
9142 #undef TARGET_VECTORIZE_ADD_STMT_COST
9143 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
9145 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
9146 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
9147 aarch64_builtin_vectorization_cost
9149 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
9150 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
9152 #undef TARGET_VECTORIZE_BUILTINS
9153 #define TARGET_VECTORIZE_BUILTINS
9155 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
9156 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
9157 aarch64_builtin_vectorized_function
9159 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
9160 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
9161 aarch64_autovectorize_vector_sizes
9163 /* Section anchor support. */
9165 #undef TARGET_MIN_ANCHOR_OFFSET
9166 #define TARGET_MIN_ANCHOR_OFFSET -256
9168 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
9169 byte offset; we can do much more for larger data types, but have no way
9170 to determine the size of the access. We assume accesses are aligned. */
9171 #undef TARGET_MAX_ANCHOR_OFFSET
9172 #define TARGET_MAX_ANCHOR_OFFSET 4095
9174 #undef TARGET_VECTOR_ALIGNMENT
9175 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
9177 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
9178 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
9179 aarch64_simd_vector_alignment_reachable
9181 /* vec_perm support. */
9183 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
9184 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
9185 aarch64_vectorize_vec_perm_const_ok
9188 #undef TARGET_FIXED_CONDITION_CODE_REGS
9189 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
9191 #undef TARGET_RELAXED_ORDERING
9192 #define TARGET_RELAXED_ORDERING true
9194 #undef TARGET_FLAGS_REGNUM
9195 #define TARGET_FLAGS_REGNUM CC_REGNUM
9197 struct gcc_target targetm = TARGET_INITIALIZER;
9199 #include "gt-aarch64.h"