1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
25 #include "insn-codes.h"
27 #include "insn-attr.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
35 #include "hard-reg-set.h"
41 #include "target-def.h"
42 #include "targhooks.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "pointer-set.h"
50 #include "hash-table.h"
52 #include "basic-block.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
57 #include "gimple-expr.h"
64 #include "tree-vectorizer.h"
65 #include "config/arm/aarch-cost-tables.h"
68 /* Defined for convenience. */
69 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
71 /* Classifies an address.
74 A simple base register plus immediate offset.
77 A base register indexed by immediate offset with writeback.
80 A base register indexed by (optionally scaled) register.
83 A base register indexed by (optionally scaled) zero-extended register.
86 A base register indexed by (optionally scaled) sign-extended register.
89 A LO_SUM rtx with a base register and "LO12" symbol relocation.
92 A constant symbolic address, in pc-relative literal pool. */
94 enum aarch64_address_type {
104 struct aarch64_address_info {
105 enum aarch64_address_type type;
109 enum aarch64_symbol_type symbol_type;
112 struct simd_immediate_info
121 /* The current code model. */
122 enum aarch64_code_model aarch64_cmodel;
125 #undef TARGET_HAVE_TLS
126 #define TARGET_HAVE_TLS 1
129 static bool aarch64_lra_p (void);
130 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
131 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
133 enum machine_mode *, int *,
135 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
136 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
137 static void aarch64_override_options_after_change (void);
138 static bool aarch64_vector_mode_supported_p (enum machine_mode);
139 static unsigned bit_count (unsigned HOST_WIDE_INT);
140 static bool aarch64_const_vec_all_same_int_p (rtx,
141 HOST_WIDE_INT, HOST_WIDE_INT);
143 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
144 const unsigned char *sel);
145 static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
147 /* The processor for which instructions should be scheduled. */
148 enum aarch64_processor aarch64_tune = cortexa53;
150 /* The current tuning set. */
151 const struct tune_params *aarch64_tune_params;
153 /* Mask to specify which instructions we are allowed to generate. */
154 unsigned long aarch64_isa_flags = 0;
156 /* Mask to specify which instruction scheduling options should be used. */
157 unsigned long aarch64_tune_flags = 0;
159 /* Tuning parameters. */
161 #if HAVE_DESIGNATED_INITIALIZERS
162 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
164 #define NAMED_PARAM(NAME, VAL) (VAL)
167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
171 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
174 static const struct cpu_addrcost_table generic_addrcost_table =
176 #if HAVE_DESIGNATED_INITIALIZERS
185 NAMED_PARAM (pre_modify, 0),
186 NAMED_PARAM (post_modify, 0),
187 NAMED_PARAM (register_offset, 0),
188 NAMED_PARAM (register_extend, 0),
189 NAMED_PARAM (imm_offset, 0)
192 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
195 static const struct cpu_addrcost_table cortexa57_addrcost_table =
197 #if HAVE_DESIGNATED_INITIALIZERS
206 NAMED_PARAM (pre_modify, 0),
207 NAMED_PARAM (post_modify, 0),
208 NAMED_PARAM (register_offset, 0),
209 NAMED_PARAM (register_extend, 0),
210 NAMED_PARAM (imm_offset, 0),
213 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
216 static const struct cpu_regmove_cost generic_regmove_cost =
218 NAMED_PARAM (GP2GP, 1),
219 NAMED_PARAM (GP2FP, 2),
220 NAMED_PARAM (FP2GP, 2),
221 /* We currently do not provide direct support for TFmode Q->Q move.
222 Therefore we need to raise the cost above 2 in order to have
223 reload handle the situation. */
224 NAMED_PARAM (FP2FP, 4)
227 /* Generic costs for vector insn classes. */
228 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
231 static const struct cpu_vector_cost generic_vector_cost =
233 NAMED_PARAM (scalar_stmt_cost, 1),
234 NAMED_PARAM (scalar_load_cost, 1),
235 NAMED_PARAM (scalar_store_cost, 1),
236 NAMED_PARAM (vec_stmt_cost, 1),
237 NAMED_PARAM (vec_to_scalar_cost, 1),
238 NAMED_PARAM (scalar_to_vec_cost, 1),
239 NAMED_PARAM (vec_align_load_cost, 1),
240 NAMED_PARAM (vec_unalign_load_cost, 1),
241 NAMED_PARAM (vec_unalign_store_cost, 1),
242 NAMED_PARAM (vec_store_cost, 1),
243 NAMED_PARAM (cond_taken_branch_cost, 3),
244 NAMED_PARAM (cond_not_taken_branch_cost, 1)
247 /* Generic costs for vector insn classes. */
248 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
251 static const struct cpu_vector_cost cortexa57_vector_cost =
253 NAMED_PARAM (scalar_stmt_cost, 1),
254 NAMED_PARAM (scalar_load_cost, 4),
255 NAMED_PARAM (scalar_store_cost, 1),
256 NAMED_PARAM (vec_stmt_cost, 3),
257 NAMED_PARAM (vec_to_scalar_cost, 8),
258 NAMED_PARAM (scalar_to_vec_cost, 8),
259 NAMED_PARAM (vec_align_load_cost, 5),
260 NAMED_PARAM (vec_unalign_load_cost, 5),
261 NAMED_PARAM (vec_unalign_store_cost, 1),
262 NAMED_PARAM (vec_store_cost, 1),
263 NAMED_PARAM (cond_taken_branch_cost, 1),
264 NAMED_PARAM (cond_not_taken_branch_cost, 1)
267 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
270 static const struct tune_params generic_tunings =
272 &cortexa57_extra_costs,
273 &generic_addrcost_table,
274 &generic_regmove_cost,
275 &generic_vector_cost,
276 NAMED_PARAM (memmov_cost, 4),
277 NAMED_PARAM (issue_rate, 2)
280 static const struct tune_params cortexa53_tunings =
282 &cortexa53_extra_costs,
283 &generic_addrcost_table,
284 &generic_regmove_cost,
285 &generic_vector_cost,
286 NAMED_PARAM (memmov_cost, 4),
287 NAMED_PARAM (issue_rate, 2)
290 static const struct tune_params cortexa57_tunings =
292 &cortexa57_extra_costs,
293 &cortexa57_addrcost_table,
294 &generic_regmove_cost,
295 &cortexa57_vector_cost,
296 NAMED_PARAM (memmov_cost, 4),
297 NAMED_PARAM (issue_rate, 3)
300 /* A processor implementing AArch64. */
303 const char *const name;
304 enum aarch64_processor core;
306 const unsigned long flags;
307 const struct tune_params *const tune;
310 /* Processor cores implementing AArch64. */
311 static const struct processor all_cores[] =
313 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
314 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
315 #include "aarch64-cores.def"
317 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
318 {NULL, aarch64_none, NULL, 0, NULL}
321 /* Architectures implementing AArch64. */
322 static const struct processor all_architectures[] =
324 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
325 {NAME, CORE, #ARCH, FLAGS, NULL},
326 #include "aarch64-arches.def"
328 {NULL, aarch64_none, NULL, 0, NULL}
331 /* Target specification.  These are populated as command-line arguments
332 are processed, or NULL if not specified. */
333 static const struct processor *selected_arch;
334 static const struct processor *selected_cpu;
335 static const struct processor *selected_tune;
337 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
339 /* An ISA extension in the co-processor and main instruction set space. */
340 struct aarch64_option_extension
342 const char *const name;
343 const unsigned long flags_on;
344 const unsigned long flags_off;
347 /* ISA extensions in AArch64. */
348 static const struct aarch64_option_extension all_extensions[] =
350 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
351 {NAME, FLAGS_ON, FLAGS_OFF},
352 #include "aarch64-option-extensions.def"
353 #undef AARCH64_OPT_EXTENSION
357 /* Used to track the size of an address when generating a pre/post
358 increment address. */
359 static enum machine_mode aarch64_memory_reference_mode;
361 /* Used to force GTY into this file. */
362 static GTY(()) int gty_dummy;
364 /* A table of valid AArch64 "bitmask immediate" values for
365 logical instructions. */
367 #define AARCH64_NUM_BITMASKS 5334
368 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
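/* For illustration, a standalone sketch (guarded out; this is not GCC's
   table-driven routine and the helper name is ours) of the property the
   table above enumerates: a value is a valid AArch64 bitmask immediate
   iff it is a repetition, at some element size in {2,4,8,16,32,64}, of a
   rotated contiguous run of ones, excluding all-zeros and all-ones.
   Enumerating all 64-bit values of this form gives the 5334 entries the
   table holds.  */
#if 0
#include <stdint.h>
#include <stdio.h>

static int
is_bitmask_imm (uint64_t val)
{
  if (val == 0 || val == ~(uint64_t) 0)
    return 0;				/* excluded by the encoding */
  for (unsigned esize = 2; esize <= 64; esize *= 2)
    {
      uint64_t emask = (esize == 64
			? ~(uint64_t) 0 : (((uint64_t) 1 << esize) - 1));
      uint64_t elt = val & emask, rep = elt;
      /* The whole value must be this element repeated.  */
      for (unsigned i = esize; i < 64; i *= 2)
	rep |= rep << i;
      if (rep != val)
	continue;
      /* Some rotation of the element must look like 0...01...1.  */
      for (unsigned r = 0; r < esize; r++)
	{
	  uint64_t rot = elt;
	  if (r != 0)
	    rot = ((elt >> r) | (elt << (esize - r))) & emask;
	  if (rot != 0 && rot != emask && (rot & (rot + 1)) == 0)
	    return 1;
	}
    }
  return 0;
}

int
main (void)
{
  /* "01" repeated; one run repeated; an arbitrary value: prints 1 1 0.  */
  printf ("%d %d %d\n", is_bitmask_imm (0x5555555555555555ull),
	  is_bitmask_imm (0x0000ffff0000ffffull),
	  is_bitmask_imm (0x12345ull));
  return 0;
}
#endif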
370 typedef enum aarch64_cond_code
372 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
373 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
374 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
378 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
380 /* The condition codes of the processor, and the inverse function. */
381 static const char * const aarch64_condition_codes[] =
383 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
384 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
387 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
389 aarch64_dbx_register_number (unsigned regno)
391 if (GP_REGNUM_P (regno))
392 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
393 else if (regno == SP_REGNUM)
394 return AARCH64_DWARF_SP;
395 else if (FP_REGNUM_P (regno))
396 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
398 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
399 equivalent DWARF register. */
400 return DWARF_FRAME_REGISTERS;
403 /* Return TRUE if MODE is any of the large INT modes. */
405 aarch64_vect_struct_mode_p (enum machine_mode mode)
407 return mode == OImode || mode == CImode || mode == XImode;
410 /* Return TRUE if MODE is any of the vector modes. */
412 aarch64_vector_mode_p (enum machine_mode mode)
414 return aarch64_vector_mode_supported_p (mode)
415 || aarch64_vect_struct_mode_p (mode);
418 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
420 aarch64_array_mode_supported_p (enum machine_mode mode,
421 unsigned HOST_WIDE_INT nelems)
424 && AARCH64_VALID_SIMD_QREG_MODE (mode)
425 && (nelems >= 2 && nelems <= 4))
431 /* Implement HARD_REGNO_NREGS. */
434 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
436 switch (aarch64_regno_regclass (regno))
440 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
442 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
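/* A standalone sketch (guarded out; the helper name is ours) of the
   ceil-division idiom used above: the number of UNIT-byte registers
   needed to hold a SIZE-byte value.  */
#if 0
#include <stdio.h>

static unsigned
nregs_for (unsigned size, unsigned unit)
{
  return (size + unit - 1) / unit;
}

int
main (void)
{
  /* A 16-byte TImode value: two 8-byte GP registers, but only one
     16-byte vector register.  Prints 2 1.  */
  printf ("%u %u\n", nregs_for (16, 8), nregs_for (16, 16));
  return 0;
}
#endif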
447 /* Implement HARD_REGNO_MODE_OK. */
450 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
452 if (GET_MODE_CLASS (mode) == MODE_CC)
453 return regno == CC_REGNUM;
455 if (regno == SP_REGNUM)
456 /* The purpose of comparing with ptr_mode is to support the
457 global register variable associated with the stack pointer
458 register via the syntax of asm ("wsp") in ILP32. */
459 return mode == Pmode || mode == ptr_mode;
461 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
462 return mode == Pmode;
464 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
467 if (FP_REGNUM_P (regno))
469 if (aarch64_vect_struct_mode_p (mode))
471 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
479 /* Return true if calls to DECL should be treated as
480 long-calls (i.e. called via a register).  */
482 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
487 /* Return true if calls to symbol-ref SYM should be treated as
488 long-calls (i.e. called via a register).  */
490 aarch64_is_long_call_p (rtx sym)
492 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
495 /* Return true if the offsets to a zero/sign-extract operation
496 represent an expression that matches an extend operation. The
497 operands represent the parameters from
499 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
501 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
504 HOST_WIDE_INT mult_val, extract_val;
506 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
509 mult_val = INTVAL (mult_imm);
510 extract_val = INTVAL (extract_imm);
513 && extract_val < GET_MODE_BITSIZE (mode)
514 && exact_log2 (extract_val & ~7) > 0
515 && (extract_val & 7) <= 4
516 && mult_val == (1 << (extract_val & 7)))
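/* A standalone sketch (guarded out; the helper name is ours) of the
   decode checked above: EXTRACT_IMM packs a power-of-two bitfield width
   in its upper bits and a shift of 0..4 in its low three bits, and
   MULT_IMM must equal 1 << shift for the extract to denote an
   extend-and-shift.  */
#if 0
#include <stdio.h>

static int
matches_extend (int mode_bits, long mult_val, long extract_val)
{
  long shift = extract_val & 7;
  long width = extract_val & ~7;
  return extract_val > 0
	 && extract_val < mode_bits
	 && width != 0 && (width & (width - 1)) == 0 /* power of two */
	 && shift <= 4
	 && mult_val == (1L << shift);
}

int
main (void)
{
  /* extract_val 35 means width 32, shift 3, so mult must be 8.
     Prints 1 0.  */
  printf ("%d %d\n", matches_extend (64, 8, 35), matches_extend (64, 4, 35));
  return 0;
}
#endif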
522 /* Emit an insn that's a simple single-set. Both the operands must be
523 known to be valid. */
525 emit_set_insn (rtx x, rtx y)
527 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
530 /* X and Y are two things to compare using CODE. Emit the compare insn and
531 return the rtx for register 0 in the proper mode. */
533 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
535 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
536 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
538 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
542 /* Build the SYMBOL_REF for __tls_get_addr. */
544 static GTY(()) rtx tls_get_addr_libfunc;
547 aarch64_tls_get_addr (void)
549 if (!tls_get_addr_libfunc)
550 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
551 return tls_get_addr_libfunc;
554 /* Return the TLS model to use for ADDR. */
556 static enum tls_model
557 tls_symbolic_operand_type (rtx addr)
559 enum tls_model tls_kind = TLS_MODEL_NONE;
562 if (GET_CODE (addr) == CONST)
564 split_const (addr, &sym, &addend);
565 if (GET_CODE (sym) == SYMBOL_REF)
566 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
568 else if (GET_CODE (addr) == SYMBOL_REF)
569 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
574 /* We'll allow LO_SUMs in our legitimate addresses so that combine
575 can take care of combining addresses where necessary, but for
576 generation purposes, we'll generate the address
579 tmp = hi (symbol_ref); adrp x1, foo
580 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo12:foo
584 adrp x1, :got:foo adrp tmp, :tlsgd:foo
585 ldr x1, [x1, :got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
589 Load TLS symbol, depending on TLS mechanism and TLS access model.
591 Global Dynamic - Traditional TLS:
593 add dest, tmp, #:tlsgd_lo12:imm
596 Global Dynamic - TLS Descriptors:
597 adrp dest, :tlsdesc:imm
598 ldr tmp, [dest, #:tlsdesc_lo12:imm]
599 add dest, dest, #:tlsdesc_lo12:imm
606 adrp tmp, :gottprel:imm
607 ldr dest, [tmp, #:gottprel_lo12:imm]
612 add t0, tp, #:tprel_hi12:imm
613 add t0, #:tprel_lo12_nc:imm
617 aarch64_load_symref_appropriately (rtx dest, rtx imm,
618 enum aarch64_symbol_type type)
622 case SYMBOL_SMALL_ABSOLUTE:
624 /* In ILP32, the mode of dest can be either SImode or DImode. */
626 enum machine_mode mode = GET_MODE (dest);
628 gcc_assert (mode == Pmode || mode == ptr_mode);
630 if (can_create_pseudo_p ())
631 tmp_reg = gen_reg_rtx (mode);
633 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
634 emit_insn (gen_add_losym (dest, tmp_reg, imm));
638 case SYMBOL_TINY_ABSOLUTE:
639 emit_insn (gen_rtx_SET (Pmode, dest, imm));
642 case SYMBOL_SMALL_GOT:
644 /* In ILP32, the mode of dest can be either SImode or DImode,
645 while the got entry is always of SImode size. The mode of
646 dest depends on how dest is used: if dest is assigned to a
647 pointer (e.g. in the memory), it has SImode; it may have
648 DImode if dest is dereferenced to access the memory.
649 This is why we have to handle three different ldr_got_small
650 patterns here (two patterns for ILP32). */
652 enum machine_mode mode = GET_MODE (dest);
654 if (can_create_pseudo_p ())
655 tmp_reg = gen_reg_rtx (mode);
657 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
658 if (mode == ptr_mode)
661 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
663 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
667 gcc_assert (mode == Pmode);
668 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
674 case SYMBOL_SMALL_TLSGD:
677 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
680 emit_call_insn (gen_tlsgd_small (result, imm));
681 insns = get_insns ();
684 RTL_CONST_CALL_P (insns) = 1;
685 emit_libcall_block (insns, dest, result, imm);
689 case SYMBOL_SMALL_TLSDESC:
691 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
694 emit_insn (gen_tlsdesc_small (imm));
695 tp = aarch64_load_tp (NULL);
696 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
697 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
701 case SYMBOL_SMALL_GOTTPREL:
703 rtx tmp_reg = gen_reg_rtx (Pmode);
704 rtx tp = aarch64_load_tp (NULL);
705 emit_insn (gen_tlsie_small (tmp_reg, imm));
706 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
707 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
711 case SYMBOL_SMALL_TPREL:
713 rtx tp = aarch64_load_tp (NULL);
714 emit_insn (gen_tlsle_small (dest, tp, imm));
715 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
719 case SYMBOL_TINY_GOT:
720 emit_insn (gen_ldr_got_tiny (dest, imm));
728 /* Emit a move from SRC to DEST. Assume that the move expanders can
729 handle all moves if !can_create_pseudo_p (). The distinction is
730 important because, unlike emit_move_insn, the move expanders know
731 how to force Pmode objects into the constant pool even when the
732 constant pool address is not itself legitimate. */
734 aarch64_emit_move (rtx dest, rtx src)
736 return (can_create_pseudo_p ()
737 ? emit_move_insn (dest, src)
738 : emit_move_insn_1 (dest, src));
741 /* Split a 128-bit move operation into two 64-bit move operations,
742 taking care to handle partial overlap of register to register
743 copies. Special cases are needed when moving between GP regs and
744 FP regs. SRC can be a register, constant or memory; DST a register
745 or memory. If either operand is memory it must not have any side
748 aarch64_split_128bit_move (rtx dst, rtx src)
753 enum machine_mode mode = GET_MODE (dst);
755 gcc_assert (mode == TImode || mode == TFmode);
756 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
757 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
759 if (REG_P (dst) && REG_P (src))
761 int src_regno = REGNO (src);
762 int dst_regno = REGNO (dst);
764 /* Handle FP <-> GP regs. */
765 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
767 src_lo = gen_lowpart (word_mode, src);
768 src_hi = gen_highpart (word_mode, src);
772 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
773 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
777 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
778 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
782 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
784 dst_lo = gen_lowpart (word_mode, dst);
785 dst_hi = gen_highpart (word_mode, dst);
789 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
790 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
794 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
795 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
801 dst_lo = gen_lowpart (word_mode, dst);
802 dst_hi = gen_highpart (word_mode, dst);
803 src_lo = gen_lowpart (word_mode, src);
804 src_hi = gen_highpart_mode (word_mode, mode, src);
806 /* At most one pairing may overlap. */
807 if (reg_overlap_mentioned_p (dst_lo, src_hi))
809 aarch64_emit_move (dst_hi, src_hi);
810 aarch64_emit_move (dst_lo, src_lo);
814 aarch64_emit_move (dst_lo, src_lo);
815 aarch64_emit_move (dst_hi, src_hi);
820 aarch64_split_128bit_move_p (rtx dst, rtx src)
822 return (! REG_P (src)
823 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
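/* A standalone sketch (guarded out) of the overlap rule used in
   aarch64_split_128bit_move above: when the destination's low half
   aliases the source's high half, the high half must be copied first;
   otherwise low-first is safe.  */
#if 0
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  /* Model x0-x3 and copy the pair {x0,x1} (lo,hi) into {x1,x2}.  */
  uint64_t x[4] = { 0x1111, 0x2222, 0, 0 };
  int src_lo = 0, src_hi = 1, dst_lo = 1, dst_hi = 2;

  if (dst_lo == src_hi)		/* at most one pairing may overlap */
    {
      x[dst_hi] = x[src_hi];	/* read the high half before clobbering */
      x[dst_lo] = x[src_lo];
    }
  else
    {
      x[dst_lo] = x[src_lo];
      x[dst_hi] = x[src_hi];
    }
  /* Prints 1111 2222; low-first would have printed 1111 1111.  */
  printf ("%llx %llx\n", (unsigned long long) x[1],
	  (unsigned long long) x[2]);
  return 0;
}
#endif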
826 /* Split a complex SIMD combine. */
829 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
831 enum machine_mode src_mode = GET_MODE (src1);
832 enum machine_mode dst_mode = GET_MODE (dst);
834 gcc_assert (VECTOR_MODE_P (dst_mode));
836 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
838 rtx (*gen) (rtx, rtx, rtx);
843 gen = gen_aarch64_simd_combinev8qi;
846 gen = gen_aarch64_simd_combinev4hi;
849 gen = gen_aarch64_simd_combinev2si;
852 gen = gen_aarch64_simd_combinev2sf;
855 gen = gen_aarch64_simd_combinedi;
858 gen = gen_aarch64_simd_combinedf;
864 emit_insn (gen (dst, src1, src2));
869 /* Split a complex SIMD move. */
872 aarch64_split_simd_move (rtx dst, rtx src)
874 enum machine_mode src_mode = GET_MODE (src);
875 enum machine_mode dst_mode = GET_MODE (dst);
877 gcc_assert (VECTOR_MODE_P (dst_mode));
879 if (REG_P (dst) && REG_P (src))
881 rtx (*gen) (rtx, rtx);
883 gcc_assert (VECTOR_MODE_P (src_mode));
888 gen = gen_aarch64_split_simd_movv16qi;
891 gen = gen_aarch64_split_simd_movv8hi;
894 gen = gen_aarch64_split_simd_movv4si;
897 gen = gen_aarch64_split_simd_movv2di;
900 gen = gen_aarch64_split_simd_movv4sf;
903 gen = gen_aarch64_split_simd_movv2df;
909 emit_insn (gen (dst, src));
915 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
917 if (can_create_pseudo_p ())
918 return force_reg (mode, value);
921 x = aarch64_emit_move (x, value);
928 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
930 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
933 /* Load the full offset into a register. This
934 might be improvable in the future. */
935 high = GEN_INT (offset);
937 high = aarch64_force_temporary (mode, temp, high);
938 reg = aarch64_force_temporary (mode, temp,
939 gen_rtx_PLUS (mode, high, reg));
941 return plus_constant (mode, reg, offset);
945 aarch64_expand_mov_immediate (rtx dest, rtx imm)
947 enum machine_mode mode = GET_MODE (dest);
948 unsigned HOST_WIDE_INT mask;
951 unsigned HOST_WIDE_INT val;
954 int one_match, zero_match;
956 gcc_assert (mode == SImode || mode == DImode);
958 /* Check what type of symbol it is.  */
959 if (GET_CODE (imm) == SYMBOL_REF
960 || GET_CODE (imm) == LABEL_REF
961 || GET_CODE (imm) == CONST)
963 rtx mem, base, offset;
964 enum aarch64_symbol_type sty;
966 /* If we have (const (plus symbol offset)), separate out the offset
967 before we start classifying the symbol. */
968 split_const (imm, &base, &offset);
970 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
973 case SYMBOL_FORCE_TO_MEM:
974 if (offset != const0_rtx
975 && targetm.cannot_force_const_mem (mode, imm))
977 gcc_assert (can_create_pseudo_p ());
978 base = aarch64_force_temporary (mode, dest, base);
979 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
980 aarch64_emit_move (dest, base);
983 mem = force_const_mem (ptr_mode, imm);
985 if (mode != ptr_mode)
986 mem = gen_rtx_ZERO_EXTEND (mode, mem);
987 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
990 case SYMBOL_SMALL_TLSGD:
991 case SYMBOL_SMALL_TLSDESC:
992 case SYMBOL_SMALL_GOTTPREL:
993 case SYMBOL_SMALL_GOT:
994 case SYMBOL_TINY_GOT:
995 if (offset != const0_rtx)
997 gcc_assert(can_create_pseudo_p ());
998 base = aarch64_force_temporary (mode, dest, base);
999 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1000 aarch64_emit_move (dest, base);
1005 case SYMBOL_SMALL_TPREL:
1006 case SYMBOL_SMALL_ABSOLUTE:
1007 case SYMBOL_TINY_ABSOLUTE:
1008 aarch64_load_symref_appropriately (dest, imm, sty);
1016 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1018 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1022 if (!CONST_INT_P (imm))
1024 if (GET_CODE (imm) == HIGH)
1025 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1028 rtx mem = force_const_mem (mode, imm);
1030 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1038 /* We know we can't do this in 1 insn, and we must be able to do it
1039 in two; so don't mess around looking for sequences that don't buy
1041 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1042 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1043 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
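/* A standalone sketch (guarded out) of the two-instruction SImode
   sequence emitted above: a MOV of the low 16 bits followed by a MOVK
   that inserts the high 16 bits.  */
#if 0
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint32_t imm = 0x12345678;
  printf ("mov  w0, #0x%x\n", imm & 0xffff);		      /* 0x5678 */
  printf ("movk w0, #0x%x, lsl #16\n", (imm >> 16) & 0xffff); /* 0x1234 */
  return 0;
}
#endif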
1047 /* Remaining cases are all for DImode. */
1050 subtargets = optimize && can_create_pseudo_p ();
1056 for (i = 0; i < 64; i += 16, mask <<= 16)
1058 if ((val & mask) == 0)
1060 else if ((val & mask) == mask)
1067 for (i = 0; i < 64; i += 16, mask <<= 16)
1069 if ((val & mask) != mask)
1071 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1072 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1073 GEN_INT ((val >> i) & 0xffff)));
1080 if (zero_match == 2)
1081 goto simple_sequence;
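/* A standalone sketch (guarded out) of the scan above: count the 16-bit
   chunks of a DImode constant that are all-zeros or all-ones, to choose
   between a MOVZ-based and a MOVN-based build sequence.  */
#if 0
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint64_t val = 0xffffffff00001234ull;
  int zero_match = 0, one_match = 0;

  for (int i = 0; i < 64; i += 16)
    {
      unsigned chunk = (val >> i) & 0xffff;
      zero_match += (chunk == 0);
      one_match += (chunk == 0xffff);
    }
  /* Here one_match (2) > zero_match (1): starting from all-ones (MOVN)
     and patching the non-0xffff chunks with MOVK needs fewer insns.  */
  printf ("zero=%d one=%d\n", zero_match, one_match);
  return 0;
}
#endif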
1083 mask = 0x0ffff0000UL;
1084 for (i = 16; i < 64; i += 16, mask <<= 16)
1086 HOST_WIDE_INT comp = mask & ~(mask - 1);
1088 if (aarch64_uimm12_shift (val - (val & mask)))
1090 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1092 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1093 emit_insn (gen_adddi3 (dest, subtarget,
1094 GEN_INT (val - (val & mask))));
1097 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1099 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1101 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1102 GEN_INT ((val + comp) & mask)));
1103 emit_insn (gen_adddi3 (dest, subtarget,
1104 GEN_INT (val - ((val + comp) & mask))));
1107 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1109 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1111 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1112 GEN_INT ((val - comp) | ~mask)));
1113 emit_insn (gen_adddi3 (dest, subtarget,
1114 GEN_INT (val - ((val - comp) | ~mask))));
1117 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1119 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1121 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1122 GEN_INT (val | ~mask)));
1123 emit_insn (gen_adddi3 (dest, subtarget,
1124 GEN_INT (val - (val | ~mask))));
1129 /* See if we can do it by arithmetically combining two
1131 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1136 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1137 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1139 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1140 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1141 GEN_INT (aarch64_bitmasks[i])));
1142 emit_insn (gen_adddi3 (dest, subtarget,
1143 GEN_INT (val - aarch64_bitmasks[i])));
1147 for (j = 0; j < 64; j += 16, mask <<= 16)
1149 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1151 emit_insn (gen_rtx_SET (VOIDmode, dest,
1152 GEN_INT (aarch64_bitmasks[i])));
1153 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1154 GEN_INT ((val >> j) & 0xffff)));
1160 /* See if we can do it by logically combining two immediates. */
1161 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1163 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1167 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1168 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1170 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1171 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1172 GEN_INT (aarch64_bitmasks[i])));
1173 emit_insn (gen_iordi3 (dest, subtarget,
1174 GEN_INT (aarch64_bitmasks[j])));
1178 else if ((val & aarch64_bitmasks[i]) == val)
1182 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1183 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1186 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1187 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1188 GEN_INT (aarch64_bitmasks[j])));
1189 emit_insn (gen_anddi3 (dest, subtarget,
1190 GEN_INT (aarch64_bitmasks[i])));
1199 for (i = 0; i < 64; i += 16, mask <<= 16)
1201 if ((val & mask) != 0)
1205 emit_insn (gen_rtx_SET (VOIDmode, dest,
1206 GEN_INT (val & mask)));
1210 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1211 GEN_INT ((val >> i) & 0xffff)));
1217 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1219 /* Indirect calls are not currently supported. */
1223 /* Cannot tail-call to long-calls, since these are outside of the
1224 range of a branch instruction (we could handle this if we added
1225 support for indirect tail-calls).  */
1226 if (aarch64_decl_is_long_call_p (decl))
1232 /* Implement TARGET_PASS_BY_REFERENCE. */
1235 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1236 enum machine_mode mode,
1238 bool named ATTRIBUTE_UNUSED)
1241 enum machine_mode dummymode;
1244 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1245 size = (mode == BLKmode && type)
1246 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1248 /* Aggregates are passed by reference based on their size. */
1249 if (type && AGGREGATE_TYPE_P (type))
1251 size = int_size_in_bytes (type);
1254 /* Variable-sized arguments are always passed by reference.  */
1258 /* Can this be a candidate to be passed in fp/simd register(s)? */
1259 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1264 /* Arguments which are variable sized or larger than 2 registers are
1265 passed by reference unless they are a homogeneous floating-point
1267 return size > 2 * UNITS_PER_WORD;
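/* For illustration (standalone, guarded out): under the rule above a
   16-byte aggregate still travels in two general registers, a 24-byte
   one is passed by reference, and a four-member HFA goes in v0-v3 even
   though it is larger than two GP registers.  */
#if 0
struct two_words { long a, b; };	/* 16 bytes: x-registers */
struct three_words { long a, b, c; };	/* 24 bytes: by reference */
struct hfa4 { double a, b, c, d; };	/* 32 bytes: HFA, v0-v3 */
#endif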
1270 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1272 aarch64_return_in_msb (const_tree valtype)
1274 enum machine_mode dummy_mode;
1277 /* Never happens in little-endian mode. */
1278 if (!BYTES_BIG_ENDIAN)
1281 /* Only composite types smaller than or equal to 16 bytes can
1282 be potentially returned in registers. */
1283 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1284 || int_size_in_bytes (valtype) <= 0
1285 || int_size_in_bytes (valtype) > 16)
1288 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1289 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1290 is always passed/returned in the least significant bits of fp/simd
1292 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1293 &dummy_mode, &dummy_int, NULL))
1299 /* Implement TARGET_FUNCTION_VALUE.
1300 Define how to find the value returned by a function. */
1303 aarch64_function_value (const_tree type, const_tree func,
1304 bool outgoing ATTRIBUTE_UNUSED)
1306 enum machine_mode mode;
1309 enum machine_mode ag_mode;
1311 mode = TYPE_MODE (type);
1312 if (INTEGRAL_TYPE_P (type))
1313 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1315 if (aarch64_return_in_msb (type))
1317 HOST_WIDE_INT size = int_size_in_bytes (type);
1319 if (size % UNITS_PER_WORD != 0)
1321 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1322 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1326 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1327 &ag_mode, &count, NULL))
1329 if (!aarch64_composite_type_p (type, mode))
1331 gcc_assert (count == 1 && mode == ag_mode);
1332 return gen_rtx_REG (mode, V0_REGNUM);
1339 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1340 for (i = 0; i < count; i++)
1342 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1343 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1344 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1345 XVECEXP (par, 0, i) = tmp;
1351 return gen_rtx_REG (mode, R0_REGNUM);
1354 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1355 Return true if REGNO is the number of a hard register in which the values
1356 of called function may come back. */
1359 aarch64_function_value_regno_p (const unsigned int regno)
1361 /* Maximum of 16 bytes can be returned in the general registers. Examples
1362 of 16-byte return values are: 128-bit integers and 16-byte small
1363 structures (excluding homogeneous floating-point aggregates). */
1364 if (regno == R0_REGNUM || regno == R1_REGNUM)
1367 /* Up to four fp/simd registers can return a function value, e.g. a
1368 homogeneous floating-point aggregate having four members. */
1369 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1370 return !TARGET_GENERAL_REGS_ONLY;
1375 /* Implement TARGET_RETURN_IN_MEMORY.
1377 If the type T of the result of a function is such that
1378 void func (T arg)
1379 would require that arg be passed as a value in a register (or set of
1380 registers) according to the parameter passing rules, then the result
1381 is returned in the same registers as would be used for such an
1385 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1388 enum machine_mode ag_mode;
1391 if (!AGGREGATE_TYPE_P (type)
1392 && TREE_CODE (type) != COMPLEX_TYPE
1393 && TREE_CODE (type) != VECTOR_TYPE)
1394 /* Simple scalar types always returned in registers. */
1397 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1404 /* Types larger than 2 registers are returned in memory.  */
1405 size = int_size_in_bytes (type);
1406 return (size < 0 || size > 2 * UNITS_PER_WORD);
1410 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1411 const_tree type, int *nregs)
1413 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1414 return aarch64_vfp_is_call_or_return_candidate (mode,
1416 &pcum->aapcs_vfp_rmode,
1421 /* Given MODE and TYPE of a function argument, return the alignment in
1422 bits. The idea is to suppress any stronger alignment requested by
1423 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1424 This is a helper function for local use only. */
1427 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1429 unsigned int alignment;
1433 if (!integer_zerop (TYPE_SIZE (type)))
1435 if (TYPE_MODE (type) == mode)
1436 alignment = TYPE_ALIGN (type);
1438 alignment = GET_MODE_ALIGNMENT (mode);
1444 alignment = GET_MODE_ALIGNMENT (mode);
1449 /* Layout a function argument according to the AAPCS64 rules. The rule
1450 numbers refer to the rule numbers in the AAPCS64. */
1453 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1455 bool named ATTRIBUTE_UNUSED)
1457 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1458 int ncrn, nvrn, nregs;
1459 bool allocate_ncrn, allocate_nvrn;
1462 /* We need to do this once per argument. */
1463 if (pcum->aapcs_arg_processed)
1466 pcum->aapcs_arg_processed = true;
1468 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
1470 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1473 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1474 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1479 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1480 The following code thus handles passing by SIMD/FP registers first. */
1482 nvrn = pcum->aapcs_nvrn;
1484 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1485 and homogeneous short-vector aggregates (HVA).
1488 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1490 pcum->aapcs_nextnvrn = nvrn + nregs;
1491 if (!aarch64_composite_type_p (type, mode))
1493 gcc_assert (nregs == 1);
1494 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1500 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1501 for (i = 0; i < nregs; i++)
1503 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1504 V0_REGNUM + nvrn + i);
1505 tmp = gen_rtx_EXPR_LIST
1507 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1508 XVECEXP (par, 0, i) = tmp;
1510 pcum->aapcs_reg = par;
1516 /* C.3 NSRN is set to 8. */
1517 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1522 ncrn = pcum->aapcs_ncrn;
1523 nregs = size / UNITS_PER_WORD;
1525 /* C6 - C9, though the sign and zero extension semantics are
1526 handled elsewhere.  This is the case where the argument fits
1527 entirely in general registers.  */
1528 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1530 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1532 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1534 /* C.8 if the argument has an alignment of 16 then the NGRN is
1535 rounded up to the next even number. */
1536 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1539 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
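/* For illustration (standalone, guarded out): with rule C.8, after one
   integer argument in w0, a 16-byte-aligned __int128 skips x1 and is
   passed in the even-numbered pair x2/x3.  */
#if 0
void takes_pair (int a, __int128 b);	/* a: w0; x1 skipped; b: x2/x3 */
#endif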
1541 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1542 A reg is still generated for it, but the caller should be smart
1543 enough not to use it. */
1544 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1546 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1553 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1554 for (i = 0; i < nregs; i++)
1556 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1557 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1558 GEN_INT (i * UNITS_PER_WORD));
1559 XVECEXP (par, 0, i) = tmp;
1561 pcum->aapcs_reg = par;
1564 pcum->aapcs_nextncrn = ncrn + nregs;
1569 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1571 /* The argument is passed on the stack; record the needed number of words for
1572 this argument and align the total size if necessary. */
1574 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1575 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1576 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1577 16 / UNITS_PER_WORD);
1581 /* Implement TARGET_FUNCTION_ARG. */
1584 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1585 const_tree type, bool named)
1587 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1588 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1590 if (mode == VOIDmode)
1593 aarch64_layout_arg (pcum_v, mode, type, named);
1594 return pcum->aapcs_reg;
1598 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1599 const_tree fntype ATTRIBUTE_UNUSED,
1600 rtx libname ATTRIBUTE_UNUSED,
1601 const_tree fndecl ATTRIBUTE_UNUSED,
1602 unsigned n_named ATTRIBUTE_UNUSED)
1604 pcum->aapcs_ncrn = 0;
1605 pcum->aapcs_nvrn = 0;
1606 pcum->aapcs_nextncrn = 0;
1607 pcum->aapcs_nextnvrn = 0;
1608 pcum->pcs_variant = ARM_PCS_AAPCS64;
1609 pcum->aapcs_reg = NULL_RTX;
1610 pcum->aapcs_arg_processed = false;
1611 pcum->aapcs_stack_words = 0;
1612 pcum->aapcs_stack_size = 0;
1618 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1619 enum machine_mode mode,
1623 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1624 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1626 aarch64_layout_arg (pcum_v, mode, type, named);
1627 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1628 != (pcum->aapcs_stack_words != 0));
1629 pcum->aapcs_arg_processed = false;
1630 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1631 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1632 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1633 pcum->aapcs_stack_words = 0;
1634 pcum->aapcs_reg = NULL_RTX;
1639 aarch64_function_arg_regno_p (unsigned regno)
1641 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1642 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1645 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1646 PARM_BOUNDARY bits of alignment, but will be given anything up
1647 to STACK_BOUNDARY bits if the type requires it. This makes sure
1648 that both before and after the layout of each argument, the Next
1649 Stacked Argument Address (NSAA) will have a minimum alignment of
1653 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1655 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1657 if (alignment < PARM_BOUNDARY)
1658 alignment = PARM_BOUNDARY;
1659 if (alignment > STACK_BOUNDARY)
1660 alignment = STACK_BOUNDARY;
1664 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1666 Return true if an argument passed on the stack should be padded upwards,
1667 i.e. if the least-significant byte of the stack slot has useful data.
1669 Small aggregate types are placed in the lowest memory address.
1671 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1674 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1676 /* On little-endian targets, the least significant byte of every stack
1677 argument is passed at the lowest byte address of the stack slot. */
1678 if (!BYTES_BIG_ENDIAN)
1681 /* Otherwise, integral, floating-point and pointer types are padded downward:
1682 the least significant byte of a stack argument is passed at the highest
1683 byte address of the stack slot. */
1685 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1686 || POINTER_TYPE_P (type))
1687 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1690 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1694 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1696 It specifies padding for the last (and possibly the only)
1697 element of a block move between registers and memory.  Assuming
1698 the block is in memory, padding upward means that the last
1699 element is padded after its most significant byte, while with
1700 downward padding the last element is padded on its least
1701 significant byte side.
1703 Small aggregates and small complex types are always padded
1706 We don't need to worry about homogeneous floating-point or
1707 short-vector aggregates; their move is not affected by the
1708 padding direction determined here. Regardless of endianness,
1709 each element of such an aggregate is put in the least
1710 significant bits of a fp/simd register.
1712 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1713 register has useful data, and return the opposite if the most
1714 significant byte does. */
1717 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1718 bool first ATTRIBUTE_UNUSED)
1721 /* Small composite types are always padded upward. */
1722 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1724 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1725 : GET_MODE_SIZE (mode));
1726 if (size < 2 * UNITS_PER_WORD)
1730 /* Otherwise, use the default padding. */
1731 return !BYTES_BIG_ENDIAN;
1734 static enum machine_mode
1735 aarch64_libgcc_cmp_return_mode (void)
1741 aarch64_frame_pointer_required (void)
1743 /* If the function contains dynamic stack allocations, we need to
1744 use the frame pointer to access the static parts of the frame. */
1745 if (cfun->calls_alloca)
1748 /* In aarch64_override_options_after_change
1749 flag_omit_leaf_frame_pointer turns off the frame pointer by
1750 default. Turn it back on now if we've not got a leaf
1752 if (flag_omit_leaf_frame_pointer
1753 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1759 /* Mark the registers that need to be saved by the callee and calculate
1760 the size of the callee-saved registers area and frame record (both FP
1761 and LR may be omitted). */
1763 aarch64_layout_frame (void)
1765 HOST_WIDE_INT offset = 0;
1768 if (reload_completed && cfun->machine->frame.laid_out)
1771 cfun->machine->frame.fp_lr_offset = 0;
1773 /* First mark all the registers that really need to be saved... */
1774 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1775 cfun->machine->frame.reg_offset[regno] = -1;
1777 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1778 cfun->machine->frame.reg_offset[regno] = -1;
1780 /* ... that includes the eh data registers (if needed)... */
1781 if (crtl->calls_eh_return)
1782 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1783 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1785 /* ... and any callee saved register that dataflow says is live. */
1786 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1787 if (df_regs_ever_live_p (regno)
1788 && !call_used_regs[regno])
1789 cfun->machine->frame.reg_offset[regno] = 0;
1791 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1792 if (df_regs_ever_live_p (regno)
1793 && !call_used_regs[regno])
1794 cfun->machine->frame.reg_offset[regno] = 0;
1796 if (frame_pointer_needed)
1798 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1799 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1800 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1803 /* Now assign stack slots for them. */
1804 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1805 if (cfun->machine->frame.reg_offset[regno] != -1)
1807 cfun->machine->frame.reg_offset[regno] = offset;
1808 offset += UNITS_PER_WORD;
1811 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1812 if (cfun->machine->frame.reg_offset[regno] != -1)
1814 cfun->machine->frame.reg_offset[regno] = offset;
1815 offset += UNITS_PER_WORD;
1818 if (frame_pointer_needed)
1820 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1821 offset += UNITS_PER_WORD;
1822 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1825 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1827 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1828 offset += UNITS_PER_WORD;
1829 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1832 cfun->machine->frame.padding0 =
1833 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1834 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1836 cfun->machine->frame.saved_regs_size = offset;
1837 cfun->machine->frame.laid_out = true;
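/* A standalone sketch (guarded out; the ROUND_UP here is our own) of
   the alignment arithmetic above: with, say, three 8-byte callee-saved
   registers, offset is 24, so padding0 is 8 and the saved area is
   rounded up to 32 bytes.  */
#if 0
#include <stdio.h>

#define ROUND_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

int
main (void)
{
  long offset = 24;
  /* Prints padding0=8 size=32.  */
  printf ("padding0=%ld size=%ld\n",
	  ROUND_UP (offset, 16) - offset, ROUND_UP (offset, 16));
  return 0;
}
#endif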
1840 /* Make the last instruction frame-related and note that it performs
1841 the operation described by FRAME_PATTERN. */
1844 aarch64_set_frame_expr (rtx frame_pattern)
1848 insn = get_last_insn ();
1849 RTX_FRAME_RELATED_P (insn) = 1;
1850 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1851 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1857 aarch64_register_saved_on_entry (int regno)
1859 return cfun->machine->frame.reg_offset[regno] != -1;
1864 aarch64_save_or_restore_fprs (int start_offset, int increment,
1865 bool restore, rtx base_rtx)
1871 rtx (*gen_mem_ref)(enum machine_mode, rtx)
1872 = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1875 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1877 if (aarch64_register_saved_on_entry (regno))
1880 mem = gen_mem_ref (DFmode,
1881 plus_constant (Pmode,
1885 for (regno2 = regno + 1;
1886 regno2 <= V31_REGNUM
1887 && !aarch64_register_saved_on_entry (regno2);
1892 if (regno2 <= V31_REGNUM
1893 && aarch64_register_saved_on_entry (regno2))
1896 /* Next highest register to be saved. */
1897 mem2 = gen_mem_ref (DFmode,
1901 start_offset + increment));
1902 if (restore == false)
1905 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1906 mem2, gen_rtx_REG (DFmode, regno2)));
1912 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1913 gen_rtx_REG (DFmode, regno2), mem2));
1915 add_reg_note (insn, REG_CFA_RESTORE,
1916 gen_rtx_REG (DFmode, regno));
1917 add_reg_note (insn, REG_CFA_RESTORE,
1918 gen_rtx_REG (DFmode, regno2));
1921 /* The first part of a frame-related parallel insn
1922 is always assumed to be relevant to the frame
1923 calculations; subsequent parts are only
1924 frame-related if explicitly marked. */
1925 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1927 start_offset += increment * 2;
1931 if (restore == false)
1932 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1935 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1936 add_reg_note (insn, REG_CFA_RESTORE,
1937 gen_rtx_REG (DImode, regno));
1939 start_offset += increment;
1941 RTX_FRAME_RELATED_P (insn) = 1;
1948 /* Offset from the stack pointer of where the saves and
1949 restores have to happen.  */
1951 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1955 rtx base_rtx = stack_pointer_rtx;
1956 HOST_WIDE_INT start_offset = offset;
1957 HOST_WIDE_INT increment = UNITS_PER_WORD;
1958 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1959 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1963 for (regno = R0_REGNUM; regno <= limit; regno++)
1965 if (aarch64_register_saved_on_entry (regno))
1968 mem = gen_mem_ref (Pmode,
1969 plus_constant (Pmode,
1973 for (regno2 = regno + 1;
1975 && !aarch64_register_saved_on_entry (regno2);
1980 if (regno2 <= limit
1981 && aarch64_register_saved_on_entry (regno2))
1984 /* Next highest register to be saved. */
1985 mem2 = gen_mem_ref (Pmode,
1989 start_offset + increment));
1990 if (restore == false)
1993 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1994 mem2, gen_rtx_REG (DImode, regno2)));
2000 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
2001 gen_rtx_REG (DImode, regno2), mem2));
2003 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2004 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
2007 /* The first part of a frame-related parallel insn
2008 is always assumed to be relevant to the frame
2009 calculations; subsequent parts are only
2010 frame-related if explicitly marked. */
2011 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
2014 start_offset += increment * 2;
2018 if (restore == false)
2019 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
2022 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
2023 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2025 start_offset += increment;
2027 RTX_FRAME_RELATED_P (insn) = 1;
2031 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
2035 /* AArch64 stack frames generated by this compiler look like:
2037 +-------------------------------+
2039 | incoming stack arguments |
2041 +-------------------------------+ <-- arg_pointer_rtx
2043 | callee-allocated save area |
2044 | for register varargs |
2046 +-------------------------------+ <-- frame_pointer_rtx
2050 +-------------------------------+
2052 +-------------------------------+ |
2055 | callee-saved registers | | frame.saved_regs_size
2057 +-------------------------------+ |
2059 +-------------------------------+ |
2061 P +-------------------------------+ <-- hard_frame_pointer_rtx
2062 | dynamic allocation |
2063 +-------------------------------+
2065 | outgoing stack arguments |
2067 +-------------------------------+ <-- stack_pointer_rtx
2069 Dynamic stack allocations such as alloca insert data at point P.
2070 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2071 hard_frame_pointer_rtx unchanged. */
2073 /* Generate the prologue instructions for entry into a function.
2074 Establish the stack frame by decreasing the stack pointer with a
2075 properly calculated size and, if necessary, create a frame record
2076 filled with the values of LR and previous frame pointer. The
2077 current FP is also set up if it is in use. */
2080 aarch64_expand_prologue (void)
2082 /* sub sp, sp, #<frame_size>
2083 stp {fp, lr}, [sp, #<frame_size> - 16]
2084 add fp, sp, #<frame_size> - hardfp_offset
2085 stp {cs_reg}, [fp, #-16] etc.
2087 sub sp, sp, <final_adjustment_if_any>
2089 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2090 HOST_WIDE_INT frame_size, offset;
2091 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2094 aarch64_layout_frame ();
2095 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2096 gcc_assert (!cfun->machine->saved_varargs_size || cfun->stdarg);
2098 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2099 + crtl->outgoing_args_size);
2100 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2101 STACK_BOUNDARY / BITS_PER_UNIT);
2103 if (flag_stack_usage_info)
2104 current_function_static_stack_size = frame_size;
2107 - original_frame_size
2108 - cfun->machine->frame.saved_regs_size);
2110 /* Store pairs and load pairs have a range of only -512 to 504.  */
2113 /* When the frame has a large size, an initial decrease is done on
2114 the stack pointer to jump over the callee-allocated save area for
2115 register varargs, the local variable area and/or the callee-saved
2116 register area. This will allow the pre-index write-back
2117 store pair instructions to be used for setting up the stack frame
2119 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2121 offset = cfun->machine->frame.saved_regs_size;
2123 frame_size -= (offset + crtl->outgoing_args_size);
2126 if (frame_size >= 0x1000000)
2128 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2129 emit_move_insn (op0, GEN_INT (-frame_size));
2130 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2131 aarch64_set_frame_expr (gen_rtx_SET
2132 (Pmode, stack_pointer_rtx,
2133 plus_constant (Pmode,
2137 else if (frame_size > 0)
2139 if ((frame_size & 0xfff) != frame_size)
2141 insn = emit_insn (gen_add2_insn
2143 GEN_INT (-(frame_size
2144 & ~(HOST_WIDE_INT)0xfff))));
2145 RTX_FRAME_RELATED_P (insn) = 1;
2147 if ((frame_size & 0xfff) != 0)
2149 insn = emit_insn (gen_add2_insn
2151 GEN_INT (-(frame_size
2152 & (HOST_WIDE_INT)0xfff))));
2153 RTX_FRAME_RELATED_P (insn) = 1;
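/* A standalone sketch (guarded out) of the split above: AArch64 add/sub
   immediates are 12 bits, optionally shifted left by 12, so a large
   frame size is applied as an aligned high part plus a low part.  */
#if 0
#include <stdio.h>

int
main (void)
{
  long frame_size = 0x12345;
  long hi = frame_size & ~(long) 0xfff;	/* sub sp, sp, #0x12000 */
  long lo = frame_size & (long) 0xfff;	/* sub sp, sp, #0x345 */
  printf ("hi=%#lx lo=%#lx\n", hi, lo);
  return 0;
}
#endif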
2162 /* If the frame pointer is needed, save it and LR first.  Make
2163 the frame pointer point to the location of the old frame
2164 pointer on the stack.  */
2165 if (frame_pointer_needed)
2171 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2172 GEN_INT (-offset)));
2173 RTX_FRAME_RELATED_P (insn) = 1;
2174 aarch64_set_frame_expr (gen_rtx_SET
2175 (Pmode, stack_pointer_rtx,
2176 gen_rtx_MINUS (Pmode,
2178 GEN_INT (offset))));
2179 mem_fp = gen_frame_mem (DImode,
2180 plus_constant (Pmode,
2183 mem_lr = gen_frame_mem (DImode,
2184 plus_constant (Pmode,
2188 insn = emit_insn (gen_store_pairdi (mem_fp,
2189 hard_frame_pointer_rtx,
2191 gen_rtx_REG (DImode,
2196 insn = emit_insn (gen_storewb_pairdi_di
2197 (stack_pointer_rtx, stack_pointer_rtx,
2198 hard_frame_pointer_rtx,
2199 gen_rtx_REG (DImode, LR_REGNUM),
2201 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2202 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2205 /* The first part of a frame-related parallel insn is always
2206 assumed to be relevant to the frame calculations;
2207 subsequent parts are only frame-related if explicitly
2209 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2210 RTX_FRAME_RELATED_P (insn) = 1;
2212 /* Set up frame pointer to point to the location of the
2213 previous frame pointer on the stack. */
2214 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2216 GEN_INT (fp_offset)));
2217 aarch64_set_frame_expr (gen_rtx_SET
2218 (Pmode, hard_frame_pointer_rtx,
2219 plus_constant (Pmode,
2222 RTX_FRAME_RELATED_P (insn) = 1;
2223 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2224 hard_frame_pointer_rtx));
2228 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2229 GEN_INT (-offset)));
2230 RTX_FRAME_RELATED_P (insn) = 1;
2233 aarch64_save_or_restore_callee_save_registers
2234 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2237 /* When offset >= 512:
2238 sub sp, sp, #<outgoing_args_size> */
2239 if (frame_size > -1)
2241 if (crtl->outgoing_args_size > 0)
2243 insn = emit_insn (gen_add2_insn
2245 GEN_INT (- crtl->outgoing_args_size)));
2246 RTX_FRAME_RELATED_P (insn) = 1;
2251 /* Generate the epilogue instructions for returning from a function. */
2253 aarch64_expand_epilogue (bool for_sibcall)
2255 HOST_WIDE_INT original_frame_size, frame_size, offset;
2256 HOST_WIDE_INT fp_offset;
2260 aarch64_layout_frame ();
2261 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2262 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2263 + crtl->outgoing_args_size);
2264 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2265 STACK_BOUNDARY / BITS_PER_UNIT);
2268 - original_frame_size
2269 - cfun->machine->frame.saved_regs_size);
2271 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2273 /* Store pairs and load pairs have a range of only -512 to 504.  */
2276 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2278 offset = cfun->machine->frame.saved_regs_size;
2280 frame_size -= (offset + crtl->outgoing_args_size);
2282 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2284 insn = emit_insn (gen_add2_insn
2286 GEN_INT (crtl->outgoing_args_size)));
2287 RTX_FRAME_RELATED_P (insn) = 1;
2293 /* If there were outgoing arguments or we've done dynamic stack
2294 allocation, then restore the stack pointer from the frame
2295 pointer. This is at most one insn and more efficient than using
2296 GCC's internal mechanism. */
2297 if (frame_pointer_needed
2298 && (crtl->outgoing_args_size || cfun->calls_alloca))
2300 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2301 hard_frame_pointer_rtx,
2302 GEN_INT (- fp_offset)));
2303 RTX_FRAME_RELATED_P (insn) = 1;
2304 /* As SP is set to (FP - fp_offset), according to the rules in
2305 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2306 from the value of SP from now on. */
2307 cfa_reg = stack_pointer_rtx;
2310 aarch64_save_or_restore_callee_save_registers
2311 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2313 /* Restore the frame pointer and lr if the frame pointer is needed. */
2316 if (frame_pointer_needed)
2322 mem_fp = gen_frame_mem (DImode,
2323 plus_constant (Pmode,
2326 mem_lr = gen_frame_mem (DImode,
2327 plus_constant (Pmode,
2331 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2333 gen_rtx_REG (DImode,
2339 insn = emit_insn (gen_loadwb_pairdi_di
2342 hard_frame_pointer_rtx,
2343 gen_rtx_REG (DImode, LR_REGNUM),
2345 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2346 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2347 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2348 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2349 plus_constant (Pmode, cfa_reg,
2353 /* The first part of a frame-related parallel insn
2354 is always assumed to be relevant to the frame
2355 calculations; subsequent parts are only
2356 frame-related if explicitly marked. */
2357 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2358 RTX_FRAME_RELATED_P (insn) = 1;
2359 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2360 add_reg_note (insn, REG_CFA_RESTORE,
2361 gen_rtx_REG (DImode, LR_REGNUM));
2365 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2367 RTX_FRAME_RELATED_P (insn) = 1;
2372 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2374 RTX_FRAME_RELATED_P (insn) = 1;
2378 /* Stack adjustment for exception handler. */
2379 if (crtl->calls_eh_return)
2381 /* We need to unwind the stack by the offset computed by
2382 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2383 based on SP. Ideally we would update the SP and define the
2384 CFA along the lines of:
2386 SP = SP + EH_RETURN_STACKADJ_RTX
2387 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2389 However the dwarf emitter only understands a constant
2390 register offset.
2392 The solution chosen here is to use the otherwise unused IP0
2393 as a temporary register to hold the current SP value. The
2394 CFA is described using IP0 then SP is modified. */
2396 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2398 insn = emit_move_insn (ip0, stack_pointer_rtx);
2399 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2400 RTX_FRAME_RELATED_P (insn) = 1;
2402 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2404 /* Ensure the assignment to IP0 does not get optimized away.  */
2405 emit_use (ip0);
2408 if (frame_size > -1)
2410 if (frame_size >= 0x1000000)
2412 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2413 emit_move_insn (op0, GEN_INT (frame_size));
2414 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2415 aarch64_set_frame_expr (gen_rtx_SET
2416 (Pmode, stack_pointer_rtx,
2417 plus_constant (Pmode,
2421 else if (frame_size > 0)
2423 if ((frame_size & 0xfff) != 0)
2425 insn = emit_insn (gen_add2_insn
2427 GEN_INT ((frame_size
2428 & (HOST_WIDE_INT) 0xfff))));
2429 RTX_FRAME_RELATED_P (insn) = 1;
2431 if ((frame_size & 0xfff) != frame_size)
2433 insn = emit_insn (gen_add2_insn
2435 GEN_INT ((frame_size
2436 & ~ (HOST_WIDE_INT) 0xfff))));
2437 RTX_FRAME_RELATED_P (insn) = 1;
2441 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2442 plus_constant (Pmode,
2447 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2449 emit_jump_insn (ret_rtx);
2452 /* Return the place to copy the exception unwinding return address to.
2453 This will probably be a stack slot, but could (in theory) be the
2454 return register.  */
2456 aarch64_final_eh_return_addr (void)
2458 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2459 aarch64_layout_frame ();
2460 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2461 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2462 + crtl->outgoing_args_size);
2463 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2464 STACK_BOUNDARY / BITS_PER_UNIT);
2466 - original_frame_size
2467 - cfun->machine->frame.saved_regs_size;
2469 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2470 return gen_rtx_REG (DImode, LR_REGNUM);
2472 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2473 result in a store to save LR introduced by builtin_eh_return () being
2474 incorrectly deleted because the alias is not detected.
2475 So in the calculation of the address to copy the exception unwinding
2476 return address to, we note two cases.
2477 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2478 we return a SP-relative location since all the addresses are SP-relative
2479 in this case. This prevents the store from being optimized away.
2480 If the fp_offset is not 0, then the addresses will be FP-relative and
2481 therefore we return a FP-relative location. */
2483 if (frame_pointer_needed)
2486 return gen_frame_mem (DImode,
2487 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2489 return gen_frame_mem (DImode,
2490 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2493 /* If FP is not needed, we calculate the location of LR, which would be
2494 at the top of the saved registers block. */
2496 return gen_frame_mem (DImode,
2497 plus_constant (Pmode,
2500 + cfun->machine->frame.saved_regs_size
2501 - 2 * UNITS_PER_WORD));
2504 /* Possibly output code to build up a constant in a register. For
2505 the benefit of the costs infrastructure, returns the number of
2506 instructions which would be emitted. GENERATE inhibits or
2507 enables code generation. */
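/* Worked example (illustrative values, not from the original source):
   val == 0x0000123400005678 has two non-zero 16-bit chunks, so the
   MOVZ path wins:
     movz x0, #0x5678
     movk x0, #0x1234, lsl 32
   whereas val == 0xffffffff12345678 has only two chunks that differ
   from 0xffff, so the MOVN path wins:
     movn x0, #0xa987              // x0 = 0xffffffffffff5678
     movk x0, #0x1234, lsl 16  */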
2510 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2514 if (aarch64_bitmask_imm (val, DImode))
2517 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2525 HOST_WIDE_INT valp = val >> 16;
2529 for (i = 16; i < 64; i += 16)
2531 valm = (valp & 0xffff);
2542 /* zcount contains the number of additional MOVK instructions
2543 required if the constant is built up with an initial MOVZ instruction,
2544 while ncount is the number of MOVK instructions required if starting
2545 with a MOVN instruction.  Choose the sequence that yields the fewest
2546 instructions, preferring MOVZ instructions when both counts are
2547 the same.  */
2548 if (ncount < zcount)
2551 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2552 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2559 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2560 GEN_INT (val & 0xffff));
2567 for (i = 16; i < 64; i += 16)
2569 if ((val & 0xffff) != tval)
2572 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2574 GEN_INT (val & 0xffff)));
2584 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2586 HOST_WIDE_INT mdelta = delta;
2587 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2588 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2593 if (mdelta >= 4096 * 4096)
2595 (void) aarch64_build_constant (scratchreg, delta, true);
2596 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2598 else if (mdelta > 0)
2602 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2603 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2605 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2606 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2608 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2609 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2611 if (mdelta % 4096 != 0)
2613 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2614 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2615 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
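/* For example (illustrative), delta == 0x1004 splits as
   (1 << 12) + 4 and becomes:
     mov  x<scratch>, 1
     add  x<regnum>, x<regnum>, x<scratch>, lsl 12
     add  x<regnum>, x<regnum>, 4
   where <regnum> and <scratch> stand for the caller-supplied
   registers.  */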
2620 /* Output code to add DELTA to the first argument, and then jump
2621 to FUNCTION. Used for C++ multiple inheritance. */
2623 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2624 HOST_WIDE_INT delta,
2625 HOST_WIDE_INT vcall_offset,
2628 /* The this pointer is always in x0. Note that this differs from
2629 Arm where the this pointer may be bumped to r1 if r0 is required
2630 to return a pointer to an aggregate. On AArch64 a result value
2631 pointer will be in x8. */
2632 int this_regno = R0_REGNUM;
2633 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2635 reload_completed = 1;
2636 emit_note (NOTE_INSN_PROLOGUE_END);
2638 if (vcall_offset == 0)
2639 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2642 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2644 this_rtx = gen_rtx_REG (Pmode, this_regno);
2645 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2646 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2651 if (delta >= -256 && delta < 256)
2652 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2653 plus_constant (Pmode, this_rtx, delta));
2655 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2658 if (Pmode == ptr_mode)
2659 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2661 aarch64_emit_move (temp0,
2662 gen_rtx_ZERO_EXTEND (Pmode,
2663 gen_rtx_MEM (ptr_mode, addr)));
2665 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2666 addr = plus_constant (Pmode, temp0, vcall_offset);
2669 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2670 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2673 if (Pmode == ptr_mode)
2674 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
2676 aarch64_emit_move (temp1,
2677 gen_rtx_SIGN_EXTEND (Pmode,
2678 gen_rtx_MEM (ptr_mode, addr)));
2680 emit_insn (gen_add2_insn (this_rtx, temp1));
2683 /* Generate a tail call to the target function. */
2684 if (!TREE_USED (function))
2686 assemble_external (function);
2687 TREE_USED (function) = 1;
2689 funexp = XEXP (DECL_RTL (function), 0);
2690 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2691 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2692 SIBLING_CALL_P (insn) = 1;
2694 insn = get_insns ();
2695 shorten_branches (insn);
2696 final_start_function (insn, file, 1);
2697 final (insn, file, 1);
2698 final_end_function ();
2700 /* Stop pretending to be a post-reload pass. */
2701 reload_completed = 0;
2705 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2707 if (GET_CODE (*x) == SYMBOL_REF)
2708 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2710 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2711 TLS offsets, not real symbol references. */
2712 if (GET_CODE (*x) == UNSPEC
2713 && XINT (*x, 1) == UNSPEC_TLS)
2720 aarch64_tls_referenced_p (rtx x)
2722 if (!TARGET_HAVE_TLS)
2725 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2730 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2732 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2733 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2744 aarch64_build_bitmask_table (void)
2746 unsigned HOST_WIDE_INT mask, imm;
2747 unsigned int log_e, e, s, r;
2748 unsigned int nimms = 0;
2750 for (log_e = 1; log_e <= 6; log_e++)
2754 mask = ~(HOST_WIDE_INT) 0;
2756 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2757 for (s = 1; s < e; s++)
2759 for (r = 0; r < e; r++)
2761 /* Set s consecutive bits to 1 (s < 64).  */
2762 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2763 /* Rotate right by r.  */
2764 if (r != 0)
2765 imm = ((imm >> r) | (imm << (e - r))) & mask;
2766 /* Replicate the constant depending on SIMD size.  */
2767 switch (log_e) {
2768 case 1: imm |= (imm << 2);
2769 case 2: imm |= (imm << 4);
2770 case 3: imm |= (imm << 8);
2771 case 4: imm |= (imm << 16);
2772 case 5: imm |= (imm << 32);
2778 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2779 aarch64_bitmasks[nimms++] = imm;
2784 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2785 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2786 aarch64_bitmasks_cmp);
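/* The loops above generate e * (e - 1) immediates for each element
   size e in {2, 4, 8, 16, 32, 64}; 2 + 12 + 56 + 240 + 992 + 4032
   == 5334 entries in total, which is what the asserts against
   AARCH64_NUM_BITMASKS check.  */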
2790 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2791 a left shift of 0 or 12 bits. */
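/* For instance (illustrative values), 0xabc and 0xabc000 both
   satisfy this test, while 0xabc0 does not: its set bits span both
   12-bit windows, so neither masked form reproduces the value.  */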
2793 aarch64_uimm12_shift (HOST_WIDE_INT val)
2795 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2796 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2801 /* Return true if val is an immediate that can be loaded into a
2802 register by a MOVZ instruction. */
2804 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2806 if (GET_MODE_SIZE (mode) > 4)
2808 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2809 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2814 /* Ignore sign extension. */
2815 val &= (HOST_WIDE_INT) 0xffffffff;
2817 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2818 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
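/* For example (illustrative values), 0xffff0000 passes, matching
   "movz x0, #0xffff, lsl 16"; 0x10001 fails since its set bits
   cross a 16-bit chunk boundary.  */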
2822 /* Return true if val is a valid bitmask immediate. */
2824 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2826 if (GET_MODE_SIZE (mode) < 8)
2828 /* Replicate bit pattern. */
2829 val &= (HOST_WIDE_INT) 0xffffffff;
2832 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2833 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
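/* Examples of valid bitmask immediates (illustrative values):
   0x5555555555555555 (s == 1, element size 2, replicated) and
   0x00ff00ff00ff00ff (8 consecutive bits, element size 16).
   A value such as 0x12345678 is not a repeated rotated run of
   ones and so is rejected.  */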
2837 /* Return true if val is an immediate that can be loaded into a
2838 register in a single instruction. */
2840 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2842 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2844 return aarch64_bitmask_imm (val, mode);
2848 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2852 if (GET_CODE (x) == HIGH)
2855 split_const (x, &base, &offset);
2856 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2858 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2859 != SYMBOL_FORCE_TO_MEM)
2862 /* Avoid generating a 64-bit relocation in ILP32; leave it
2863 to aarch64_expand_mov_immediate to handle properly.  */
2864 return mode != ptr_mode;
2867 return aarch64_tls_referenced_p (x);
2870 /* Return true if register REGNO is a valid index register.
2871 STRICT_P is true if REG_OK_STRICT is in effect. */
2874 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2876 if (!HARD_REGISTER_NUM_P (regno))
2884 regno = reg_renumber[regno];
2886 return GP_REGNUM_P (regno);
2889 /* Return true if register REGNO is a valid base register for mode MODE.
2890 STRICT_P is true if REG_OK_STRICT is in effect. */
2893 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2895 if (!HARD_REGISTER_NUM_P (regno))
2903 regno = reg_renumber[regno];
2906 /* The fake registers will be eliminated to either the stack or
2907 hard frame pointer, both of which are usually valid base registers.
2908 Reload deals with the cases where the eliminated form isn't valid. */
2909 return (GP_REGNUM_P (regno)
2910 || regno == SP_REGNUM
2911 || regno == FRAME_POINTER_REGNUM
2912 || regno == ARG_POINTER_REGNUM);
2915 /* Return true if X is a valid base register for mode MODE.
2916 STRICT_P is true if REG_OK_STRICT is in effect. */
2919 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2921 if (!strict_p && GET_CODE (x) == SUBREG)
2924 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2927 /* Return true if address offset is a valid index. If it is, fill in INFO
2928 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2931 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2932 enum machine_mode mode, bool strict_p)
2934 enum aarch64_address_type type;
2939 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2940 && GET_MODE (x) == Pmode)
2942 type = ADDRESS_REG_REG;
2946 /* (sign_extend:DI (reg:SI)) */
2947 else if ((GET_CODE (x) == SIGN_EXTEND
2948 || GET_CODE (x) == ZERO_EXTEND)
2949 && GET_MODE (x) == DImode
2950 && GET_MODE (XEXP (x, 0)) == SImode)
2952 type = (GET_CODE (x) == SIGN_EXTEND)
2953 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2954 index = XEXP (x, 0);
2957 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2958 else if (GET_CODE (x) == MULT
2959 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2960 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2961 && GET_MODE (XEXP (x, 0)) == DImode
2962 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2963 && CONST_INT_P (XEXP (x, 1)))
2965 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2966 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2967 index = XEXP (XEXP (x, 0), 0);
2968 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2970 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2971 else if (GET_CODE (x) == ASHIFT
2972 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2973 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2974 && GET_MODE (XEXP (x, 0)) == DImode
2975 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2976 && CONST_INT_P (XEXP (x, 1)))
2978 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2979 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2980 index = XEXP (XEXP (x, 0), 0);
2981 shift = INTVAL (XEXP (x, 1));
2983 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2984 else if ((GET_CODE (x) == SIGN_EXTRACT
2985 || GET_CODE (x) == ZERO_EXTRACT)
2986 && GET_MODE (x) == DImode
2987 && GET_CODE (XEXP (x, 0)) == MULT
2988 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2989 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2991 type = (GET_CODE (x) == SIGN_EXTRACT)
2992 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2993 index = XEXP (XEXP (x, 0), 0);
2994 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2995 if (INTVAL (XEXP (x, 1)) != 32 + shift
2996 || INTVAL (XEXP (x, 2)) != 0)
2999 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3000 (const_int 0xffffffff<<shift)) */
3001 else if (GET_CODE (x) == AND
3002 && GET_MODE (x) == DImode
3003 && GET_CODE (XEXP (x, 0)) == MULT
3004 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3005 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3006 && CONST_INT_P (XEXP (x, 1)))
3008 type = ADDRESS_REG_UXTW;
3009 index = XEXP (XEXP (x, 0), 0);
3010 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3011 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3014 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3015 else if ((GET_CODE (x) == SIGN_EXTRACT
3016 || GET_CODE (x) == ZERO_EXTRACT)
3017 && GET_MODE (x) == DImode
3018 && GET_CODE (XEXP (x, 0)) == ASHIFT
3019 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3020 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3022 type = (GET_CODE (x) == SIGN_EXTRACT)
3023 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3024 index = XEXP (XEXP (x, 0), 0);
3025 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3026 if (INTVAL (XEXP (x, 1)) != 32 + shift
3027 || INTVAL (XEXP (x, 2)) != 0)
3030 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3031 (const_int 0xffffffff<<shift)) */
3032 else if (GET_CODE (x) == AND
3033 && GET_MODE (x) == DImode
3034 && GET_CODE (XEXP (x, 0)) == ASHIFT
3035 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3036 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3037 && CONST_INT_P (XEXP (x, 1)))
3039 type = ADDRESS_REG_UXTW;
3040 index = XEXP (XEXP (x, 0), 0);
3041 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3042 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3045 /* (mult:P (reg:P) (const_int scale)) */
3046 else if (GET_CODE (x) == MULT
3047 && GET_MODE (x) == Pmode
3048 && GET_MODE (XEXP (x, 0)) == Pmode
3049 && CONST_INT_P (XEXP (x, 1)))
3051 type = ADDRESS_REG_REG;
3052 index = XEXP (x, 0);
3053 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3055 /* (ashift:P (reg:P) (const_int shift)) */
3056 else if (GET_CODE (x) == ASHIFT
3057 && GET_MODE (x) == Pmode
3058 && GET_MODE (XEXP (x, 0)) == Pmode
3059 && CONST_INT_P (XEXP (x, 1)))
3061 type = ADDRESS_REG_REG;
3062 index = XEXP (x, 0);
3063 shift = INTVAL (XEXP (x, 1));
3068 if (GET_CODE (index) == SUBREG)
3069 index = SUBREG_REG (index);
3072 (shift > 0 && shift <= 3
3073 && (1 << shift) == GET_MODE_SIZE (mode)))
3075 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3078 info->offset = index;
3079 info->shift = shift;
3087 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3089 return (offset >= -64 * GET_MODE_SIZE (mode)
3090 && offset < 64 * GET_MODE_SIZE (mode)
3091 && offset % GET_MODE_SIZE (mode) == 0);
3095 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3096 HOST_WIDE_INT offset)
3098 return offset >= -256 && offset < 256;
3102 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3105 && offset < 4096 * GET_MODE_SIZE (mode)
3106 && offset % GET_MODE_SIZE (mode) == 0);
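/* Taken together these helpers mirror the AArch64 load/store offset
   ranges; e.g. for DImode (8 bytes):
     LDP/STP   : -512 .. 504 in steps of 8  (7-bit signed, scaled)
     LDUR/STUR : -256 .. 255                (9-bit signed, unscaled)
     LDR/STR   : 0 .. 32760 in steps of 8   (12-bit unsigned, scaled)  */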
3109 /* Return true if X is a valid address for machine mode MODE. If it is,
3110 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3111 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3114 aarch64_classify_address (struct aarch64_address_info *info,
3115 rtx x, enum machine_mode mode,
3116 RTX_CODE outer_code, bool strict_p)
3118 enum rtx_code code = GET_CODE (x);
3120 bool allow_reg_index_p =
3121 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3123 /* Don't support anything other than POST_INC or REG addressing for
3124 AdvSIMD.  */
3125 if (aarch64_vector_mode_p (mode)
3126 && (code != POST_INC && code != REG))
3133 info->type = ADDRESS_REG_IMM;
3135 info->offset = const0_rtx;
3136 return aarch64_base_register_rtx_p (x, strict_p);
3141 if (GET_MODE_SIZE (mode) != 0
3142 && CONST_INT_P (op1)
3143 && aarch64_base_register_rtx_p (op0, strict_p))
3145 HOST_WIDE_INT offset = INTVAL (op1);
3147 info->type = ADDRESS_REG_IMM;
3151 /* TImode and TFmode values are allowed in both pairs of X
3152 registers and individual Q registers.  The available
3153 address modes are:
3154 X,X: 7-bit signed scaled offset
3155 Q:   9-bit signed offset
3156 We conservatively require an offset representable in either mode.
3157 */
3158 if (mode == TImode || mode == TFmode)
3159 return (offset_7bit_signed_scaled_p (mode, offset)
3160 && offset_9bit_signed_unscaled_p (mode, offset));
3162 if (outer_code == PARALLEL)
3163 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3164 && offset_7bit_signed_scaled_p (mode, offset));
3166 return (offset_9bit_signed_unscaled_p (mode, offset)
3167 || offset_12bit_unsigned_scaled_p (mode, offset));
3170 if (allow_reg_index_p)
3172 /* Look for base + (scaled/extended) index register. */
3173 if (aarch64_base_register_rtx_p (op0, strict_p)
3174 && aarch64_classify_index (info, op1, mode, strict_p))
3179 if (aarch64_base_register_rtx_p (op1, strict_p)
3180 && aarch64_classify_index (info, op0, mode, strict_p))
3193 info->type = ADDRESS_REG_WB;
3194 info->base = XEXP (x, 0);
3195 info->offset = NULL_RTX;
3196 return aarch64_base_register_rtx_p (info->base, strict_p);
3200 info->type = ADDRESS_REG_WB;
3201 info->base = XEXP (x, 0);
3202 if (GET_CODE (XEXP (x, 1)) == PLUS
3203 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3204 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3205 && aarch64_base_register_rtx_p (info->base, strict_p))
3207 HOST_WIDE_INT offset;
3208 info->offset = XEXP (XEXP (x, 1), 1);
3209 offset = INTVAL (info->offset);
3211 /* TImode and TFmode values are allowed in both pairs of X
3212 registers and individual Q registers.  The available
3213 address modes are:
3214 X,X: 7-bit signed scaled offset
3215 Q:   9-bit signed offset
3216 We conservatively require an offset representable in either mode.
3217 */
3218 if (mode == TImode || mode == TFmode)
3219 return (offset_7bit_signed_scaled_p (mode, offset)
3220 && offset_9bit_signed_unscaled_p (mode, offset));
3222 if (outer_code == PARALLEL)
3223 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3224 && offset_7bit_signed_scaled_p (mode, offset));
3226 return offset_9bit_signed_unscaled_p (mode, offset);
3233 /* load literal: pc-relative constant pool entry. Only supported
3234 for SI mode or larger. */
3235 info->type = ADDRESS_SYMBOLIC;
3236 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3240 split_const (x, &sym, &addend);
3241 return (GET_CODE (sym) == LABEL_REF
3242 || (GET_CODE (sym) == SYMBOL_REF
3243 && CONSTANT_POOL_ADDRESS_P (sym)));
3248 info->type = ADDRESS_LO_SUM;
3249 info->base = XEXP (x, 0);
3250 info->offset = XEXP (x, 1);
3251 if (allow_reg_index_p
3252 && aarch64_base_register_rtx_p (info->base, strict_p))
3255 split_const (info->offset, &sym, &offs);
3256 if (GET_CODE (sym) == SYMBOL_REF
3257 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3258 == SYMBOL_SMALL_ABSOLUTE))
3260 /* The symbol and offset must be aligned to the access size. */
3262 unsigned int ref_size;
3264 if (CONSTANT_POOL_ADDRESS_P (sym))
3265 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3266 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3268 tree exp = SYMBOL_REF_DECL (sym);
3269 align = TYPE_ALIGN (TREE_TYPE (exp));
3270 align = CONSTANT_ALIGNMENT (exp, align);
3272 else if (SYMBOL_REF_DECL (sym))
3273 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3274 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3275 && SYMBOL_REF_BLOCK (sym) != NULL)
3276 align = SYMBOL_REF_BLOCK (sym)->alignment;
3278 align = BITS_PER_UNIT;
3280 ref_size = GET_MODE_SIZE (mode);
3282 ref_size = GET_MODE_SIZE (DImode);
3284 return ((INTVAL (offs) & (ref_size - 1)) == 0
3285 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3296 aarch64_symbolic_address_p (rtx x)
3300 split_const (x, &x, &offset);
3301 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3304 /* Classify the base of symbolic expression X, given that X appears in
3305 context CONTEXT.  */
3307 enum aarch64_symbol_type
3308 aarch64_classify_symbolic_expression (rtx x,
3309 enum aarch64_symbol_context context)
3313 split_const (x, &x, &offset);
3314 return aarch64_classify_symbol (x, context);
3318 /* Return TRUE if X is a legitimate address for accessing memory in
3319 mode MODE.  */
3320 static bool
3321 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3323 struct aarch64_address_info addr;
3325 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3328 /* Return TRUE if X is a legitimate address for accessing memory in
3329 mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
3330 pair operation.  */
3331 bool
3332 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3333 RTX_CODE outer_code, bool strict_p)
3335 struct aarch64_address_info addr;
3337 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3340 /* Return TRUE if rtx X is immediate constant 0.0.  */
3342 aarch64_float_const_zero_rtx_p (rtx x)
3346 if (GET_MODE (x) == VOIDmode)
3349 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3350 if (REAL_VALUE_MINUS_ZERO (r))
3351 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3352 return REAL_VALUES_EQUAL (r, dconst0);
3355 /* Return the fixed registers used for condition codes. */
3358 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3361 *p2 = INVALID_REGNUM;
3366 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3368 /* All floating point compares return CCFP if it is an equality
3369 comparison, and CCFPE otherwise. */
3370 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3397 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3399 && (code == EQ || code == NE || code == LT || code == GE)
3400 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3401 || GET_CODE (x) == NEG))
3404 /* A compare with a shifted operand. Because of canonicalization,
3405 the comparison will have to be swapped when we emit the assembly
3406 code.  */
3407 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3408 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3409 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3410 || GET_CODE (x) == LSHIFTRT
3411 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3414 /* Similarly for a negated operand, but we can only do this for
3415 equalities.  */
3416 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3417 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3418 && (code == EQ || code == NE)
3419 && GET_CODE (x) == NEG)
3422 /* A compare of a mode narrower than SI mode against zero can be done
3423 by extending the value in the comparison. */
3424 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3426 /* Only use sign-extension if we really need it. */
3427 return ((code == GT || code == GE || code == LE || code == LT)
3428 ? CC_SESWPmode : CC_ZESWPmode);
3430 /* For everything else, return CCmode. */
3435 aarch64_get_condition_code (rtx x)
3437 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3438 enum rtx_code comp_code = GET_CODE (x);
3440 if (GET_MODE_CLASS (mode) != MODE_CC)
3441 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3449 case GE: return AARCH64_GE;
3450 case GT: return AARCH64_GT;
3451 case LE: return AARCH64_LS;
3452 case LT: return AARCH64_MI;
3453 case NE: return AARCH64_NE;
3454 case EQ: return AARCH64_EQ;
3455 case ORDERED: return AARCH64_VC;
3456 case UNORDERED: return AARCH64_VS;
3457 case UNLT: return AARCH64_LT;
3458 case UNLE: return AARCH64_LE;
3459 case UNGT: return AARCH64_HI;
3460 case UNGE: return AARCH64_PL;
3461 default: gcc_unreachable ();
3468 case NE: return AARCH64_NE;
3469 case EQ: return AARCH64_EQ;
3470 case GE: return AARCH64_GE;
3471 case GT: return AARCH64_GT;
3472 case LE: return AARCH64_LE;
3473 case LT: return AARCH64_LT;
3474 case GEU: return AARCH64_CS;
3475 case GTU: return AARCH64_HI;
3476 case LEU: return AARCH64_LS;
3477 case LTU: return AARCH64_CC;
3478 default: gcc_unreachable ();
3487 case NE: return AARCH64_NE;
3488 case EQ: return AARCH64_EQ;
3489 case GE: return AARCH64_LE;
3490 case GT: return AARCH64_LT;
3491 case LE: return AARCH64_GE;
3492 case LT: return AARCH64_GT;
3493 case GEU: return AARCH64_LS;
3494 case GTU: return AARCH64_CC;
3495 case LEU: return AARCH64_CS;
3496 case LTU: return AARCH64_HI;
3497 default: gcc_unreachable ();
3504 case NE: return AARCH64_NE;
3505 case EQ: return AARCH64_EQ;
3506 case GE: return AARCH64_PL;
3507 case LT: return AARCH64_MI;
3508 default: gcc_unreachable ();
3515 case NE: return AARCH64_NE;
3516 case EQ: return AARCH64_EQ;
3517 default: gcc_unreachable ();
3528 bit_count (unsigned HOST_WIDE_INT value)
3542 aarch64_print_operand (FILE *f, rtx x, char code)
3546 /* An integer or symbol address without a preceding # sign. */
3548 switch (GET_CODE (x))
3551 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3555 output_addr_const (f, x);
3559 if (GET_CODE (XEXP (x, 0)) == PLUS
3560 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3562 output_addr_const (f, x);
3568 output_operand_lossage ("Unsupported operand for code '%c'", code);
3573 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3577 if (GET_CODE (x) != CONST_INT
3578 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3580 output_operand_lossage ("invalid operand for '%%%c'", code);
3596 output_operand_lossage ("invalid operand for '%%%c'", code);
3606 /* Print N such that 2^N == X. */
3607 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3609 output_operand_lossage ("invalid operand for '%%%c'", code);
3613 asm_fprintf (f, "%d", n);
3618 /* Print the number of non-zero bits in X (a const_int). */
3619 if (GET_CODE (x) != CONST_INT)
3621 output_operand_lossage ("invalid operand for '%%%c'", code);
3625 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3629 /* Print the higher numbered register of a pair (TImode) of regs. */
3630 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3632 output_operand_lossage ("invalid operand for '%%%c'", code);
3636 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3640 /* Print a condition (eq, ne, etc). */
3642 /* CONST_TRUE_RTX means always -- that's the default. */
3643 if (x == const_true_rtx)
3646 if (!COMPARISON_P (x))
3648 output_operand_lossage ("invalid operand for '%%%c'", code);
3652 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3656 /* Print the inverse of a condition (eq <-> ne, etc). */
3658 /* CONST_TRUE_RTX means never -- that's the default. */
3659 if (x == const_true_rtx)
3665 if (!COMPARISON_P (x))
3667 output_operand_lossage ("invalid operand for '%%%c'", code);
3671 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3672 (aarch64_get_condition_code (x))], f);
3680 /* Print a scalar FP/SIMD register name. */
3681 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3683 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3686 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3693 /* Print the first FP/SIMD register name in a list. */
3694 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3696 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3699 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3703 /* Print bottom 16 bits of integer constant in hex. */
3704 if (GET_CODE (x) != CONST_INT)
3706 output_operand_lossage ("invalid operand for '%%%c'", code);
3709 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3714 /* Print a general register name or the zero register (32-bit or
3715 64-bit).  */
3716 if (x == const0_rtx
3717 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3719 asm_fprintf (f, "%czr", code);
3723 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3725 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3729 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3731 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3738 /* Print a normal operand, if it's a general register, then we
3739 assume DImode.  */
3740 if (x == NULL)
3741 {
3742 output_operand_lossage ("missing operand");
3746 switch (GET_CODE (x))
3749 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3753 aarch64_memory_reference_mode = GET_MODE (x);
3754 output_address (XEXP (x, 0));
3759 output_addr_const (asm_out_file, x);
3763 asm_fprintf (f, "%wd", INTVAL (x));
3767 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3769 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3771 HOST_WIDE_INT_MAX));
3772 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3774 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3783 /* CONST_DOUBLE can represent a double-width integer.
3784 In this case, the mode of x is VOIDmode. */
3785 if (GET_MODE (x) == VOIDmode)
3787 else if (aarch64_float_const_zero_rtx_p (x))
3792 else if (aarch64_float_const_representable_p (x))
3795 char float_buf[buf_size] = {'\0'};
3797 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3798 real_to_decimal_for_mode (float_buf, &r,
3801 asm_fprintf (asm_out_file, "%s", float_buf);
3805 output_operand_lossage ("invalid constant");
3808 output_operand_lossage ("invalid operand");
3814 if (GET_CODE (x) == HIGH)
3817 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3819 case SYMBOL_SMALL_GOT:
3820 asm_fprintf (asm_out_file, ":got:");
3823 case SYMBOL_SMALL_TLSGD:
3824 asm_fprintf (asm_out_file, ":tlsgd:");
3827 case SYMBOL_SMALL_TLSDESC:
3828 asm_fprintf (asm_out_file, ":tlsdesc:");
3831 case SYMBOL_SMALL_GOTTPREL:
3832 asm_fprintf (asm_out_file, ":gottprel:");
3835 case SYMBOL_SMALL_TPREL:
3836 asm_fprintf (asm_out_file, ":tprel:");
3839 case SYMBOL_TINY_GOT:
3846 output_addr_const (asm_out_file, x);
3850 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3852 case SYMBOL_SMALL_GOT:
3853 asm_fprintf (asm_out_file, ":lo12:");
3856 case SYMBOL_SMALL_TLSGD:
3857 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3860 case SYMBOL_SMALL_TLSDESC:
3861 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3864 case SYMBOL_SMALL_GOTTPREL:
3865 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3868 case SYMBOL_SMALL_TPREL:
3869 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3872 case SYMBOL_TINY_GOT:
3873 asm_fprintf (asm_out_file, ":got:");
3879 output_addr_const (asm_out_file, x);
3884 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3886 case SYMBOL_SMALL_TPREL:
3887 asm_fprintf (asm_out_file, ":tprel_hi12:");
3892 output_addr_const (asm_out_file, x);
3896 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3902 aarch64_print_operand_address (FILE *f, rtx x)
3904 struct aarch64_address_info addr;
3906 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3910 case ADDRESS_REG_IMM:
3911 if (addr.offset == const0_rtx)
3912 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3914 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3915 INTVAL (addr.offset));
3918 case ADDRESS_REG_REG:
3919 if (addr.shift == 0)
3920 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3921 reg_names [REGNO (addr.offset)]);
3923 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3924 reg_names [REGNO (addr.offset)], addr.shift);
3927 case ADDRESS_REG_UXTW:
3928 if (addr.shift == 0)
3929 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3930 REGNO (addr.offset) - R0_REGNUM);
3932 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3933 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3936 case ADDRESS_REG_SXTW:
3937 if (addr.shift == 0)
3938 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3939 REGNO (addr.offset) - R0_REGNUM);
3941 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3942 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3945 case ADDRESS_REG_WB:
3946 switch (GET_CODE (x))
3949 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3950 GET_MODE_SIZE (aarch64_memory_reference_mode));
3953 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3954 GET_MODE_SIZE (aarch64_memory_reference_mode));
3957 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3958 GET_MODE_SIZE (aarch64_memory_reference_mode));
3961 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3962 GET_MODE_SIZE (aarch64_memory_reference_mode));
3965 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3966 INTVAL (addr.offset));
3969 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3970 INTVAL (addr.offset));
3977 case ADDRESS_LO_SUM:
3978 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3979 output_addr_const (f, addr.offset);
3980 asm_fprintf (f, "]");
3983 case ADDRESS_SYMBOLIC:
3987 output_addr_const (f, x);
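/* The cases above produce the usual AArch64 assembly syntax, for
   example (illustrative): "[x0]", "[x0,16]", "[x0,x1,lsl 3]",
   "[x0,w1,sxtw 2]", "[x0,16]!", "[x0],16" and "[x0,#:lo12:sym]".  */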
3991 aarch64_label_mentioned_p (rtx x)
3996 if (GET_CODE (x) == LABEL_REF)
3999 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4000 referencing instruction, but they are constant offsets, not
4001 symbols.  */
4002 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4005 fmt = GET_RTX_FORMAT (GET_CODE (x));
4006 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4012 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4013 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4016 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4023 /* Implement REGNO_REG_CLASS. */
4026 aarch64_regno_regclass (unsigned regno)
4028 if (GP_REGNUM_P (regno))
4031 if (regno == SP_REGNUM)
4034 if (regno == FRAME_POINTER_REGNUM
4035 || regno == ARG_POINTER_REGNUM)
4036 return POINTER_REGS;
4038 if (FP_REGNUM_P (regno))
4039 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4044 /* Try a machine-dependent way of reloading an illegitimate address
4045 operand. If we find one, push the reload and return the new rtx. */
4048 aarch64_legitimize_reload_address (rtx *x_p,
4049 enum machine_mode mode,
4050 int opnum, int type,
4051 int ind_levels ATTRIBUTE_UNUSED)
4055 /* Do not allow mem (plus (reg, const)) if vector mode. */
4056 if (aarch64_vector_mode_p (mode)
4057 && GET_CODE (x) == PLUS
4058 && REG_P (XEXP (x, 0))
4059 && CONST_INT_P (XEXP (x, 1)))
4063 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4064 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4065 opnum, (enum reload_type) type);
4069 /* We must recognize output that we have already generated ourselves. */
4070 if (GET_CODE (x) == PLUS
4071 && GET_CODE (XEXP (x, 0)) == PLUS
4072 && REG_P (XEXP (XEXP (x, 0), 0))
4073 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4074 && CONST_INT_P (XEXP (x, 1)))
4076 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4077 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4078 opnum, (enum reload_type) type);
4082 /* We wish to handle large displacements off a base register by splitting
4083 the addend across an add and the mem insn. This can cut the number of
4084 extra insns needed from 3 to 1. It is only useful for load/store of a
4085 single register with a 12-bit offset field.  */
4086 if (GET_CODE (x) == PLUS
4087 && REG_P (XEXP (x, 0))
4088 && CONST_INT_P (XEXP (x, 1))
4089 && HARD_REGISTER_P (XEXP (x, 0))
4092 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4094 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4095 HOST_WIDE_INT low = val & 0xfff;
4096 HOST_WIDE_INT high = val - low;
4099 enum machine_mode xmode = GET_MODE (x);
4101 /* In ILP32, xmode can be either DImode or SImode. */
4102 gcc_assert (xmode == DImode || xmode == SImode);
4104 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4105 BLKmode alignment. */
4106 if (GET_MODE_SIZE (mode) == 0)
4109 offs = low % GET_MODE_SIZE (mode);
4111 /* Align misaligned offset by adjusting high part to compensate. */
4114 if (aarch64_uimm12_shift (high + offs))
4123 offs = GET_MODE_SIZE (mode) - offs;
4125 high = high + (low & 0x1000) - offs;
4130 /* Check for overflow. */
4131 if (high + low != val)
4134 cst = GEN_INT (high);
4135 if (!aarch64_uimm12_shift (high))
4136 cst = force_const_mem (xmode, cst);
4138 /* Reload high part into base reg, leaving the low part
4139 in the mem instruction.
4140 Note that replacing this gen_rtx_PLUS with plus_constant is
4141 wrong in this case because we rely on the
4142 (plus (plus reg c1) c2) structure being preserved so that
4143 XEXP (*p, 0) in push_reload below uses the correct term. */
4144 x = gen_rtx_PLUS (xmode,
4145 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4148 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4149 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4150 opnum, (enum reload_type) type);
4159 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4161 enum machine_mode mode,
4162 secondary_reload_info *sri)
4164 /* Without the TARGET_SIMD instructions we cannot move a Q register
4165 to a Q register directly. We need a scratch. */
4166 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4167 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4168 && reg_class_subset_p (rclass, FP_REGS))
4171 sri->icode = CODE_FOR_aarch64_reload_movtf;
4172 else if (mode == TImode)
4173 sri->icode = CODE_FOR_aarch64_reload_movti;
4177 /* A TFmode or TImode memory access should be handled via FP_REGS
4178 because AArch64 has richer addressing modes for LDR/STR instructions
4179 than LDP/STP instructions. */
4180 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4181 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4184 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4191 aarch64_can_eliminate (const int from, const int to)
4193 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4194 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4196 if (frame_pointer_needed)
4198 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4200 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4202 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4203 && !cfun->calls_alloca)
4205 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4215 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4217 HOST_WIDE_INT frame_size;
4218 HOST_WIDE_INT offset;
4220 aarch64_layout_frame ();
4221 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4222 + crtl->outgoing_args_size
4223 + cfun->machine->saved_varargs_size);
4225 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4226 offset = frame_size;
4228 if (to == HARD_FRAME_POINTER_REGNUM)
4230 if (from == ARG_POINTER_REGNUM)
4231 return offset - crtl->outgoing_args_size;
4233 if (from == FRAME_POINTER_REGNUM)
4234 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4237 if (to == STACK_POINTER_REGNUM)
4239 if (from == FRAME_POINTER_REGNUM)
4241 HOST_WIDE_INT elim = crtl->outgoing_args_size
4242 + cfun->machine->frame.saved_regs_size
4244 - cfun->machine->frame.fp_lr_offset;
4245 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4254 /* Implement RETURN_ADDR_RTX.  We do not support moving back to a
4255 previous frame.  */
4258 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4262 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4267 aarch64_asm_trampoline_template (FILE *f)
4271 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4272 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4276 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4277 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4279 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4280 assemble_aligned_integer (4, const0_rtx);
4281 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4282 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
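/* A minimal sketch of the resulting LP64 trampoline (illustrative;
   here IP1 is x17 and the static chain register is x18):
     ldr  x17, .+16      // load the target function address
     ldr  x18, .+20      // load the static chain value
     br   x17
     .word  0            // pad the code out to 16 bytes
     .xword <fnaddr>     // patched in by aarch64_trampoline_init
     .xword <chain>  */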
4286 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4288 rtx fnaddr, mem, a_tramp;
4289 const int tramp_code_sz = 16;
4291 /* Don't need to copy the trailing D-words, we fill those in below. */
4292 emit_block_move (m_tramp, assemble_trampoline_template (),
4293 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4294 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4295 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4296 if (GET_MODE (fnaddr) != ptr_mode)
4297 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4298 emit_move_insn (mem, fnaddr);
4300 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4301 emit_move_insn (mem, chain_value);
4303 /* XXX We should really define a "clear_cache" pattern and use
4304 gen_clear_cache(). */
4305 a_tramp = XEXP (m_tramp, 0);
4306 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4307 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4308 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4312 static unsigned char
4313 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4324 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4325 (GET_MODE_SIZE (mode) + 7) / 8;
4339 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4341 if (regclass == POINTER_REGS)
4342 return GENERAL_REGS;
4344 if (regclass == STACK_REG)
4347 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4353 /* If it's an integer immediate that MOVI can't handle, then
4354 FP_REGS is not an option, so we return NO_REGS instead. */
4355 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4356 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4359 /* Register elimination can result in a request for
4360 SP+constant->FP_REGS.  We cannot support such operations which
4361 use SP as source and an FP_REG as destination, so reject out
4362 of hand.  */
4363 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4365 rtx lhs = XEXP (x, 0);
4367 /* Look through a possible SUBREG introduced by ILP32. */
4368 if (GET_CODE (lhs) == SUBREG)
4369 lhs = SUBREG_REG (lhs);
4371 gcc_assert (REG_P (lhs));
4372 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4381 aarch64_asm_output_labelref (FILE* f, const char *name)
4383 asm_fprintf (f, "%U%s", name);
4387 aarch64_elf_asm_constructor (rtx symbol, int priority)
4389 if (priority == DEFAULT_INIT_PRIORITY)
4390 default_ctor_section_asm_out_constructor (symbol, priority);
4395 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4396 s = get_section (buf, SECTION_WRITE, NULL);
4397 switch_to_section (s);
4398 assemble_align (POINTER_SIZE);
4399 assemble_aligned_integer (POINTER_BYTES, symbol);
4404 aarch64_elf_asm_destructor (rtx symbol, int priority)
4406 if (priority == DEFAULT_INIT_PRIORITY)
4407 default_dtor_section_asm_out_destructor (symbol, priority);
4412 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4413 s = get_section (buf, SECTION_WRITE, NULL);
4414 switch_to_section (s);
4415 assemble_align (POINTER_SIZE);
4416 assemble_aligned_integer (POINTER_BYTES, symbol);
4421 aarch64_output_casesi (rtx *operands)
4425 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4427 static const char *const patterns[4][2] =
4430 "ldrb\t%w3, [%0,%w1,uxtw]",
4431 "add\t%3, %4, %w3, sxtb #2"
4434 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4435 "add\t%3, %4, %w3, sxth #2"
4438 "ldr\t%w3, [%0,%w1,uxtw #2]",
4439 "add\t%3, %4, %w3, sxtw #2"
4441 /* We assume that DImode is only generated when not optimizing and
4442 that we don't really need 64-bit address offsets. That would
4443 imply an object file with 8GB of code in a single function! */
4445 "ldr\t%w3, [%0,%w1,uxtw #2]",
4446 "add\t%3, %4, %w3, sxtw #2"
4450 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4452 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4454 gcc_assert (index >= 0 && index <= 3);
4456 /* Need to implement table size reduction, by changing the code below.  */
4457 output_asm_insn (patterns[index][0], operands);
4458 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4459 snprintf (buf, sizeof (buf),
4460 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4461 output_asm_insn (buf, operands);
4462 output_asm_insn (patterns[index][1], operands);
4463 output_asm_insn ("br\t%3", operands);
4464 assemble_label (asm_out_file, label);
4469 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4470 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4471 operator.  */
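/* E.g. aarch64_uxt_size (1, 0x1fe) == 8: masking with 0xff << 1
   corresponds to a UXTB on an operand shifted left by one.
   (Illustrative call, not from the original source.)  */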
4474 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4476 if (shift >= 0 && shift <= 3)
4479 for (size = 8; size <= 32; size *= 2)
4481 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4482 if (mask == bits << shift)
4490 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4491 const_rtx x ATTRIBUTE_UNUSED)
4493 /* We can't use blocks for constants when we're using a per-function
4494 constant pool.  */
4495 return false;
4499 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4500 rtx x ATTRIBUTE_UNUSED,
4501 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4503 /* Force all constant pool entries into the current function section. */
4504 return function_section (current_function_decl);
4510 /* Helper function for rtx cost calculation. Strip a shift expression
4511 from X. Returns the inner operand if successful, or the original
4512 expression on failure. */
4514 aarch64_strip_shift (rtx x)
4518 if ((GET_CODE (op) == ASHIFT
4519 || GET_CODE (op) == ASHIFTRT
4520 || GET_CODE (op) == LSHIFTRT)
4521 && CONST_INT_P (XEXP (op, 1)))
4522 return XEXP (op, 0);
4524 if (GET_CODE (op) == MULT
4525 && CONST_INT_P (XEXP (op, 1))
4526 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4527 return XEXP (op, 0);
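/* For example, aarch64_strip_shift applied to
   (ashift (reg X) (const_int 3)) or (mult (reg X) (const_int 8))
   yields (reg X); anything else is returned unchanged.
   (Illustrative RTL, not from the original source.)  */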
4532 /* Helper function for rtx cost calculation. Strip an extend
4533 expression from X. Returns the inner operand if successful, or the
4534 original expression on failure. We deal with a number of possible
4535 canonicalization variations here. */
4537 aarch64_strip_extend (rtx x)
4541 /* Zero and sign extraction of a widened value. */
4542 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4543 && XEXP (op, 2) == const0_rtx
4544 && GET_CODE (XEXP (op, 0)) == MULT
4545 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4547 return XEXP (XEXP (op, 0), 0);
4549 /* It can also be represented (for zero-extend) as an AND with an
4550 immediate.  */
4551 if (GET_CODE (op) == AND
4552 && GET_CODE (XEXP (op, 0)) == MULT
4553 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4554 && CONST_INT_P (XEXP (op, 1))
4555 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4556 INTVAL (XEXP (op, 1))) != 0)
4557 return XEXP (XEXP (op, 0), 0);
4559 /* Now handle extended register, as this may also have an optional
4560 left shift by 1..4. */
4561 if (GET_CODE (op) == ASHIFT
4562 && CONST_INT_P (XEXP (op, 1))
4563 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4566 if (GET_CODE (op) == ZERO_EXTEND
4567 || GET_CODE (op) == SIGN_EXTEND)
4576 /* Helper function for rtx cost calculation. Calculate the cost of
4577 a MULT, which may be part of a multiply-accumulate rtx. Return
4578 the calculated cost of the expression, recursing manually in to
4579 operands where needed. */
4582 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4585 const struct cpu_cost_table *extra_cost
4586 = aarch64_tune_params->insn_extra_cost;
4588 bool maybe_fma = (outer == PLUS || outer == MINUS);
4589 enum machine_mode mode = GET_MODE (x);
4591 gcc_checking_assert (code == MULT);
4596 if (VECTOR_MODE_P (mode))
4597 mode = GET_MODE_INNER (mode);
4599 /* Integer multiply/fma. */
4600 if (GET_MODE_CLASS (mode) == MODE_INT)
4602 /* The multiply will be canonicalized as a shift, cost it as such. */
4603 if (CONST_INT_P (op1)
4604 && exact_log2 (INTVAL (op1)) > 0)
4609 /* ADD (shifted register). */
4610 cost += extra_cost->alu.arith_shift;
4612 /* LSL (immediate). */
4613 cost += extra_cost->alu.shift;
4616 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4621 /* Integer multiplies or FMAs have zero/sign extending variants. */
4622 if ((GET_CODE (op0) == ZERO_EXTEND
4623 && GET_CODE (op1) == ZERO_EXTEND)
4624 || (GET_CODE (op0) == SIGN_EXTEND
4625 && GET_CODE (op1) == SIGN_EXTEND))
4627 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4628 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4633 /* MADD/SMADDL/UMADDL. */
4634 cost += extra_cost->mult[0].extend_add;
4636 /* MUL/SMULL/UMULL. */
4637 cost += extra_cost->mult[0].extend;
4643 /* This is either an integer multiply or an FMA. In both cases
4644 we want to recurse and cost the operands. */
4645 cost += rtx_cost (op0, MULT, 0, speed)
4646 + rtx_cost (op1, MULT, 1, speed);
4652 cost += extra_cost->mult[mode == DImode].add;
4655 cost += extra_cost->mult[mode == DImode].simple;
4664 /* Floating-point FMA can also support negations of the
4666 if (GET_CODE (op0) == NEG)
4669 op0 = XEXP (op0, 0);
4671 if (GET_CODE (op1) == NEG)
4674 op1 = XEXP (op1, 0);
4678 /* FMADD/FNMADD/FNMSUB/FMSUB. */
4679 cost += extra_cost->fp[mode == DFmode].fma;
4682 cost += extra_cost->fp[mode == DFmode].mult;
4685 cost += rtx_cost (op0, MULT, 0, speed)
4686 + rtx_cost (op1, MULT, 1, speed);
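/* As an illustration of the logic above (not text from the original
   source): (plus (mult (reg) (const_int 4)) (reg)) is costed as an
   ADD (shifted register) -- alu.arith_shift plus the cost of the
   multiplicand -- while (plus (mult (reg) (reg)) (reg)) is costed
   as a MADD via the mult[...].add table entry.  */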
4692 aarch64_address_cost (rtx x,
4693 enum machine_mode mode,
4694 addr_space_t as ATTRIBUTE_UNUSED,
4697 enum rtx_code c = GET_CODE (x);
4698 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4699 struct aarch64_address_info info;
4703 if (!aarch64_classify_address (&info, x, mode, c, false))
4705 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4707 /* This is a CONST or SYMBOL ref which will be split
4708 in a different way depending on the code model in use.
4709 Cost it through the generic infrastructure. */
4710 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4711 /* Divide through by the cost of one instruction to
4712 bring it to the same units as the address costs. */
4713 cost_symbol_ref /= COSTS_N_INSNS (1);
4714 /* The cost is then the cost of preparing the address,
4715 followed by an immediate (possibly 0) offset. */
4716 return cost_symbol_ref + addr_cost->imm_offset;
4720 /* This is most likely a jump table from a case
4721 statement.  */
4722 return addr_cost->register_offset;
4728 case ADDRESS_LO_SUM:
4729 case ADDRESS_SYMBOLIC:
4730 case ADDRESS_REG_IMM:
4731 cost += addr_cost->imm_offset;
4734 case ADDRESS_REG_WB:
4735 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4736 cost += addr_cost->pre_modify;
4737 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4738 cost += addr_cost->post_modify;
4744 case ADDRESS_REG_REG:
4745 cost += addr_cost->register_offset;
4748 case ADDRESS_REG_UXTW:
4749 case ADDRESS_REG_SXTW:
4750 cost += addr_cost->register_extend;
4760 /* For the sake of calculating the cost of the shifted register
4761 component, we can treat same-sized modes in the same way.  */
4762 switch (GET_MODE_BITSIZE (mode))
4765 cost += addr_cost->addr_scale_costs.hi;
4769 cost += addr_cost->addr_scale_costs.si;
4773 cost += addr_cost->addr_scale_costs.di;
4776 /* We can't tell, or this is a 128-bit vector. */
4778 cost += addr_cost->addr_scale_costs.ti;
4786 /* Calculate the cost of calculating X, storing it in *COST. Result
4787 is true if the total cost of the operation has now been calculated. */
4789 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4790 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4793 const struct cpu_cost_table *extra_cost
4794 = aarch64_tune_params->insn_extra_cost;
4795 enum machine_mode mode = GET_MODE (x);
4797 /* By default, assume that everything has equivalent cost to the
4798 cheapest instruction. Any additional costs are applied as a delta
4799 above this default. */
4800 *cost = COSTS_N_INSNS (1);
4802 /* TODO: The cost infrastructure currently does not handle
4803 vector operations. Assume that all vector operations
4804 are equally expensive. */
4805 if (VECTOR_MODE_P (mode))
4808 *cost += extra_cost->vect.alu;
4815 /* The cost depends entirely on the operands to SET. */
4820 switch (GET_CODE (op0))
4825 rtx address = XEXP (op0, 0);
4826 if (GET_MODE_CLASS (mode) == MODE_INT)
4827 *cost += extra_cost->ldst.store;
4828 else if (mode == SFmode)
4829 *cost += extra_cost->ldst.storef;
4830 else if (mode == DFmode)
4831 *cost += extra_cost->ldst.stored;
4834 COSTS_N_INSNS (aarch64_address_cost (address, mode,
4838 *cost += rtx_cost (op1, SET, 1, speed);
4842 if (! REG_P (SUBREG_REG (op0)))
4843 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4847 /* const0_rtx is in general free, but we will use an
4848 instruction to set a register to 0. */
4849 if (REG_P (op1) || op1 == const0_rtx)
4851 /* The cost is 1 per register copied. */
4852 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
4854 *cost = COSTS_N_INSNS (n_minus_1 + 1);
4857 /* Cost is just the cost of the RHS of the set. */
4858 *cost += rtx_cost (op1, SET, 1, speed);
4863 /* Bit-field insertion. Strip any redundant widening of
4864 the RHS to meet the width of the target. */
4865 if (GET_CODE (op1) == SUBREG)
4866 op1 = SUBREG_REG (op1);
4867 if ((GET_CODE (op1) == ZERO_EXTEND
4868 || GET_CODE (op1) == SIGN_EXTEND)
4869 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4870 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4871 >= INTVAL (XEXP (op0, 1))))
4872 op1 = XEXP (op1, 0);
4874 if (CONST_INT_P (op1))
4876 /* MOV immediate is assumed to always be cheap. */
4877 *cost = COSTS_N_INSNS (1);
4883 *cost += extra_cost->alu.bfi;
4884 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
4890 /* We can't make sense of this; assume the default cost. */
4891 *cost = COSTS_N_INSNS (1);
4897 /* If an instruction can incorporate a constant within the
4898 instruction, the instruction's expression avoids calling
4899 rtx_cost() on the constant. If rtx_cost() is called on a
4900 constant, then it is usually because the constant must be
4901 moved into a register by one or more instructions.
4903 The exception is constant 0, which can be expressed
4904 as XZR/WZR and is therefore free. The one case where this
4905 does not hold is (set (reg) (const0_rtx)), where we must cost
4906 the move. However, we can catch that when we cost the SET, so
4907 we don't need to consider it here. */
4908 if (x == const0_rtx)
4912 /* To a first approximation, the cost of building any other
4913 constant is proportional to the number of instructions
4914 required to build it. This holds whether we are compiling
4915 for SPEED or otherwise. */
4916 *cost = COSTS_N_INSNS (aarch64_build_constant (0,
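/* Worked example (editor's addition): AArch64 materializes wide
   integer constants with a MOVZ followed by up to three MOVKs, one
   per non-trivial 16-bit chunk.  A constant with a single non-zero
   16-bit chunk, e.g. 0x0000cafe00000000, is one instruction, while
   one with four distinct chunks can take four; the cost above scales
   with that instruction count whether compiling for speed or size.  */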
4925 /* mov[df,sf]_aarch64. */
4926 if (aarch64_float_const_representable_p (x))
4927 /* FMOV (scalar immediate). */
4928 *cost += extra_cost->fp[mode == DFmode].fpconst;
4929 else if (!aarch64_float_const_zero_rtx_p (x))
4931 /* This will be a load from memory. */
4933 *cost += extra_cost->ldst.loadd;
4935 *cost += extra_cost->ldst.loadf;
4938 /* Otherwise this is +0.0. We get this using MOVI d0, #0
4939 or MOV v0.s[0], wzr - neither of which is modeled by the
4940 cost tables. Just use the default cost. */
4950 /* For loads we want the base cost of a load, plus an
4951 approximation for the additional cost of the addressing mode. */
4953 rtx address = XEXP (x, 0);
4954 if (GET_MODE_CLASS (mode) == MODE_INT)
4955 *cost += extra_cost->ldst.load;
4956 else if (mode == SFmode)
4957 *cost += extra_cost->ldst.loadf;
4958 else if (mode == DFmode)
4959 *cost += extra_cost->ldst.loadd;
4962 COSTS_N_INSNS (aarch64_address_cost (address, mode,
4971 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4973 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
4974 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
4977 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
4981 /* Cost this as SUB wzr, X. */
4982 op0 = CONST0_RTX (GET_MODE (x));
4987 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
4989 /* Support (neg(fma...)) as a single instruction only if the
4990 sign of zeros is unimportant. This matches the decision
4991 making in aarch64.md. */
4992 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
4995 *cost = rtx_cost (op0, NEG, 0, speed);
5000 *cost += extra_cost->fp[mode == DFmode].neg;
5010 if (op1 == const0_rtx
5011 && GET_CODE (op0) == AND)
5017 /* Comparisons can work if the order is swapped.
5018 Canonicalization puts the more complex operation first, but
5019 we want it in op1. */
5021 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5034 /* Detect valid immediates. */
5035 if ((GET_MODE_CLASS (mode) == MODE_INT
5036 || (GET_MODE_CLASS (mode) == MODE_CC
5037 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5038 && CONST_INT_P (op1)
5039 && aarch64_uimm12_shift (INTVAL (op1)))
5041 *cost += rtx_cost (op0, MINUS, 0, speed);
5044 /* SUB(S) (immediate). */
5045 *cost += extra_cost->alu.arith;
5050 rtx new_op1 = aarch64_strip_extend (op1);
5052 /* Cost this as an FMA-alike operation. */
5053 if ((GET_CODE (new_op1) == MULT
5054 || GET_CODE (new_op1) == ASHIFT)
5057 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5058 (enum rtx_code) code,
5060 *cost += rtx_cost (op0, MINUS, 0, speed);
5064 *cost += rtx_cost (new_op1, MINUS, 1, speed);
5068 if (GET_MODE_CLASS (mode) == MODE_INT)
5070 *cost += extra_cost->alu.arith;
5071 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5073 *cost += extra_cost->fp[mode == DFmode].addsub;
5085 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5086 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5089 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5090 *cost += rtx_cost (op1, PLUS, 1, speed);
5094 if (GET_MODE_CLASS (mode) == MODE_INT
5095 && CONST_INT_P (op1)
5096 && aarch64_uimm12_shift (INTVAL (op1)))
5098 *cost += rtx_cost (op0, PLUS, 0, speed);
5101 /* ADD (immediate). */
5102 *cost += extra_cost->alu.arith;
5106 /* Strip any extend, leave shifts behind as we will
5107 cost them through mult_cost. */
5108 new_op0 = aarch64_strip_extend (op0);
5110 if (GET_CODE (new_op0) == MULT
5111 || GET_CODE (new_op0) == ASHIFT)
5113 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5115 *cost += rtx_cost (op1, PLUS, 1, speed);
5119 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5120 + rtx_cost (op1, PLUS, 1, speed));
5124 if (GET_MODE_CLASS (mode) == MODE_INT)
5126 *cost += extra_cost->alu.arith;
5127 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5129 *cost += extra_cost->fp[mode == DFmode].addsub;
5142 && GET_CODE (op0) == MULT
5143 && CONST_INT_P (XEXP (op0, 1))
5144 && CONST_INT_P (op1)
5145 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5148 /* This is a UBFM/SBFM. */
5149 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5151 *cost += extra_cost->alu.bfx;
5155 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5157 /* We possibly get the immediate for free; this is not modelled. */
5159 if (CONST_INT_P (op1)
5160 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5162 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5165 *cost += extra_cost->alu.logical;
5173 /* Handle ORN, EON, or BIC. */
5174 if (GET_CODE (op0) == NOT)
5175 op0 = XEXP (op0, 0);
5177 new_op0 = aarch64_strip_shift (op0);
5179 /* If we had a shift on op0 then this is a logical-shift-
5180 by-register/immediate operation. Otherwise, this is just
5181 a logical operation. */
5186 /* Shift by immediate. */
5187 if (CONST_INT_P (XEXP (op0, 1)))
5188 *cost += extra_cost->alu.log_shift;
5190 *cost += extra_cost->alu.log_shift_reg;
5193 *cost += extra_cost->alu.logical;
5196 /* In both cases we want to cost both operands. */
5197 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5198 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5208 *cost += extra_cost->alu.logical;
5210 /* The logical instruction could have the shifted register form,
5211 but the cost is the same if the shift is processed as a separate
5212 instruction, so we don't bother with it here. */
5216 if ((GET_MODE (x) == DImode
5217 && GET_MODE (XEXP (x, 0)) == SImode)
5218 || GET_CODE (XEXP (x, 0)) == MEM)
5220 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5226 if (GET_CODE (XEXP (x, 0)) == MEM)
5228 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
5234 if (!CONST_INT_P (XEXP (x, 1)))
5235 *cost += COSTS_N_INSNS (2);
5242 /* Shifting by a register often takes an extra cycle. */
5243 if (speed && !CONST_INT_P (XEXP (x, 1)))
5244 *cost += extra_cost->alu.arith_shift_reg;
5246 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
5250 if (!CONSTANT_P (XEXP (x, 0)))
5251 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
5255 if (!CONSTANT_P (XEXP (x, 1)))
5256 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
5257 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
5262 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
5266 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5267 /* aarch64_rtx_mult_cost always handles recursion to its operands. */
5273 *cost = COSTS_N_INSNS (2);
5276 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5277 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5278 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
5279 else if (GET_MODE (x) == DFmode)
5280 *cost += (extra_cost->fp[1].mult
5281 + extra_cost->fp[1].div);
5282 else if (GET_MODE (x) == SFmode)
5283 *cost += (extra_cost->fp[0].mult
5284 + extra_cost->fp[0].div);
5286 return false; /* All arguments need to be in registers. */
5290 *cost = COSTS_N_INSNS (1);
5293 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5294 *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
5295 else if (GET_MODE (x) == DFmode)
5296 *cost += extra_cost->fp[1].div;
5297 else if (GET_MODE (x) == SFmode)
5298 *cost += extra_cost->fp[0].div;
5300 return false; /* All arguments need to be in registers. */
5308 /* Wrapper around aarch64_rtx_costs; dumps the partial or total cost
5309 calculated for X. This cost is stored in *COST. Returns true
5310 if the total cost of X was calculated. */
5312 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
5313 int param, int *cost, bool speed)
5315 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
5317 if (dump_file && (dump_flags & TDF_DETAILS))
5319 print_rtl_single (dump_file, x);
5320 fprintf (dump_file, "\n%s cost: %d (%s)\n",
5321 speed ? "Hot" : "Cold",
5322 *cost, result ? "final" : "partial");
5329 aarch64_register_move_cost (enum machine_mode mode,
5330 reg_class_t from_i, reg_class_t to_i)
5332 enum reg_class from = (enum reg_class) from_i;
5333 enum reg_class to = (enum reg_class) to_i;
5334 const struct cpu_regmove_cost *regmove_cost
5335 = aarch64_tune_params->regmove_cost;
5337 /* Moving between a GPR and the stack costs the same as GP2GP. */
5338 if ((from == GENERAL_REGS && to == STACK_REG)
5339 || (to == GENERAL_REGS && from == STACK_REG))
5340 return regmove_cost->GP2GP;
5342 /* Moves to/from the stack register go via the GPRs. */
5343 if (to == STACK_REG || from == STACK_REG)
5344 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
5345 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
5347 if (from == GENERAL_REGS && to == GENERAL_REGS)
5348 return regmove_cost->GP2GP;
5349 else if (from == GENERAL_REGS)
5350 return regmove_cost->GP2FP;
5351 else if (to == GENERAL_REGS)
5352 return regmove_cost->FP2GP;
5354 /* When AdvSIMD instructions are disabled it is not possible to move
5355 a 128-bit value directly between Q registers. This is handled in
5356 secondary reload. A general register is used as a scratch to move
5357 the upper DI value and the lower DI value is moved directly,
5358 hence the cost is the sum of three moves. */
5359 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 128)
5360 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
5362 return regmove_cost->FP2FP;
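/* Illustrative sketch (editor's addition): how the three-move cost
   above is composed when AdvSIMD is disabled.  The structure mirrors
   cpu_regmove_cost; the names are hypothetical.  */
#if 0 /* Example only; not compiled.  */
struct demo_regmove_cost { int GP2GP, GP2FP, FP2GP, FP2FP; };

static int
demo_q_reg_move_cost_no_simd (const struct demo_regmove_cost *c)
{
  /* The low 64 bits move FP->FP directly; the high 64 bits bounce
     through a general register, FP->GP then GP->FP.  */
  return c->GP2FP + c->FP2GP + c->FP2FP;
}
#endif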
5366 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5367 reg_class_t rclass ATTRIBUTE_UNUSED,
5368 bool in ATTRIBUTE_UNUSED)
5370 return aarch64_tune_params->memmov_cost;
5373 /* Return the number of instructions that can be issued per cycle. */
5375 aarch64_sched_issue_rate (void)
5377 return aarch64_tune_params->issue_rate;
5380 /* Vectorizer cost model target hooks. */
5382 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5384 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5386 int misalign ATTRIBUTE_UNUSED)
5390 switch (type_of_cost)
5393 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
5396 return aarch64_tune_params->vec_costs->scalar_load_cost;
5399 return aarch64_tune_params->vec_costs->scalar_store_cost;
5402 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5405 return aarch64_tune_params->vec_costs->vec_align_load_cost;
5408 return aarch64_tune_params->vec_costs->vec_store_cost;
5411 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
5414 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
5416 case unaligned_load:
5417 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
5419 case unaligned_store:
5420 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
5422 case cond_branch_taken:
5423 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
5425 case cond_branch_not_taken:
5426 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
5429 case vec_promote_demote:
5430 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5433 elements = TYPE_VECTOR_SUBPARTS (vectype);
5434 return elements / 2 + 1;
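/* Illustrative sketch (editor's addition): the final, elided case
   label above is vec_construct in this revision (an assumption worth
   checking against the full source).  The formula charges roughly one
   statement per element pair plus one to combine.  */
#if 0 /* Example only; not compiled.  */
static int
demo_vec_construct_cost (int elements)
{
  return elements / 2 + 1;  /* V4SI: 4 / 2 + 1 == 3.  */
}
#endif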
5441 /* Implement targetm.vectorize.add_stmt_cost. */
5443 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5444 struct _stmt_vec_info *stmt_info, int misalign,
5445 enum vect_cost_model_location where)
5447 unsigned *cost = (unsigned *) data;
5448 unsigned retval = 0;
5450 if (flag_vect_cost_model)
5452 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5454 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
5456 /* Statements in an inner loop relative to the loop being
5457 vectorized are weighted more heavily. The value here is
5458 a function (linear for now) of the loop nest level. */
5459 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5461 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5462 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
5463 unsigned nest_level = loop_depth (loop);
5465 count *= nest_level;
5468 retval = (unsigned) (count * stmt_cost);
5469 cost[where] += retval;
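/* Illustrative sketch (editor's addition): the inner-loop weighting
   applied above, isolated.  All names are hypothetical.  */
#if 0 /* Example only; not compiled.  */
static unsigned
demo_weighted_stmt_cost (int count, int stmt_cost, unsigned nest_level,
                         int in_inner_loop)
{
  if (in_inner_loop)
    count *= (int) nest_level;  /* Linear in the nesting depth for now.  */
  return (unsigned) (count * stmt_cost);
}
#endif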
5475 static void initialize_aarch64_code_model (void);
5477 /* Parse the architecture extension string. */
5480 aarch64_parse_extension (char *str)
5482 /* The extension string is parsed left to right. */
5483 const struct aarch64_option_extension *opt = NULL;
5485 /* Flag to say whether we are adding or removing an extension. */
5486 int adding_ext = -1;
5488 while (str != NULL && *str != 0)
5494 ext = strchr (str, '+');
5501 if (len >= 2 && strncmp (str, "no", 2) == 0)
5512 error ("missing feature modifier after %qs", "+no");
5516 /* Scan over the extensions table trying to find an exact match. */
5517 for (opt = all_extensions; opt->name != NULL; opt++)
5519 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5521 /* Add or remove the extension. */
5523 aarch64_isa_flags |= opt->flags_on;
5525 aarch64_isa_flags &= ~(opt->flags_off);
5530 if (opt->name == NULL)
5532 /* Extension not found in list. */
5533 error ("unknown feature modifier %qs", str);
5543 /* Parse the ARCH string. */
5546 aarch64_parse_arch (void)
5549 const struct processor *arch;
5550 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5553 strcpy (str, aarch64_arch_string);
5555 ext = strchr (str, '+');
5564 error ("missing arch name in -march=%qs", str);
5568 /* Loop through the list of supported ARCHs to find a match. */
5569 for (arch = all_architectures; arch->name != NULL; arch++)
5571 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5573 selected_arch = arch;
5574 aarch64_isa_flags = selected_arch->flags;
5577 selected_cpu = &all_cores[selected_arch->core];
5581 /* ARCH string contains at least one extension. */
5582 aarch64_parse_extension (ext);
5585 if (strcmp (selected_arch->arch, selected_cpu->arch))
5587 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
5588 selected_cpu->name, selected_arch->name);
5595 /* ARCH name not found in list. */
5596 error ("unknown value %qs for -march", str);
5600 /* Parse the CPU string. */
5603 aarch64_parse_cpu (void)
5606 const struct processor *cpu;
5607 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5610 strcpy (str, aarch64_cpu_string);
5612 ext = strchr (str, '+');
5621 error ("missing cpu name in -mcpu=%qs", str);
5625 /* Loop through the list of supported CPUs to find a match. */
5626 for (cpu = all_cores; cpu->name != NULL; cpu++)
5628 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5631 selected_tune = cpu;
5632 aarch64_isa_flags = selected_cpu->flags;
5636 /* CPU string contains at least one extension. */
5637 aarch64_parse_extension (ext);
5644 /* CPU name not found in list. */
5645 error ("unknown value %qs for -mcpu", str);
5649 /* Parse the TUNE string. */
5652 aarch64_parse_tune (void)
5654 const struct processor *cpu;
5655 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5656 strcpy (str, aarch64_tune_string);
5658 /* Loop through the list of supported CPUs to find a match. */
5659 for (cpu = all_cores; cpu->name != NULL; cpu++)
5661 if (strcmp (cpu->name, str) == 0)
5663 selected_tune = cpu;
5668 /* CPU name not found in list. */
5669 error ("unknown value %qs for -mtune", str);
5674 /* Implement TARGET_OPTION_OVERRIDE. */
5677 aarch64_override_options (void)
5679 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
5680 If either -march or -mtune is given, it overrides the
5681 respective component of -mcpu.
5683 So, first parse AARCH64_CPU_STRING, then the others. Be careful
5684 with -march: if -mcpu is not present on the command line, -march
5685 must set a sensible default CPU. */
5686 if (aarch64_cpu_string)
5688 aarch64_parse_cpu ();
5691 if (aarch64_arch_string)
5693 aarch64_parse_arch ();
5696 if (aarch64_tune_string)
5698 aarch64_parse_tune ();
5701 #ifndef HAVE_AS_MABI_OPTION
5702 /* The compiler may have been configured with 2.23.* binutils, which does
5703 not have support for ILP32. */
5705 error ("Assembler does not support -mabi=ilp32");
5708 initialize_aarch64_code_model ();
5710 aarch64_build_bitmask_table ();
5712 /* This target defaults to strict volatile bitfields. */
5713 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5714 flag_strict_volatile_bitfields = 1;
5716 /* If the user did not specify a processor, choose the default
5717 one for them. This will be the CPU set during configuration using
5718 --with-cpu, otherwise it is "generic". */
5721 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5722 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5725 gcc_assert (selected_cpu);
5727 /* The selected cpu may be an architecture, so look up tuning by core ID. */
5729 selected_tune = &all_cores[selected_cpu->core];
5731 aarch64_tune_flags = selected_tune->flags;
5732 aarch64_tune = selected_tune->core;
5733 aarch64_tune_params = selected_tune->tune;
5735 if (aarch64_fix_a53_err835769 == 2)
5737 #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
5738 aarch64_fix_a53_err835769 = 1;
5740 aarch64_fix_a53_err835769 = 0;
5744 aarch64_override_options_after_change ();
5747 /* Implement targetm.override_options_after_change. */
5750 aarch64_override_options_after_change (void)
5752 if (flag_omit_frame_pointer)
5753 flag_omit_leaf_frame_pointer = false;
5754 else if (flag_omit_leaf_frame_pointer)
5755 flag_omit_frame_pointer = true;
5758 static struct machine_function *
5759 aarch64_init_machine_status (void)
5761 struct machine_function *machine;
5762 machine = ggc_alloc_cleared_machine_function ();
5767 aarch64_init_expanders (void)
5769 init_machine_status = aarch64_init_machine_status;
5772 /* A checking mechanism for the implementation of the various code models. */
5774 initialize_aarch64_code_model (void)
5778 switch (aarch64_cmodel_var)
5780 case AARCH64_CMODEL_TINY:
5781 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5783 case AARCH64_CMODEL_SMALL:
5784 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5786 case AARCH64_CMODEL_LARGE:
5787 sorry ("code model %qs with -f%s", "large",
5788 flag_pic > 1 ? "PIC" : "pic");
5794 aarch64_cmodel = aarch64_cmodel_var;
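/* Illustrative note (editor's addition): the net effect of the switch
   above is that -fpic/-fPIC promotes the absolute code models to their
   PIC variants (-mcmodel=tiny -fpic gives AARCH64_CMODEL_TINY_PIC,
   -mcmodel=small -fPIC gives AARCH64_CMODEL_SMALL_PIC), the large
   model is rejected with sorry (), and without -fpic/-fPIC the user's
   choice is taken unchanged.  */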
5797 /* Return true if SYMBOL_REF X binds locally. */
5800 aarch64_symbol_binds_local_p (const_rtx x)
5802 return (SYMBOL_REF_DECL (x)
5803 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5804 : SYMBOL_REF_LOCAL_P (x));
5807 /* Return true if SYMBOL_REF X is thread-local. */
5809 aarch64_tls_symbol_p (rtx x)
5811 if (! TARGET_HAVE_TLS)
5814 if (GET_CODE (x) != SYMBOL_REF)
5817 return SYMBOL_REF_TLS_MODEL (x) != 0;
5820 /* Classify a TLS symbol into one of the TLS kinds. */
5821 enum aarch64_symbol_type
5822 aarch64_classify_tls_symbol (rtx x)
5824 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5828 case TLS_MODEL_GLOBAL_DYNAMIC:
5829 case TLS_MODEL_LOCAL_DYNAMIC:
5830 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5832 case TLS_MODEL_INITIAL_EXEC:
5833 return SYMBOL_SMALL_GOTTPREL;
5835 case TLS_MODEL_LOCAL_EXEC:
5836 return SYMBOL_SMALL_TPREL;
5838 case TLS_MODEL_EMULATED:
5839 case TLS_MODEL_NONE:
5840 return SYMBOL_FORCE_TO_MEM;
5847 /* Return the method that should be used to access SYMBOL_REF or
5848 LABEL_REF X in context CONTEXT. */
5850 enum aarch64_symbol_type
5851 aarch64_classify_symbol (rtx x,
5852 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5854 if (GET_CODE (x) == LABEL_REF)
5856 switch (aarch64_cmodel)
5858 case AARCH64_CMODEL_LARGE:
5859 return SYMBOL_FORCE_TO_MEM;
5861 case AARCH64_CMODEL_TINY_PIC:
5862 case AARCH64_CMODEL_TINY:
5863 return SYMBOL_TINY_ABSOLUTE;
5865 case AARCH64_CMODEL_SMALL_PIC:
5866 case AARCH64_CMODEL_SMALL:
5867 return SYMBOL_SMALL_ABSOLUTE;
5874 if (GET_CODE (x) == SYMBOL_REF)
5876 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5877 return SYMBOL_FORCE_TO_MEM;
5879 if (aarch64_tls_symbol_p (x))
5880 return aarch64_classify_tls_symbol (x);
5882 switch (aarch64_cmodel)
5884 case AARCH64_CMODEL_TINY:
5885 if (SYMBOL_REF_WEAK (x))
5886 return SYMBOL_FORCE_TO_MEM;
5887 return SYMBOL_TINY_ABSOLUTE;
5889 case AARCH64_CMODEL_SMALL:
5890 if (SYMBOL_REF_WEAK (x))
5891 return SYMBOL_FORCE_TO_MEM;
5892 return SYMBOL_SMALL_ABSOLUTE;
5894 case AARCH64_CMODEL_TINY_PIC:
5895 if (!aarch64_symbol_binds_local_p (x))
5896 return SYMBOL_TINY_GOT;
5897 return SYMBOL_TINY_ABSOLUTE;
5899 case AARCH64_CMODEL_SMALL_PIC:
5900 if (!aarch64_symbol_binds_local_p (x))
5901 return SYMBOL_SMALL_GOT;
5902 return SYMBOL_SMALL_ABSOLUTE;
5909 /* By default push everything into the constant pool. */
5910 return SYMBOL_FORCE_TO_MEM;
5914 aarch64_constant_address_p (rtx x)
5916 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5920 aarch64_legitimate_pic_operand_p (rtx x)
5922 if (GET_CODE (x) == SYMBOL_REF
5923 || (GET_CODE (x) == CONST
5924 && GET_CODE (XEXP (x, 0)) == PLUS
5925 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5931 /* Return true if X holds either a quarter-precision or
5932 floating-point +0.0 constant. */
5934 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5936 if (!CONST_DOUBLE_P (x))
5939 /* TODO: We could handle moving 0.0 to a TFmode register,
5940 but first we would like to refactor movtf_aarch64
5941 to be more amenable to splitting moves properly and to
5942 gating correctly on TARGET_SIMD. For now, reject all
5943 constants that are not destined for SFmode or DFmode registers. */
5944 if (!(mode == SFmode || mode == DFmode))
5947 if (aarch64_float_const_zero_rtx_p (x))
5949 return aarch64_float_const_representable_p (x);
5953 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5955 /* Do not allow vector struct mode constants. We could support
5956 0 and -1 easily, but they need support in aarch64-simd.md. */
5957 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5960 /* This could probably go away because
5961 we now decompose CONST_INTs according to expand_mov_immediate. */
5962 if ((GET_CODE (x) == CONST_VECTOR
5963 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5964 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5965 return !targetm.cannot_force_const_mem (mode, x);
5967 if (GET_CODE (x) == HIGH
5968 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5971 return aarch64_constant_address_p (x);
5975 aarch64_load_tp (rtx target)
5978 || GET_MODE (target) != Pmode
5979 || !register_operand (target, Pmode))
5980 target = gen_reg_rtx (Pmode);
5982 /* Can return in any reg. */
5983 emit_insn (gen_aarch64_load_tp_hard (target));
5987 /* On AAPCS systems, this is the "struct __va_list". */
5988 static GTY(()) tree va_list_type;
5990 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5991 Return the type to use as __builtin_va_list.
5993 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6005 aarch64_build_builtin_va_list (void)
6008 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6010 /* Create the type. */
6011 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6012 /* Give it the required name. */
6013 va_list_name = build_decl (BUILTINS_LOCATION,
6015 get_identifier ("__va_list"),
6017 DECL_ARTIFICIAL (va_list_name) = 1;
6018 TYPE_NAME (va_list_type) = va_list_name;
6019 TYPE_STUB_DECL (va_list_type) = va_list_name;
6021 /* Create the fields. */
6022 f_stack = build_decl (BUILTINS_LOCATION,
6023 FIELD_DECL, get_identifier ("__stack"),
6025 f_grtop = build_decl (BUILTINS_LOCATION,
6026 FIELD_DECL, get_identifier ("__gr_top"),
6028 f_vrtop = build_decl (BUILTINS_LOCATION,
6029 FIELD_DECL, get_identifier ("__vr_top"),
6031 f_groff = build_decl (BUILTINS_LOCATION,
6032 FIELD_DECL, get_identifier ("__gr_offs"),
6034 f_vroff = build_decl (BUILTINS_LOCATION,
6035 FIELD_DECL, get_identifier ("__vr_offs"),
6038 DECL_ARTIFICIAL (f_stack) = 1;
6039 DECL_ARTIFICIAL (f_grtop) = 1;
6040 DECL_ARTIFICIAL (f_vrtop) = 1;
6041 DECL_ARTIFICIAL (f_groff) = 1;
6042 DECL_ARTIFICIAL (f_vroff) = 1;
6044 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6045 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6046 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6047 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6048 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6050 TYPE_FIELDS (va_list_type) = f_stack;
6051 DECL_CHAIN (f_stack) = f_grtop;
6052 DECL_CHAIN (f_grtop) = f_vrtop;
6053 DECL_CHAIN (f_vrtop) = f_groff;
6054 DECL_CHAIN (f_groff) = f_vroff;
6056 /* Compute its layout. */
6057 layout_type (va_list_type);
6059 return va_list_type;
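/* Reconstruction (editor's addition) of the AAPCS64 \S 7.1.4 layout
   that the FIELD_DECLs above implement, shown as plain C.  */
#if 0 /* Example only; not compiled.  */
struct __va_list
{
  void *__stack;   /* Next stacked argument.  */
  void *__gr_top;  /* Top of the general-register save area.  */
  void *__vr_top;  /* Top of the vector-register save area.  */
  int __gr_offs;   /* Offset (<= 0) from __gr_top to the next GR arg.  */
  int __vr_offs;   /* Offset (<= 0) from __vr_top to the next VR arg.  */
};
#endif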
6062 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6064 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6066 const CUMULATIVE_ARGS *cum;
6067 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6068 tree stack, grtop, vrtop, groff, vroff;
6070 int gr_save_area_size;
6071 int vr_save_area_size;
6074 cum = &crtl->args.info;
6076 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6078 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6080 if (TARGET_GENERAL_REGS_ONLY)
6082 if (cum->aapcs_nvrn > 0)
6083 sorry ("%qs and floating point or vector arguments",
6084 "-mgeneral-regs-only");
6085 vr_save_area_size = 0;
6088 f_stack = TYPE_FIELDS (va_list_type_node);
6089 f_grtop = DECL_CHAIN (f_stack);
6090 f_vrtop = DECL_CHAIN (f_grtop);
6091 f_groff = DECL_CHAIN (f_vrtop);
6092 f_vroff = DECL_CHAIN (f_groff);
6094 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6096 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6098 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6100 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6102 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6105 /* Emit code to initialize STACK, which points to the next varargs stack
6106 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6107 by named arguments. STACK is 8-byte aligned. */
6108 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6109 if (cum->aapcs_stack_size > 0)
6110 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6111 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6112 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6114 /* Emit code to initialize GRTOP, the top of the GR save area.
6115 virtual_incoming_args_rtx should have been 16-byte aligned. */
6116 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6117 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6118 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6120 /* Emit code to initialize VRTOP, the top of the VR save area.
6121 This address is gr_save_area_bytes below GRTOP, rounded
6122 down to the next 16-byte boundary. */
6123 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6124 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6125 STACK_BOUNDARY / BITS_PER_UNIT);
6128 t = fold_build_pointer_plus_hwi (t, -vr_offset);
6129 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6130 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6132 /* Emit code to initialize GROFF, the offset from GRTOP of the
6133 next GPR argument. */
6134 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6135 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6136 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6138 /* Likewise emit code to initialize VROFF, the offset from VRTOP
6139 of the next VR argument. */
6140 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6141 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6142 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6145 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6148 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6149 gimple_seq *post_p ATTRIBUTE_UNUSED)
6153 bool is_ha; /* is HFA or HVA. */
6154 bool dw_align; /* double-word align. */
6155 enum machine_mode ag_mode = VOIDmode;
6157 enum machine_mode mode;
6159 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6160 tree stack, f_top, f_off, off, arg, roundup, on_stack;
6161 HOST_WIDE_INT size, rsize, adjust, align;
6162 tree t, u, cond1, cond2;
6164 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6166 type = build_pointer_type (type);
6168 mode = TYPE_MODE (type);
6170 f_stack = TYPE_FIELDS (va_list_type_node);
6171 f_grtop = DECL_CHAIN (f_stack);
6172 f_vrtop = DECL_CHAIN (f_grtop);
6173 f_groff = DECL_CHAIN (f_vrtop);
6174 f_vroff = DECL_CHAIN (f_groff);
6176 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6177 f_stack, NULL_TREE);
6178 size = int_size_in_bytes (type);
6179 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6183 if (aarch64_vfp_is_call_or_return_candidate (mode,
6189 /* TYPE passed in fp/simd registers. */
6190 if (TARGET_GENERAL_REGS_ONLY)
6191 sorry ("%qs and floating point or vector arguments",
6192 "-mgeneral-regs-only");
6194 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6195 unshare_expr (valist), f_vrtop, NULL_TREE);
6196 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6197 unshare_expr (valist), f_vroff, NULL_TREE);
6199 rsize = nregs * UNITS_PER_VREG;
6203 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6204 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6206 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6207 && size < UNITS_PER_VREG)
6209 adjust = UNITS_PER_VREG - size;
6214 /* TYPE passed in general registers. */
6215 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6216 unshare_expr (valist), f_grtop, NULL_TREE);
6217 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6218 unshare_expr (valist), f_groff, NULL_TREE);
6219 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6220 nregs = rsize / UNITS_PER_WORD;
6225 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6226 && size < UNITS_PER_WORD)
6228 adjust = UNITS_PER_WORD - size;
6232 /* Get a local temporary for the field value. */
6233 off = get_initialized_tmp_var (f_off, pre_p, NULL);
6235 /* Emit code to branch if off >= 0. */
6236 t = build2 (GE_EXPR, boolean_type_node, off,
6237 build_int_cst (TREE_TYPE (off), 0));
6238 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6242 /* Emit: offs = (offs + 15) & -16. */
6243 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6244 build_int_cst (TREE_TYPE (off), 15));
6245 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6246 build_int_cst (TREE_TYPE (off), -16));
6247 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6252 /* Update ap.__[g|v]r_offs */
6253 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6254 build_int_cst (TREE_TYPE (off), rsize));
6255 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6259 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6261 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6262 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6263 build_int_cst (TREE_TYPE (f_off), 0));
6264 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6266 /* String up: make sure the assignment happens before the use. */
6267 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6268 COND_EXPR_ELSE (cond1) = t;
6270 /* Prepare the trees handling the argument that is passed on the stack;
6271 the top-level node will be stored in ON_STACK. */
6272 arg = get_initialized_tmp_var (stack, pre_p, NULL);
6275 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
6276 t = fold_convert (intDI_type_node, arg);
6277 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6278 build_int_cst (TREE_TYPE (t), 15));
6279 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6280 build_int_cst (TREE_TYPE (t), -16));
6281 t = fold_convert (TREE_TYPE (arg), t);
6282 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
6286 /* Advance ap.__stack */
6287 t = fold_convert (intDI_type_node, arg);
6288 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6289 build_int_cst (TREE_TYPE (t), size + 7));
6290 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6291 build_int_cst (TREE_TYPE (t), -8));
6292 t = fold_convert (TREE_TYPE (arg), t);
6293 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
6294 /* String up roundup and advance. */
6296 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6297 /* String up with arg */
6298 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
6299 /* Big-endianness related address adjustment. */
6300 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6301 && size < UNITS_PER_WORD)
6303 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
6304 size_int (UNITS_PER_WORD - size));
6305 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
6308 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
6309 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
6311 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
6314 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
6315 build_int_cst (TREE_TYPE (off), adjust));
6317 t = fold_convert (sizetype, t);
6318 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
6322 /* type ha; // treat as "struct {ftype field[n];}"
6323 ... [computing offs]
6324 for (i = 0; i < nregs; ++i, offs += 16)
6325 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6328 tree tmp_ha, field_t, field_ptr_t;
6330 /* Declare a local variable. */
6331 tmp_ha = create_tmp_var_raw (type, "ha");
6332 gimple_add_tmp_var (tmp_ha);
6334 /* Establish the base type. */
6338 field_t = float_type_node;
6339 field_ptr_t = float_ptr_type_node;
6342 field_t = double_type_node;
6343 field_ptr_t = double_ptr_type_node;
6346 field_t = long_double_type_node;
6347 field_ptr_t = long_double_ptr_type_node;
6349 /* Half precision and quad precision are not fully supported yet. Enable
6350 the following code once that support is complete; we still need to find
6351 the correct type node for __fp16 *. */
6354 field_t = float_type_node;
6355 field_ptr_t = float_ptr_type_node;
6361 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
6362 field_t = build_vector_type_for_mode (innertype, ag_mode);
6363 field_ptr_t = build_pointer_type (field_t);
6370 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
6371 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
6373 t = fold_convert (field_ptr_t, addr);
6374 t = build2 (MODIFY_EXPR, field_t,
6375 build1 (INDIRECT_REF, field_t, tmp_ha),
6376 build1 (INDIRECT_REF, field_t, t));
6378 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
6379 for (i = 1; i < nregs; ++i)
6381 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
6382 u = fold_convert (field_ptr_t, addr);
6383 u = build2 (MODIFY_EXPR, field_t,
6384 build2 (MEM_REF, field_t, tmp_ha,
6385 build_int_cst (field_ptr_t,
6387 int_size_in_bytes (field_t)))),
6388 build1 (INDIRECT_REF, field_t, u));
6389 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
6392 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
6393 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
6396 COND_EXPR_ELSE (cond2) = t;
6397 addr = fold_convert (build_pointer_type (type), cond1);
6398 addr = build_va_arg_indirect_ref (addr);
6401 addr = build_va_arg_indirect_ref (addr);
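/* Illustrative sketch (editor's addition): the control flow that the
   GIMPLE built above implements for one va_arg fetch, in plain C.
   TOP/OFF stand for the __gr_top/__gr_offs (or __vr_top/__vr_offs)
   pair and RSIZE for the rounded argument size; dw_align rounding and
   big-endian adjustments are omitted.  All names are hypothetical.  */
#if 0 /* Example only; not compiled.  */
struct demo_va_list { void *stack; void *top; int off; };

static void *
demo_on_stack (struct demo_va_list *ap)
{
  void *p = ap->stack;
  ap->stack = (char *) ap->stack + 8;  /* Simplified: 8-byte slots.  */
  return p;
}

static void *
demo_va_arg_addr (struct demo_va_list *ap, int rsize)
{
  int off = ap->off;
  if (off >= 0)
    return demo_on_stack (ap);      /* Save area already exhausted.  */
  ap->off = off + rsize;            /* Reserve the register slots.  */
  if (ap->off > 0)
    return demo_on_stack (ap);      /* Not enough left; use the stack.  */
  return (char *) ap->top + off;    /* Read from the save area.  */
}
#endif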
6406 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
6409 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
6410 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6413 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6414 CUMULATIVE_ARGS local_cum;
6415 int gr_saved, vr_saved;
6417 /* The caller has advanced CUM up to, but not beyond, the last named
6418 argument. Advance a local copy of CUM past the last "real" named
6419 argument, to find out how many registers are left over. */
6421 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
6423 /* Find out how many registers we need to save. */
6424 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
6425 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
6427 if (TARGET_GENERAL_REGS_ONLY)
6429 if (local_cum.aapcs_nvrn > 0)
6430 sorry ("%qs and floating point or vector arguments",
6431 "-mgeneral-regs-only");
6441 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
6442 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
6443 - gr_saved * UNITS_PER_WORD);
6444 mem = gen_frame_mem (BLKmode, ptr);
6445 set_mem_alias_set (mem, get_varargs_alias_set ());
6447 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
6452 /* We can't use move_block_from_reg, because it will use
6453 the wrong mode, storing D regs only. */
6454 enum machine_mode mode = TImode;
6457 /* Set OFF to the offset from virtual_incoming_args_rtx of
6458 the first vector register. The VR save area lies below
6459 the GR one, and is aligned to 16 bytes. */
6460 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6461 STACK_BOUNDARY / BITS_PER_UNIT);
6462 off -= vr_saved * UNITS_PER_VREG;
6464 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
6468 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
6469 mem = gen_frame_mem (mode, ptr);
6470 set_mem_alias_set (mem, get_varargs_alias_set ());
6471 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
6472 off += UNITS_PER_VREG;
6477 /* We don't save the size into *PRETEND_SIZE because we want to avoid
6478 any complication of having crtl->args.pretend_args_size changed. */
6479 cfun->machine->saved_varargs_size
6480 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6481 STACK_BOUNDARY / BITS_PER_UNIT)
6482 + vr_saved * UNITS_PER_VREG);
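/* Worked example (editor's addition): with gr_saved == 3 and
   vr_saved == 1 the expression above gives
   ROUND_UP (3 * 8, 16) + 1 * 16 == 32 + 16 == 48 bytes, since the GR
   save area is padded up to the 16-byte stack boundary before the VR
   area is placed below it (UNITS_PER_WORD is 8 and UNITS_PER_VREG is
   16 on AArch64).  */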
6486 aarch64_conditional_register_usage (void)
6491 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
6494 call_used_regs[i] = 1;
6499 /* Walk down the type tree of TYPE counting consecutive base elements.
6500 If *MODEP is VOIDmode, then set it to the first valid floating point
6501 type. If a non-floating point type is found, or if a floating point
6502 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6503 otherwise return the count in the sub-tree. */
6505 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6507 enum machine_mode mode;
6510 switch (TREE_CODE (type))
6513 mode = TYPE_MODE (type);
6514 if (mode != DFmode && mode != SFmode && mode != TFmode)
6517 if (*modep == VOIDmode)
6526 mode = TYPE_MODE (TREE_TYPE (type));
6527 if (mode != DFmode && mode != SFmode && mode != TFmode)
6530 if (*modep == VOIDmode)
6539 /* Use V2SImode and V4SImode as representatives of all 64-bit
6540 and 128-bit vector types. */
6541 size = int_size_in_bytes (type);
6554 if (*modep == VOIDmode)
6557 /* Vector modes are considered to be opaque: two vectors are
6558 equivalent for the purposes of being homogeneous aggregates
6559 if they are the same size. */
6568 tree index = TYPE_DOMAIN (type);
6570 /* Can't handle incomplete types. */
6571 if (!COMPLETE_TYPE_P (type))
6574 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6577 || !TYPE_MAX_VALUE (index)
6578 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6579 || !TYPE_MIN_VALUE (index)
6580 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6584 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6585 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6587 /* There must be no padding. */
6588 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6589 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6590 != count * GET_MODE_BITSIZE (*modep)))
6602 /* Can't handle incomplete types. */
6603 if (!COMPLETE_TYPE_P (type))
6606 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6608 if (TREE_CODE (field) != FIELD_DECL)
6611 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6617 /* There must be no padding. */
6618 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6619 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6620 != count * GET_MODE_BITSIZE (*modep)))
6627 case QUAL_UNION_TYPE:
6629 /* These aren't very interesting except in a degenerate case. */
6634 /* Can't handle incomplete types. */
6635 if (!COMPLETE_TYPE_P (type))
6638 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6640 if (TREE_CODE (field) != FIELD_DECL)
6643 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6646 count = count > sub_count ? count : sub_count;
6649 /* There must be no padding. */
6650 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6651 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6652 != count * GET_MODE_BITSIZE (*modep)))
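/* Worked examples (editor's addition) for the walk above.  */
#if 0 /* Examples only; not compiled.  */
struct demo_hfa   { double x, y; };       /* Count 2, *modep DFmode: an HFA.  */
struct demo_mixed { float f; double d; }; /* -1: mixed base types.  */
typedef double demo_arr[4];               /* Count 4: arrays multiply.  */
struct demo_bad   { double d; int i; };   /* -1: a non-FP field rejects it.  */
#endif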
6665 /* Return true if we use LRA instead of the reload pass. */
6667 aarch64_lra_p (void)
6669 return aarch64_lra_flag;
6672 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6673 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6674 array types. The C99 floating-point complex types are also considered
6675 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6676 types, which are GCC extensions and out of the scope of AAPCS64, are
6677 treated as composite types here as well.
6679 Note that MODE itself is not sufficient in determining whether a type
6680 is such a composite type or not. This is because
6681 stor-layout.c:compute_record_mode may have already changed the MODE
6682 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6683 structure with only one field may have its MODE set to the mode of the
6684 field. Also an integer mode whose size matches the size of the
6685 RECORD_TYPE type may be used as a substitute for the original mode
6686 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6687 solely relied on. */
6690 aarch64_composite_type_p (const_tree type,
6691 enum machine_mode mode)
6693 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6697 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6698 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6704 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6705 type as described in AAPCS64 \S 4.1.2.
6707 See the comment above aarch64_composite_type_p for the notes on MODE. */
6710 aarch64_short_vector_p (const_tree type,
6711 enum machine_mode mode)
6713 HOST_WIDE_INT size = -1;
6715 if (type && TREE_CODE (type) == VECTOR_TYPE)
6716 size = int_size_in_bytes (type);
6717 else if (!aarch64_composite_type_p (type, mode)
6718 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6719 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6720 size = GET_MODE_SIZE (mode);
6722 return size == 8 || size == 16;
6725 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6726 shall be passed or returned in simd/fp register(s) (providing these
6727 parameter passing registers are available).
6729 Upon successful return, *COUNT returns the number of needed registers,
6730 *BASE_MODE returns the mode of the individual register, and when IS_HA
6731 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6732 floating-point aggregate or a homogeneous short-vector aggregate. */
6735 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6737 enum machine_mode *base_mode,
6741 enum machine_mode new_mode = VOIDmode;
6742 bool composite_p = aarch64_composite_type_p (type, mode);
6744 if (is_ha != NULL) *is_ha = false;
6746 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6747 || aarch64_short_vector_p (type, mode))
6752 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6754 if (is_ha != NULL) *is_ha = true;
6756 new_mode = GET_MODE_INNER (mode);
6758 else if (type && composite_p)
6760 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6762 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6764 if (is_ha != NULL) *is_ha = true;
6773 *base_mode = new_mode;
6777 /* Implement TARGET_STRUCT_VALUE_RTX. */
6780 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6781 int incoming ATTRIBUTE_UNUSED)
6783 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6786 /* Implements target hook vector_mode_supported_p. */
6788 aarch64_vector_mode_supported_p (enum machine_mode mode)
6791 && (mode == V4SImode || mode == V8HImode
6792 || mode == V16QImode || mode == V2DImode
6793 || mode == V2SImode || mode == V4HImode
6794 || mode == V8QImode || mode == V2SFmode
6795 || mode == V4SFmode || mode == V2DFmode
6796 || mode == V1DFmode))
6802 /* Return appropriate SIMD container
6803 for MODE within a vector of WIDTH bits. */
6804 static enum machine_mode
6805 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6807 gcc_assert (width == 64 || width == 128);
6846 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6847 static enum machine_mode
6848 aarch64_preferred_simd_mode (enum machine_mode mode)
6850 return aarch64_simd_container_mode (mode, 128);
6853 /* Return the bitmask of possible vector sizes for the vectorizer to iterate over. */
6856 aarch64_autovectorize_vector_sizes (void)
6861 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6862 vector types in order to conform to the AAPCS64 (see "Procedure
6863 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6864 qualify for emission with the mangled names defined in that document,
6865 a vector type must not only be of the correct mode but also be
6866 composed of AdvSIMD vector element types (e.g.
6867 __builtin_aarch64_simd_qi); these types are registered by
6868 aarch64_init_simd_builtins (). In other words, vector types defined
6869 in other ways, e.g. via the vector_size attribute, will get default mangled names. */
6873 enum machine_mode mode;
6874 const char *element_type_name;
6875 const char *mangled_name;
6876 } aarch64_simd_mangle_map_entry;
6878 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6879 /* 64-bit containerized types. */
6880 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6881 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6882 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6883 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6884 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6885 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6886 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6887 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6888 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6889 /* 128-bit containerized types. */
6890 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6891 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6892 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6893 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6894 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6895 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6896 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6897 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6898 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6899 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6900 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6901 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6902 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
6903 { VOIDmode, NULL, NULL }
6906 /* Implement TARGET_MANGLE_TYPE. */
6909 aarch64_mangle_type (const_tree type)
6911 /* The AArch64 ABI documents say that "__va_list" has to be
6912 mangled as if it is in the "std" namespace. */
6913 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6914 return "St9__va_list";
6916 /* Check the mode of the vector type, and the name of the vector
6917 element type, against the table. */
6918 if (TREE_CODE (type) == VECTOR_TYPE)
6920 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6922 while (pos->mode != VOIDmode)
6924 tree elt_type = TREE_TYPE (type);
6926 if (pos->mode == TYPE_MODE (type)
6927 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6928 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6929 pos->element_type_name))
6930 return pos->mangled_name;
6936 /* Use the default mangling. */
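/* Illustrative note (editor's addition): the mangled strings in the
   table follow the Itanium C++ ABI <source-name> form, a decimal
   length prefix followed by the identifier, so "10__Int8x8_t" is the
   10-character name "__Int8x8_t", and "St9__va_list" is "__va_list"
   qualified with the "St" abbreviation for the "std" namespace.  */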
6941 is_mem_p (rtx *x, void *data ATTRIBUTE_UNUSED)
6947 is_memory_op (rtx mem_insn)
6949 rtx pattern = PATTERN (mem_insn);
6950 return for_each_rtx (&pattern, is_mem_p, NULL);
6953 /* Find the first rtx before insn that will generate an assembly instruction. */
6957 aarch64_prev_real_insn (rtx insn)
6964 insn = prev_real_insn (insn);
6966 while (insn && recog_memoized (insn) < 0);
6972 is_madd_op (enum attr_type t1)
6975 /* A number of these may be AArch32 only. */
6976 enum attr_type mlatypes[] = {
6977 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
6978 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
6979 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
6982 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
6984 if (t1 == mlatypes[i])
6991 /* Check if there is a register dependency between a load and the insn
6992 for which we hold recog_data. */
6995 dep_between_memop_and_curr (rtx memop)
7003 if (!REG_P (SET_DEST (memop)))
7006 load_reg = SET_DEST (memop);
7007 for (opno = 0; opno < recog_data.n_operands; opno++)
7009 rtx operand = recog_data.operand[opno];
7011 && reg_overlap_mentioned_p (load_reg, operand))
7019 aarch64_madd_needs_nop (rtx insn)
7021 enum attr_type attr_type;
7025 if (!aarch64_fix_a53_err835769)
7028 if (recog_memoized (insn) < 0)
7031 attr_type = get_attr_type (insn);
7032 if (!is_madd_op (attr_type))
7035 prev = aarch64_prev_real_insn (insn);
7036 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
7037 Restore recog state to INSN to avoid state corruption. */
7038 extract_constrain_insn_cached (insn);
7043 body = single_set (prev);
7045 /* If the previous insn is a memory op and there is no dependency between
7046 it and the madd, emit a nop between them. If we know the previous insn is
7047 a memory op but body is NULL, emit the nop to be safe; it's probably a
7048 load/store pair insn. */
7049 if (is_memory_op (prev)
7050 && GET_MODE (recog_data.operand[0]) == DImode
7051 && (!dep_between_memop_and_curr (body)))
7059 aarch64_final_prescan_insn (rtx insn)
7061 if (aarch64_madd_needs_nop (insn))
7062 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
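/* Illustrative note (editor's addition): the kind of sequence the
   workaround produces.  Given a 64-bit load directly followed by a
   64-bit multiply-accumulate, the prescan emits a nop between them:

       ldr   x1, [x2]
       nop   // between mem op and mult-accumulate
       madd  x0, x3, x4, x5

   The registers here are arbitrary; only the load/madd adjacency and
   the DImode operands matter.  */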
7066 /* Return the equivalent letter for size. */
7068 sizetochar (int size)
7072 case 64: return 'd';
7073 case 32: return 's';
7074 case 16: return 'h';
7075 case 8 : return 'b';
7076 default: gcc_unreachable ();
7080 /* Return true iff x is a uniform vector of floating-point
7081 constants, and the constant can be represented in
7082 quarter-precision form. Note that, as aarch64_float_const_representable
7083 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
7085 aarch64_vect_float_const_representable_p (rtx x)
7088 REAL_VALUE_TYPE r0, ri;
7091 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7094 x0 = CONST_VECTOR_ELT (x, 0);
7095 if (!CONST_DOUBLE_P (x0))
7098 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7100 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7102 xi = CONST_VECTOR_ELT (x, i);
7103 if (!CONST_DOUBLE_P (xi))
7106 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7107 if (!REAL_VALUES_EQUAL (r0, ri))
7111 return aarch64_float_const_representable_p (x0);
7114 /* Return true for valid and false for invalid. */
7116 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
7117 struct simd_immediate_info *info)
7119 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
7121 for (i = 0; i < idx; i += (STRIDE)) \
7126 immtype = (CLASS); \
7127 elsize = (ELSIZE); \
7133 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7134 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7135 unsigned char bytes[16];
7136 int immtype = -1, matches;
7137 unsigned int invmask = inverse ? 0xff : 0;
7140 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7142 if (! (aarch64_simd_imm_zero_p (op, mode)
7143 || aarch64_vect_float_const_representable_p (op)))
7148 info->value = CONST_VECTOR_ELT (op, 0);
7149 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
7157 /* Splat vector constant out into a byte vector. */
7158 for (i = 0; i < n_elts; i++)
7160 /* The vector is provided in gcc's endian-neutral fashion. For aarch64_be,
7161 it must be laid out in the vector register in reverse order. */
7162 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
7163 unsigned HOST_WIDE_INT elpart;
7164 unsigned int part, parts;
7166 if (GET_CODE (el) == CONST_INT)
7168 elpart = INTVAL (el);
7171 else if (GET_CODE (el) == CONST_DOUBLE)
7173 elpart = CONST_DOUBLE_LOW (el);
7179 for (part = 0; part < parts; part++)
7182 for (byte = 0; byte < innersize; byte++)
7184 bytes[idx++] = (elpart & 0xff) ^ invmask;
7185 elpart >>= BITS_PER_UNIT;
7187 if (GET_CODE (el) == CONST_DOUBLE)
7188 elpart = CONST_DOUBLE_HIGH (el);
7193 gcc_assert (idx == GET_MODE_SIZE (mode));
7197 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7198 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7200 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7201 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7203 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7204 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7206 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7207 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7209 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7211 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7213 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7214 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7216 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7217 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7219 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7220 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7222 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7223 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7225 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7227 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7229 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7230 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7232 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7233 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7235 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7236 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7238 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7239 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7241 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7243 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7244 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7253 info->element_width = elsize;
7254 info->mvn = emvn != 0;
7255 info->shift = eshift;
7257 unsigned HOST_WIDE_INT imm = 0;
7259 if (immtype >= 12 && immtype <= 15)
7262 /* Un-invert bytes of recognized vector, if necessary. */
7264 for (i = 0; i < idx; i++)
7265 bytes[i] ^= invmask;
7269 /* FIXME: Broken on 32-bit H_W_I hosts. */
7270 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7272 for (i = 0; i < 8; i++)
7273 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7274 << (i * BITS_PER_UNIT);
7277 info->value = GEN_INT (imm);
7281 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7282 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7284 /* Construct 'abcdefgh' because the assembler cannot handle
7285 generic constants. */
7288 imm = (imm >> info->shift) & 0xff;
7289 info->value = GEN_INT (imm);
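/* Worked example (editor's addition): the V4HImode vector
   { 0x00ab, 0x00ab, 0x00ab, 0x00ab } splats to the byte pattern
   ab 00 ab 00 ab 00 ab 00, which matches the CHECK (2, 16, 4, ...)
   case above: element width 16, shift 0, no inversion, i.e. a
   "MOVI Vd.4H, #0xab" style immediate.  */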
7298 aarch64_const_vec_all_same_int_p (rtx x,
7299 HOST_WIDE_INT minval,
7300 HOST_WIDE_INT maxval)
7302 HOST_WIDE_INT firstval;
7305 if (GET_CODE (x) != CONST_VECTOR
7306 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
7309 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
7310 if (firstval < minval || firstval > maxval)
7313 count = CONST_VECTOR_NUNITS (x);
7314 for (i = 1; i < count; i++)
7315 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
7321 /* Check if immediate shift constants are within range. */
7323 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
7325 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
7327 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
7329 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
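/* To illustrate the two ranges above, assuming V4SImode (32-bit
   units): an immediate vector left shift must splat a value in
   [0, 31], while an immediate right shift must splat a value in
   [1, 32], matching the encodable operands of SHL and SSHR/USHR:

     shl   v0.4s, v1.4s, 0    // left:  0 <= imm <= 31
     sshr  v0.4s, v1.4s, 32   // right: 1 <= imm <= 32  */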
7332 /* Return true if X is a uniform vector where all elements
7333 are either the floating-point constant 0.0 or the
7334 integer constant 0. */
7336 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
7338 return x == CONST0_RTX (mode);
7342 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
7344 HOST_WIDE_INT imm = INTVAL (x);
7347 for (i = 0; i < 8; i++)
7349 unsigned int byte = imm & 0xff;
7350 if (byte != 0xff && byte != 0)
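/* Example of the byte test above (values chosen for illustration):
   0xff00ff00ff00ff00 is accepted because each of its eight bytes is
   either 0x00 or 0xff, whereas 0x1234 is rejected as soon as the low
   byte 0x34 is examined.  */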
7359 aarch64_mov_operand_p (rtx x,
7360 enum aarch64_symbol_context context,
7361 enum machine_mode mode)
7363 if (GET_CODE (x) == HIGH
7364 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7367 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
7370 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
7373 return aarch64_classify_symbolic_expression (x, context)
7374 == SYMBOL_TINY_ABSOLUTE;
7377 /* Return a const_int vector of VAL. */
7379 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
7381 int nunits = GET_MODE_NUNITS (mode);
7382 rtvec v = rtvec_alloc (nunits);
7385 for (i = 0; i < nunits; i++)
7386 RTVEC_ELT (v, i) = GEN_INT (val);
7388 return gen_rtx_CONST_VECTOR (mode, v);
7391 /* Check OP is a legal scalar immediate for the MOVI instruction. */
7394 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
7396 enum machine_mode vmode;
7398 gcc_assert (!VECTOR_MODE_P (mode));
7399 vmode = aarch64_preferred_simd_mode (mode);
7400 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
7401 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
7404 /* Construct and return a PARALLEL RTX vector. */
7406 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
7408 int nunits = GET_MODE_NUNITS (mode);
7409 rtvec v = rtvec_alloc (nunits / 2);
7410 int base = high ? nunits / 2 : 0;
7414 for (i = 0; i < nunits / 2; i++)
7415 RTVEC_ELT (v, i) = GEN_INT (base + i);
7417 t1 = gen_rtx_PARALLEL (mode, v);
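/* For example, assuming V8HImode (nunits == 8): with HIGH false this
   builds (parallel [0 1 2 3]) and with HIGH true (parallel [4 5 6 7]),
   i.e. lane selectors for the low and high halves of the vector.  */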
7421 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
7422 HIGH (exclusive). */
7424 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7427 gcc_assert (GET_CODE (operand) == CONST_INT);
7428 lane = INTVAL (operand);
7430 if (lane < low || lane >= high)
7431 error ("lane out of range");
7435 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7437 gcc_assert (GET_CODE (operand) == CONST_INT);
7438 HOST_WIDE_INT lane = INTVAL (operand);
7440 if (lane < low || lane >= high)
7441 error ("constant out of range");
7444 /* Emit code to reinterpret one AdvSIMD type as another,
7445 without altering bits. */
7447 aarch64_simd_reinterpret (rtx dest, rtx src)
7449 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
7452 /* Emit code to place an AdvSIMD pair result in memory locations (with equal registers). */
7455 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
7456 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
7459 rtx mem = gen_rtx_MEM (mode, destaddr);
7460 rtx tmp1 = gen_reg_rtx (mode);
7461 rtx tmp2 = gen_reg_rtx (mode);
7463 emit_insn (intfn (tmp1, op1, tmp2));
7465 emit_move_insn (mem, tmp1);
7466 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
7467 emit_move_insn (mem, tmp2);
7470 /* Return TRUE if OP is a valid vector addressing mode. */
7472 aarch64_simd_mem_operand_p (rtx op)
7474 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
7475 || GET_CODE (XEXP (op, 0)) == REG);
7478 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
7479 not to early-clobber SRC registers in the process.
7481 We assume that the operands described by SRC and DEST represent a
7482 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
7483 number of components into which the copy has been decomposed. */
7485 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
7486 rtx *src, unsigned int count)
7490 if (!reg_overlap_mentioned_p (operands[0], operands[1])
7491 || REGNO (operands[0]) < REGNO (operands[1]))
7493 for (i = 0; i < count; i++)
7495 operands[2 * i] = dest[i];
7496 operands[2 * i + 1] = src[i];
7501 for (i = 0; i < count; i++)
7503 operands[2 * i] = dest[count - i - 1];
7504 operands[2 * i + 1] = src[count - i - 1];
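/* Sketch of why the reversed loop matters (register names chosen for
   illustration): copying the decomposed pair {q0, q1} into {q1, q2}
   must move q1 into q2 before q0 clobbers q1; because the destination
   overlaps the source and REGNO (operands[0]) > REGNO (operands[1]),
   the second loop lists the component moves in reverse order.  */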
7509 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
7510 one of VSTRUCT modes: OI, CI or XI. */
7512 aarch64_simd_attr_length_move (rtx insn)
7514 enum machine_mode mode;
7516 extract_insn_cached (insn);
7518 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
7520 mode = GET_MODE (recog_data.operand[0]);
7536 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
7537 alignment of a vector to 128 bits. */
7538 static HOST_WIDE_INT
7539 aarch64_simd_vector_alignment (const_tree type)
7541 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
7542 return MIN (align, 128);
7545 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
7547 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
7552 /* We guarantee alignment for vectors up to 128 bits. */
7553 if (tree_int_cst_compare (TYPE_SIZE (type),
7554 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
7557 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
7561 /* If VALS is a vector constant that can be loaded into a register
7562 using DUP, generate instructions to do so and return an RTX to
7563 assign to the register. Otherwise return NULL_RTX. */
7565 aarch64_simd_dup_constant (rtx vals)
7567 enum machine_mode mode = GET_MODE (vals);
7568 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7569 int n_elts = GET_MODE_NUNITS (mode);
7570 bool all_same = true;
7574 if (GET_CODE (vals) != CONST_VECTOR)
7577 for (i = 1; i < n_elts; ++i)
7579 x = CONST_VECTOR_ELT (vals, i);
7580 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
7587 /* We can load this constant by using DUP and a constant in a
7588 single ARM register. This will be cheaper than a vector load. */
7590 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
7591 return gen_rtx_VEC_DUPLICATE (mode, x);
7595 /* Generate code to load VALS, which is a PARALLEL containing only
7596 constants (for vec_init) or CONST_VECTOR, efficiently into a
7597 register. Returns an RTX to copy into the register, or NULL_RTX
7598 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
7600 aarch64_simd_make_constant (rtx vals)
7602 enum machine_mode mode = GET_MODE (vals);
7604 rtx const_vec = NULL_RTX;
7605 int n_elts = GET_MODE_NUNITS (mode);
7609 if (GET_CODE (vals) == CONST_VECTOR)
7611 else if (GET_CODE (vals) == PARALLEL)
7613 /* A CONST_VECTOR must contain only CONST_INTs and
7614 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
7615 Only store valid constants in a CONST_VECTOR. */
7616 for (i = 0; i < n_elts; ++i)
7618 rtx x = XVECEXP (vals, 0, i);
7619 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
7622 if (n_const == n_elts)
7623 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7628 if (const_vec != NULL_RTX
7629 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
7630 /* Load using MOVI/MVNI. */
7632 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
7633 /* Loaded using DUP. */
7635 else if (const_vec != NULL_RTX)
7636 /* Load from constant pool. We cannot take advantage of single-cycle
7637 LD1 because we need a PC-relative addressing mode. */
7640 /* A PARALLEL containing something not valid inside CONST_VECTOR.
7641 We cannot construct an initializer. */
7646 aarch64_expand_vector_init (rtx target, rtx vals)
7648 enum machine_mode mode = GET_MODE (target);
7649 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7650 int n_elts = GET_MODE_NUNITS (mode);
7651 int n_var = 0, one_var = -1;
7652 bool all_same = true;
7656 x = XVECEXP (vals, 0, 0);
7657 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7658 n_var = 1, one_var = 0;
7660 for (i = 1; i < n_elts; ++i)
7662 x = XVECEXP (vals, 0, i);
7663 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7664 ++n_var, one_var = i;
7666 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7672 rtx constant = aarch64_simd_make_constant (vals);
7673 if (constant != NULL_RTX)
7675 emit_move_insn (target, constant);
7680 /* Splat a single non-constant element if we can. */
7683 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7684 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7688 /* One field is non-constant. Load constant then overwrite varying
7689 field. This is more efficient than using the stack. */
7692 rtx copy = copy_rtx (vals);
7693 rtx index = GEN_INT (one_var);
7694 enum insn_code icode;
7696 /* Load constant part of vector, substitute neighboring value for varying element. */
7698 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7699 aarch64_expand_vector_init (target, copy);
7701 /* Insert variable. */
7702 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7703 icode = optab_handler (vec_set_optab, mode);
7704 gcc_assert (icode != CODE_FOR_nothing);
7705 emit_insn (GEN_FCN (icode) (target, x, index));
7709 /* Construct the vector in memory one field at a time
7710 and load the whole vector. */
7711 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7712 for (i = 0; i < n_elts; i++)
7713 emit_move_insn (adjust_address_nv (mem, inner_mode,
7714 i * GET_MODE_SIZE (inner_mode)),
7715 XVECEXP (vals, 0, i));
7716 emit_move_insn (target, mem);
7720 static unsigned HOST_WIDE_INT
7721 aarch64_shift_truncation_mask (enum machine_mode mode)
7724 (aarch64_vector_mode_supported_p (mode)
7725 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
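/* Examples of the mask computed above: SImode yields 31 and DImode
   yields 63, since AArch64 scalar shifts use only the low bits of a
   variable shift amount; vector and vector-struct modes yield 0
   because SIMD shifts do not truncate, so the mid-end may not assume
   any wrap-around of shift counts.  */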
7728 #ifndef TLS_SECTION_ASM_FLAG
7729 #define TLS_SECTION_ASM_FLAG 'T'
7733 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7734 tree decl ATTRIBUTE_UNUSED)
7736 char flagchars[10], *f = flagchars;
7738 /* If we have already declared this section, we can use an
7739 abbreviated form to switch back to it -- unless this section is
7740 part of a COMDAT group, in which case GAS requires the full
7741 declaration every time. */
7742 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7743 && (flags & SECTION_DECLARED))
7745 fprintf (asm_out_file, "\t.section\t%s\n", name);
7749 if (!(flags & SECTION_DEBUG))
7751 if (flags & SECTION_WRITE)
7753 if (flags & SECTION_CODE)
7755 if (flags & SECTION_SMALL)
7757 if (flags & SECTION_MERGE)
7759 if (flags & SECTION_STRINGS)
7761 if (flags & SECTION_TLS)
7762 *f++ = TLS_SECTION_ASM_FLAG;
7763 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7767 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7769 if (!(flags & SECTION_NOTYPE))
7774 if (flags & SECTION_BSS)
7779 #ifdef TYPE_OPERAND_FMT
7780 format = "," TYPE_OPERAND_FMT;
7785 fprintf (asm_out_file, format, type);
7787 if (flags & SECTION_ENTSIZE)
7788 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7789 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7791 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7792 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7794 fprintf (asm_out_file, ",%s,comdat",
7795 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7799 putc ('\n', asm_out_file);
7802 /* Select a format to encode pointers in exception handling data. */
7804 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7807 switch (aarch64_cmodel)
7809 case AARCH64_CMODEL_TINY:
7810 case AARCH64_CMODEL_TINY_PIC:
7811 case AARCH64_CMODEL_SMALL:
7812 case AARCH64_CMODEL_SMALL_PIC:
7813 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient for everything. */
7815 type = DW_EH_PE_sdata4;
7818 /* No assumptions here. 8-byte relocs required. */
7819 type = DW_EH_PE_sdata8;
7822 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7825 /* Emit load exclusive. */
7828 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7829 rtx mem, rtx model_rtx)
7831 rtx (*gen) (rtx, rtx, rtx);
7835 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7836 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7837 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7838 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7843 emit_insn (gen (rval, mem, model_rtx));
7846 /* Emit store exclusive. */
7849 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7850 rtx rval, rtx mem, rtx model_rtx)
7852 rtx (*gen) (rtx, rtx, rtx, rtx);
7856 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7857 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7858 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7859 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7864 emit_insn (gen (bval, rval, mem, model_rtx));
7867 /* Mark the previous jump instruction as unlikely. */
7870 aarch64_emit_unlikely_jump (rtx insn)
7872 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
7874 insn = emit_jump_insn (insn);
7875 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
7878 /* Expand a compare and swap pattern. */
7881 aarch64_expand_compare_and_swap (rtx operands[])
7883 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7884 enum machine_mode mode, cmp_mode;
7885 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7890 oldval = operands[3];
7891 newval = operands[4];
7892 is_weak = operands[5];
7893 mod_s = operands[6];
7894 mod_f = operands[7];
7895 mode = GET_MODE (mem);
7898 /* Normally the succ memory model must be stronger than fail, but in the
7899 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7900 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7902 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7903 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7904 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7910 /* For short modes, we're going to perform the comparison in SImode,
7911 so do the zero-extension now. */
7913 rval = gen_reg_rtx (SImode);
7914 oldval = convert_modes (SImode, mode, oldval, true);
7919 /* Force the value into a register if needed. */
7920 if (!aarch64_plus_operand (oldval, mode))
7921 oldval = force_reg (cmp_mode, oldval);
7930 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7931 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7932 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7933 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7938 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7940 if (mode == QImode || mode == HImode)
7941 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7943 x = gen_rtx_REG (CCmode, CC_REGNUM);
7944 x = gen_rtx_EQ (SImode, x, const0_rtx);
7945 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7948 /* Split a compare and swap pattern. */
7951 aarch64_split_compare_and_swap (rtx operands[])
7953 rtx rval, mem, oldval, newval, scratch;
7954 enum machine_mode mode;
7956 rtx label1, label2, x, cond;
7960 oldval = operands[2];
7961 newval = operands[3];
7962 is_weak = (operands[4] != const0_rtx);
7963 scratch = operands[7];
7964 mode = GET_MODE (mem);
7969 label1 = gen_label_rtx ();
7970 emit_label (label1);
7972 label2 = gen_label_rtx ();
7974 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7976 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7977 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7978 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7979 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7980 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7982 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7986 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7987 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7988 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7989 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7993 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7994 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7995 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7998 emit_label (label2);
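/* Sketch of the sequence the splitter above produces for a strong
   SImode compare-and-swap (register names are illustrative, and the
   exclusive ops become LDAXR/STLXR for stronger memory models):

   label1:
     ldxr   w0, [x2]       // load-exclusive RVAL
     cmp    w0, w3         // compare against OLDVAL
     b.ne   label2         // mismatch: fail without storing
     stxr   w4, w5, [x2]   // store-exclusive NEWVAL
     cbnz   w4, label1     // lost the exclusive: retry
   label2:

   For a weak CAS the retry branch is dropped and SCRATCH is instead
   compared against zero so the caller can observe the failure.  */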
8001 /* Split an atomic operation. */
8004 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
8005 rtx value, rtx model_rtx, rtx cond)
8007 enum machine_mode mode = GET_MODE (mem);
8008 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
8011 label = gen_label_rtx ();
8015 new_out = gen_lowpart (wmode, new_out);
8017 old_out = gen_lowpart (wmode, old_out);
8020 value = simplify_gen_subreg (wmode, value, mode, 0);
8022 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
8031 x = gen_rtx_AND (wmode, old_out, value);
8032 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8033 x = gen_rtx_NOT (wmode, new_out);
8034 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8038 if (CONST_INT_P (value))
8040 value = GEN_INT (-INTVAL (value));
8046 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8047 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8051 aarch64_emit_store_exclusive (mode, cond, mem,
8052 gen_lowpart (mode, new_out), model_rtx);
8054 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8055 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8056 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8057 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
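/* Likewise, a sketch of the loop the splitter above emits for an
   SImode atomic add (register names illustrative; the exclusive
   instructions gain acquire/release semantics as MODEL_RTX requires):

   label:
     ldxr   w0, [x1]       // OLD_OUT := *MEM
     add    w2, w0, w3     // NEW_OUT := OLD_OUT + VALUE
     stxr   w4, w2, [x1]   // attempt to publish NEW_OUT
     cbnz   w4, label      // retry on contention  */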
8061 aarch64_print_extension (void)
8063 const struct aarch64_option_extension *opt = NULL;
8065 for (opt = all_extensions; opt->name != NULL; opt++)
8066 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8067 asm_fprintf (asm_out_file, "+%s", opt->name);
8069 asm_fprintf (asm_out_file, "\n");
8073 aarch64_start_file (void)
8077 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8078 aarch64_print_extension ();
8080 else if (selected_cpu)
8082 const char *truncated_name
8083 = aarch64_rewrite_selected_cpu (selected_cpu->name);
8084 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
8085 aarch64_print_extension ();
8087 default_file_start ();
8090 /* Target hook for c_mode_for_suffix. */
8091 static enum machine_mode
8092 aarch64_c_mode_for_suffix (char suffix)
8100 /* We can only represent floating point constants which will fit in
8101 "quarter-precision" values. These values are characterised by
8102 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
8105 (-1)^s * (n/16) * 2^r
8108 's' is the sign bit.
8109 'n' is an integer in the range 16 <= n <= 31.
8110 'r' is an integer in the range -3 <= r <= 4. */
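/* A few instances of the formula, for illustration: s = 0, n = 16,
   r = -3 gives (16/16) * 2^-3 = 0.125, the smallest positive value;
   s = 0, n = 31, r = 4 gives (31/16) * 2^4 = 31.0, the largest;
   1.0 itself is s = 0, n = 16, r = 0.  */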
8112 /* Return true iff X can be represented by a quarter-precision
8113 floating point immediate operand. Note, we cannot represent 0.0. */
8115 aarch64_float_const_representable_p (rtx x)
8117 /* This represents our current view of how many bits
8118 make up the mantissa. */
8119 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8121 unsigned HOST_WIDE_INT mantissa, mask;
8122 HOST_WIDE_INT m1, m2;
8123 REAL_VALUE_TYPE r, m;
8125 if (!CONST_DOUBLE_P (x))
8128 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8130 /* We cannot represent infinities, NaNs or +/-zero. We won't
8131 know if we have +zero until we analyse the mantissa, but we
8132 can reject the other invalid values. */
8133 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8134 || REAL_VALUE_MINUS_ZERO (r))
8137 /* Extract exponent. */
8138 r = real_value_abs (&r);
8139 exponent = REAL_EXP (&r);
8141 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8142 highest (sign) bit, with a fixed binary point at bit point_pos.
8143 m1 holds the low part of the mantissa, m2 the high part.
8144 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8145 bits for the mantissa, this can fail (low bits will be lost). */
8146 real_ldexp (&m, &r, point_pos - exponent);
8147 REAL_VALUE_TO_INT (&m1, &m2, m);
8149 /* If the low part of the mantissa has bits set we cannot represent the value. */
8153 /* We have rejected the lower HOST_WIDE_INT, so update our
8154 understanding of how many bits lie in the mantissa and
8155 look only at the high HOST_WIDE_INT. */
8157 point_pos -= HOST_BITS_PER_WIDE_INT;
8159 /* We can only represent values with a mantissa of the form 1.xxxx. */
8160 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8161 if ((mantissa & mask) != 0)
8164 /* Having filtered unrepresentable values, we may now remove all
8165 but the highest 5 bits. */
8166 mantissa >>= point_pos - 5;
8168 /* We cannot represent the value 0.0, so reject it. This is handled elsewhere. */
8173 /* Then, as bit 4 is always set, we can mask it off, leaving
8174 the mantissa in the range [0, 15]. */
8175 mantissa &= ~(1 << 4);
8176 gcc_assert (mantissa <= 15);
8178 /* GCC internally does not use IEEE754-like encoding (where normalized
8179 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
8180 Our mantissa values are shifted 4 places to the left relative to
8181 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8182 by 5 places to correct for GCC's representation. */
8183 exponent = 5 - exponent;
8185 return (exponent >= 0 && exponent <= 7);
8189 aarch64_output_simd_mov_immediate (rtx const_vector,
8190 enum machine_mode mode,
8194 static char templ[40];
8195 const char *mnemonic;
8196 const char *shift_op;
8197 unsigned int lane_count = 0;
8200 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
8202 /* This will return true to show const_vector is legal for use as
8203 an AdvSIMD MOVI (or, implicitly, MVNI) immediate. It will
8204 also update INFO to show how the immediate should be generated. */
8205 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
8206 gcc_assert (is_valid);
8208 element_char = sizetochar (info.element_width);
8209 lane_count = width / info.element_width;
8211 mode = GET_MODE_INNER (mode);
8212 if (mode == SFmode || mode == DFmode)
8214 gcc_assert (info.shift == 0 && ! info.mvn);
8215 if (aarch64_float_const_zero_rtx_p (info.value))
8216 info.value = GEN_INT (0);
8221 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8222 char float_buf[buf_size] = {'\0'};
8223 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8226 if (lane_count == 1)
8227 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8229 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
8230 lane_count, element_char, float_buf);
8235 mnemonic = info.mvn ? "mvni" : "movi";
8236 shift_op = info.msl ? "msl" : "lsl";
8238 if (lane_count == 1)
8239 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8240 mnemonic, UINTVAL (info.value));
8241 else if (info.shift)
8242 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8243 ", %s %d", mnemonic, lane_count, element_char,
8244 UINTVAL (info.value), shift_op, info.shift);
8246 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
8247 mnemonic, lane_count, element_char, UINTVAL (info.value));
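/* Examples of the templates built above (operand numbers and
   immediate values chosen for illustration):

     fmov  d0, 1.0e+0           // scalar floating-point case
     movi  v0.4s, 0xab, lsl 8   // shifted integer immediate
     mvni  v0.8h, 0x12          // inverted (MVN) immediate  */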
8252 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
8253 enum machine_mode mode)
8255 enum machine_mode vmode;
8257 gcc_assert (!VECTOR_MODE_P (mode));
8258 vmode = aarch64_simd_container_mode (mode, 64);
8259 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
8260 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
8263 /* Split operands into moves from op[1] + op[2] into op[0]. */
8266 aarch64_split_combinev16qi (rtx operands[3])
8268 unsigned int dest = REGNO (operands[0]);
8269 unsigned int src1 = REGNO (operands[1]);
8270 unsigned int src2 = REGNO (operands[2]);
8271 enum machine_mode halfmode = GET_MODE (operands[1]);
8272 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
8275 gcc_assert (halfmode == V16QImode);
8277 if (src1 == dest && src2 == dest + halfregs)
8279 /* No-op move. Can't split to nothing; emit something. */
8280 emit_note (NOTE_INSN_DELETED);
8284 /* Preserve register attributes for variable tracking. */
8285 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
8286 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
8287 GET_MODE_SIZE (halfmode));
8289 /* Special case of reversed high/low parts. */
8290 if (reg_overlap_mentioned_p (operands[2], destlo)
8291 && reg_overlap_mentioned_p (operands[1], desthi))
8293 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8294 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
8295 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
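/* The three XORs above are the classic in-place swap
   (a ^= b; b ^= a; a ^= b): both destination halves overlap both
   sources here, so there is no free register through which to stage
   the exchange.  */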
8297 else if (!reg_overlap_mentioned_p (operands[2], destlo))
8299 /* Try to avoid unnecessary moves if part of the result
8300 is in the right place already. */
8302 emit_move_insn (destlo, operands[1]);
8303 if (src2 != dest + halfregs)
8304 emit_move_insn (desthi, operands[2]);
8308 if (src2 != dest + halfregs)
8309 emit_move_insn (desthi, operands[2]);
8311 emit_move_insn (destlo, operands[1]);
8315 /* vec_perm support. */
8317 #define MAX_VECT_LEN 16
8319 struct expand_vec_perm_d
8321 rtx target, op0, op1;
8322 unsigned char perm[MAX_VECT_LEN];
8323 enum machine_mode vmode;
8329 /* Generate a variable permutation. */
8332 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
8334 enum machine_mode vmode = GET_MODE (target);
8335 bool one_vector_p = rtx_equal_p (op0, op1);
8337 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
8338 gcc_checking_assert (GET_MODE (op0) == vmode);
8339 gcc_checking_assert (GET_MODE (op1) == vmode);
8340 gcc_checking_assert (GET_MODE (sel) == vmode);
8341 gcc_checking_assert (TARGET_SIMD);
8345 if (vmode == V8QImode)
8347 /* Expand the argument to a V16QI mode by duplicating it. */
8348 rtx pair = gen_reg_rtx (V16QImode);
8349 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
8350 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8354 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
8361 if (vmode == V8QImode)
8363 pair = gen_reg_rtx (V16QImode);
8364 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
8365 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8369 pair = gen_reg_rtx (OImode);
8370 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
8371 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
8377 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
8379 enum machine_mode vmode = GET_MODE (target);
8380 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
8381 bool one_vector_p = rtx_equal_p (op0, op1);
8382 rtx rmask[MAX_VECT_LEN], mask;
8384 gcc_checking_assert (!BYTES_BIG_ENDIAN);
8386 /* The TBL instruction does not use a modulo index, so we must take care
8387 of that ourselves. */
8388 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
8389 for (i = 0; i < nelt; ++i)
8391 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
8392 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
8394 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
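/* E.g. for a two-input V16QImode permute the selector is ANDed with
   a splat of 31: TBL returns zero for an out-of-range index, whereas
   vec_perm semantics require the index to wrap, so the masking must
   happen before the table lookup.  */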
8397 /* Recognize patterns suitable for the TRN instructions. */
8399 aarch64_evpc_trn (struct expand_vec_perm_d *d)
8401 unsigned int i, odd, mask, nelt = d->nelt;
8402 rtx out, in0, in1, x;
8403 rtx (*gen) (rtx, rtx, rtx);
8404 enum machine_mode vmode = d->vmode;
8406 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8409 /* Note that these are little-endian tests.
8410 We correct for big-endian later. */
8411 if (d->perm[0] == 0)
8413 else if (d->perm[0] == 1)
8417 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8419 for (i = 0; i < nelt; i += 2)
8421 if (d->perm[i] != i + odd)
8423 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
8433 if (BYTES_BIG_ENDIAN)
8435 x = in0, in0 = in1, in1 = x;
8444 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
8445 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
8446 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
8447 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
8448 case V4SImode: gen = gen_aarch64_trn2v4si; break;
8449 case V2SImode: gen = gen_aarch64_trn2v2si; break;
8450 case V2DImode: gen = gen_aarch64_trn2v2di; break;
8451 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
8452 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
8453 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
8462 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
8463 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
8464 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
8465 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
8466 case V4SImode: gen = gen_aarch64_trn1v4si; break;
8467 case V2SImode: gen = gen_aarch64_trn1v2si; break;
8468 case V2DImode: gen = gen_aarch64_trn1v2di; break;
8469 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
8470 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
8471 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
8477 emit_insn (gen (out, in0, in1));
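/* For illustration, with V4SImode on little-endian the selector
   {0, 4, 2, 6} is matched here as TRN1 and {1, 5, 3, 7} as TRN2.  */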
8481 /* Recognize patterns suitable for the UZP instructions. */
8483 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
8485 unsigned int i, odd, mask, nelt = d->nelt;
8486 rtx out, in0, in1, x;
8487 rtx (*gen) (rtx, rtx, rtx);
8488 enum machine_mode vmode = d->vmode;
8490 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8493 /* Note that these are little-endian tests.
8494 We correct for big-endian later. */
8495 if (d->perm[0] == 0)
8497 else if (d->perm[0] == 1)
8501 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8503 for (i = 0; i < nelt; i++)
8505 unsigned elt = (i * 2 + odd) & mask;
8506 if (d->perm[i] != elt)
8516 if (BYTES_BIG_ENDIAN)
8518 x = in0, in0 = in1, in1 = x;
8527 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
8528 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
8529 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
8530 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
8531 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
8532 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
8533 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
8534 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
8535 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
8536 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
8545 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
8546 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
8547 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
8548 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
8549 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
8550 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
8551 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
8552 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
8553 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
8554 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
8560 emit_insn (gen (out, in0, in1));
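/* For illustration, with V4SImode on little-endian the selector
   {0, 2, 4, 6} is matched here as UZP1 and {1, 3, 5, 7} as UZP2.  */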
8564 /* Recognize patterns suitable for the ZIP instructions. */
8566 aarch64_evpc_zip (struct expand_vec_perm_d *d)
8568 unsigned int i, high, mask, nelt = d->nelt;
8569 rtx out, in0, in1, x;
8570 rtx (*gen) (rtx, rtx, rtx);
8571 enum machine_mode vmode = d->vmode;
8573 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8576 /* Note that these are little-endian tests.
8577 We correct for big-endian later. */
8579 if (d->perm[0] == high)
8582 else if (d->perm[0] == 0)
8586 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8588 for (i = 0; i < nelt / 2; i++)
8590 unsigned elt = (i + high) & mask;
8591 if (d->perm[i * 2] != elt)
8593 elt = (elt + nelt) & mask;
8594 if (d->perm[i * 2 + 1] != elt)
8604 if (BYTES_BIG_ENDIAN)
8606 x = in0, in0 = in1, in1 = x;
8615 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
8616 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
8617 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
8618 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
8619 case V4SImode: gen = gen_aarch64_zip2v4si; break;
8620 case V2SImode: gen = gen_aarch64_zip2v2si; break;
8621 case V2DImode: gen = gen_aarch64_zip2v2di; break;
8622 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
8623 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
8624 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
8633 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
8634 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
8635 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
8636 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
8637 case V4SImode: gen = gen_aarch64_zip1v4si; break;
8638 case V2SImode: gen = gen_aarch64_zip1v2si; break;
8639 case V2DImode: gen = gen_aarch64_zip1v2di; break;
8640 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
8641 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
8642 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
8648 emit_insn (gen (out, in0, in1));
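/* For illustration, with V4SImode on little-endian the selector
   {0, 4, 1, 5} is matched here as ZIP1 and {2, 6, 3, 7} as ZIP2.  */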
8653 aarch64_evpc_dup (struct expand_vec_perm_d *d)
8655 rtx (*gen) (rtx, rtx, rtx);
8656 rtx out = d->target;
8658 enum machine_mode vmode = d->vmode;
8659 unsigned int i, elt, nelt = d->nelt;
8662 /* TODO: This may not be big-endian safe. */
8663 if (BYTES_BIG_ENDIAN)
8667 for (i = 1; i < nelt; i++)
8669 if (elt != d->perm[i])
8673 /* The generic preparation in aarch64_expand_vec_perm_const_1
8674 swaps the operand order and the permute indices if it finds
8675 d->perm[0] to be in the second operand. Thus, we can always
8676 use d->op0 and need not do any extra arithmetic to get the
8677 correct lane number. */
8679 lane = GEN_INT (elt);
8683 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8684 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8685 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8686 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8687 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8688 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8689 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8690 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8691 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8692 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8697 emit_insn (gen (out, in0, lane));
8702 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8704 rtx rperm[MAX_VECT_LEN], sel;
8705 enum machine_mode vmode = d->vmode;
8706 unsigned int i, nelt = d->nelt;
8708 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
8709 numbering of elements for big-endian, we must reverse the order. */
8710 if (BYTES_BIG_ENDIAN)
8716 /* Generic code will try constant permutation twice. Once with the
8717 original mode and again with the elements lowered to QImode.
8718 So wait and don't do the selector expansion ourselves. */
8719 if (vmode != V8QImode && vmode != V16QImode)
8722 for (i = 0; i < nelt; ++i)
8723 rperm[i] = GEN_INT (d->perm[i]);
8724 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8725 sel = force_reg (vmode, sel);
8727 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8732 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8734 /* The pattern matching functions above are written to look for a small
8735 number to begin the sequence (0, 1, N/2). If we begin with an index
8736 from the second operand, we can swap the operands. */
8737 if (d->perm[0] >= d->nelt)
8739 unsigned i, nelt = d->nelt;
8742 for (i = 0; i < nelt; ++i)
8743 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
8752 if (aarch64_evpc_zip (d))
8754 else if (aarch64_evpc_uzp (d))
8756 else if (aarch64_evpc_trn (d))
8758 else if (aarch64_evpc_dup (d))
8760 return aarch64_evpc_tbl (d);
8765 /* Expand a vec_perm_const pattern. */
8768 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8770 struct expand_vec_perm_d d;
8777 d.vmode = GET_MODE (target);
8778 gcc_assert (VECTOR_MODE_P (d.vmode));
8779 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8780 d.testing_p = false;
8782 for (i = which = 0; i < nelt; ++i)
8784 rtx e = XVECEXP (sel, 0, i);
8785 int ei = INTVAL (e) & (2 * nelt - 1);
8786 which |= (ei < nelt ? 1 : 2);
8796 d.one_vector_p = false;
8797 if (!rtx_equal_p (op0, op1))
8800 /* The elements of PERM do not suggest that only the first operand
8801 is used, but both operands are identical. Allow easier matching
8802 of the permutation by folding the permutation into the single input vector. */
8806 for (i = 0; i < nelt; ++i)
8807 d.perm[i] &= nelt - 1;
8809 d.one_vector_p = true;
8814 d.one_vector_p = true;
8818 return aarch64_expand_vec_perm_const_1 (&d);
8822 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8823 const unsigned char *sel)
8825 struct expand_vec_perm_d d;
8826 unsigned int i, nelt, which;
8830 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8832 memcpy (d.perm, sel, nelt);
8834 /* Calculate whether all elements are in one vector. */
8835 for (i = which = 0; i < nelt; ++i)
8837 unsigned char e = d.perm[i];
8838 gcc_assert (e < 2 * nelt);
8839 which |= (e < nelt ? 1 : 2);
8842 /* If all elements are from the second vector, reindex as if from the first vector. */
8845 for (i = 0; i < nelt; ++i)
8848 /* Check whether the mask can be applied to a single vector. */
8849 d.one_vector_p = (which != 3);
8851 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8852 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8853 if (!d.one_vector_p)
8854 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8857 ret = aarch64_expand_vec_perm_const_1 (&d);
8863 /* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
8865 aarch64_cannot_change_mode_class (enum machine_mode from,
8866 enum machine_mode to,
8867 enum reg_class rclass)
8869 /* Full-reg subregs are allowed on general regs or any class if they are the same size. */
8871 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
8872 || !reg_classes_intersect_p (FP_REGS, rclass))
8875 /* Limited combinations of subregs are safe on FPREGs. Particularly,
8876 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
8877 2. Scalar to Scalar for integer modes or same size float modes.
8878 3. Vector to Vector modes. */
8879 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
8881 if (aarch64_vector_mode_supported_p (from)
8882 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
8885 if (GET_MODE_NUNITS (from) == 1
8886 && GET_MODE_NUNITS (to) == 1
8887 && (GET_MODE_CLASS (from) == MODE_INT
8891 if (aarch64_vector_mode_supported_p (from)
8892 && aarch64_vector_mode_supported_p (to))
8899 #undef TARGET_ADDRESS_COST
8900 #define TARGET_ADDRESS_COST aarch64_address_cost
8902 /* This hook determines whether unnamed bitfields affect the alignment
8903 of the containing structure. The hook returns true if the structure
8904 should inherit the alignment requirements of an unnamed bitfield's type. */
8906 #undef TARGET_ALIGN_ANON_BITFIELD
8907 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8909 #undef TARGET_ASM_ALIGNED_DI_OP
8910 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8912 #undef TARGET_ASM_ALIGNED_HI_OP
8913 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8915 #undef TARGET_ASM_ALIGNED_SI_OP
8916 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8918 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8919 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8920 hook_bool_const_tree_hwi_hwi_const_tree_true
8922 #undef TARGET_ASM_FILE_START
8923 #define TARGET_ASM_FILE_START aarch64_start_file
8925 #undef TARGET_ASM_OUTPUT_MI_THUNK
8926 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8928 #undef TARGET_ASM_SELECT_RTX_SECTION
8929 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8931 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8932 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8934 #undef TARGET_BUILD_BUILTIN_VA_LIST
8935 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8937 #undef TARGET_CALLEE_COPIES
8938 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8940 #undef TARGET_CAN_ELIMINATE
8941 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8943 #undef TARGET_CANNOT_FORCE_CONST_MEM
8944 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8946 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8947 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8949 /* Only the least significant bit is used for initialization guard variables. */
8951 #undef TARGET_CXX_GUARD_MASK_BIT
8952 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
8954 #undef TARGET_C_MODE_FOR_SUFFIX
8955 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8957 #ifdef TARGET_BIG_ENDIAN_DEFAULT
8958 #undef TARGET_DEFAULT_TARGET_FLAGS
8959 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8962 #undef TARGET_CLASS_MAX_NREGS
8963 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8965 #undef TARGET_BUILTIN_DECL
8966 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
8968 #undef TARGET_EXPAND_BUILTIN
8969 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8971 #undef TARGET_EXPAND_BUILTIN_VA_START
8972 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8974 #undef TARGET_FOLD_BUILTIN
8975 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8977 #undef TARGET_FUNCTION_ARG
8978 #define TARGET_FUNCTION_ARG aarch64_function_arg
8980 #undef TARGET_FUNCTION_ARG_ADVANCE
8981 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8983 #undef TARGET_FUNCTION_ARG_BOUNDARY
8984 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8986 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8987 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8989 #undef TARGET_FUNCTION_VALUE
8990 #define TARGET_FUNCTION_VALUE aarch64_function_value
8992 #undef TARGET_FUNCTION_VALUE_REGNO_P
8993 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8995 #undef TARGET_FRAME_POINTER_REQUIRED
8996 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8998 #undef TARGET_GIMPLE_FOLD_BUILTIN
8999 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
9001 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
9002 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
9004 #undef TARGET_INIT_BUILTINS
9005 #define TARGET_INIT_BUILTINS aarch64_init_builtins
9007 #undef TARGET_LEGITIMATE_ADDRESS_P
9008 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
9010 #undef TARGET_LEGITIMATE_CONSTANT_P
9011 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
9013 #undef TARGET_LIBGCC_CMP_RETURN_MODE
9014 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
9017 #define TARGET_LRA_P aarch64_lra_p
9019 #undef TARGET_MANGLE_TYPE
9020 #define TARGET_MANGLE_TYPE aarch64_mangle_type
9022 #undef TARGET_MEMORY_MOVE_COST
9023 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
9025 #undef TARGET_MUST_PASS_IN_STACK
9026 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
9028 /* This target hook should return true if accesses to volatile bitfields
9029 should use the narrowest mode possible. It should return false if these
9030 accesses should use the bitfield container type. */
9031 #undef TARGET_NARROW_VOLATILE_BITFIELD
9032 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
9034 #undef TARGET_OPTION_OVERRIDE
9035 #define TARGET_OPTION_OVERRIDE aarch64_override_options
9037 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
9038 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
9039 aarch64_override_options_after_change
9041 #undef TARGET_PASS_BY_REFERENCE
9042 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
9044 #undef TARGET_PREFERRED_RELOAD_CLASS
9045 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
9047 #undef TARGET_SECONDARY_RELOAD
9048 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
9050 #undef TARGET_SHIFT_TRUNCATION_MASK
9051 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
9053 #undef TARGET_SETUP_INCOMING_VARARGS
9054 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
9056 #undef TARGET_STRUCT_VALUE_RTX
9057 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
9059 #undef TARGET_REGISTER_MOVE_COST
9060 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
9062 #undef TARGET_RETURN_IN_MEMORY
9063 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
9065 #undef TARGET_RETURN_IN_MSB
9066 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
9068 #undef TARGET_RTX_COSTS
9069 #define TARGET_RTX_COSTS aarch64_rtx_costs
9071 #undef TARGET_SCHED_ISSUE_RATE
9072 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
9074 #undef TARGET_TRAMPOLINE_INIT
9075 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
9077 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9078 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
9080 #undef TARGET_VECTOR_MODE_SUPPORTED_P
9081 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
9083 #undef TARGET_ARRAY_MODE_SUPPORTED_P
9084 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
9086 #undef TARGET_VECTORIZE_ADD_STMT_COST
9087 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
9089 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
9090 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
9091 aarch64_builtin_vectorization_cost
9093 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
9094 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
9096 #undef TARGET_VECTORIZE_BUILTINS
9097 #define TARGET_VECTORIZE_BUILTINS
9099 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
9100 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
9101 aarch64_builtin_vectorized_function
9103 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
9104 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
9105 aarch64_autovectorize_vector_sizes
9107 /* Section anchor support. */
9109 #undef TARGET_MIN_ANCHOR_OFFSET
9110 #define TARGET_MIN_ANCHOR_OFFSET -256
9112 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
9113 byte offset; we can do much more for larger data types, but have no way
9114 to determine the size of the access. We assume accesses are aligned. */
9115 #undef TARGET_MAX_ANCHOR_OFFSET
9116 #define TARGET_MAX_ANCHOR_OFFSET 4095
9118 #undef TARGET_VECTOR_ALIGNMENT
9119 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
9121 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
9122 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
9123 aarch64_simd_vector_alignment_reachable
9125 /* vec_perm support. */
9127 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
9128 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
9129 aarch64_vectorize_vec_perm_const_ok
9132 #undef TARGET_FIXED_CONDITION_CODE_REGS
9133 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
9135 #undef TARGET_FLAGS_REGNUM
9136 #define TARGET_FLAGS_REGNUM CC_REGNUM
9138 struct gcc_target targetm = TARGET_INITIALIZER;
9140 #include "gt-aarch64.h"