/* Output routines for GCC for ARM.
   Copyright (C) 1991-2019 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#define INCLUDE_STRING
#include "system.h"
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "insn-attr.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "target-globals.h"
#include "tm-constrs.h"
#include "optabs-libfuncs.h"

/* This file should be included last.  */
#include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

/* The last .arch and .fpu assembly strings that we printed.  */
static std::string arm_last_printed_arch_string;
static std::string arm_last_printed_fpu_string;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static int arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_compute_frame_layout (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
			     unsigned HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static unsigned bitmap_popcount (const sbitmap);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, machine_mode, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx_insn *);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
			       machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_core_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
static void arm_output_function_epilogue (FILE *);
static void arm_output_function_prologue (FILE *);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
					       machine_mode, int *,
					       const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static int arm_arg_partial_bytes (cumulative_args_t,
				  const function_arg_info &);
static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
static void arm_function_arg_advance (cumulative_args_t,
				      const function_arg_info &);
static pad_direction arm_function_arg_padding (machine_mode, const_tree);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif
static void arm_file_end (void);
static void arm_file_start (void);
static void arm_insert_attributes (tree, tree *);

static void arm_setup_incoming_varargs (cumulative_args_t,
					const function_arg_info &, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   const function_arg_info &);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (const function_arg_info &);
static bool arm_return_in_memory (const_tree, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
#endif
static void arm_asm_init_sections (void);
static rtx arm_dwarf_register_span (rtx);
static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static void arm_option_save (struct cl_target_option *, struct gcc_options *);
static void arm_option_restore (struct gcc_options *,
				struct cl_target_option *);
static void arm_override_options_after_change (void);
static void arm_option_print (FILE *, int, struct cl_target_option *);
static void arm_set_current_function (tree);
static bool arm_can_inline_p (tree, tree);
static void arm_relayout_function (tree);
static bool arm_valid_target_attribute_p (tree, tree, tree, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_sched_can_speculate_insn (rtx_insn *);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static tree arm_promoted_type (const_tree t);
static bool arm_scalar_mode_supported_p (scalar_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
					unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (scalar_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
						     const_tree type,
						     int misalign,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static void arm_autovectorize_vector_sizes (vector_sizes *, bool);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
					  const vec_perm_indices &);

static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
				   enum vect_cost_for_stmt kind,
				   struct _stmt_vec_info *stmt_info,
				   int misalign,
				   enum vect_cost_model_location where);
static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
				     HOST_WIDE_INT, const_tree);
static section *arm_function_section (tree, enum node_frequency, bool, bool);
static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
						int reloc);
static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
static opt_scalar_float_mode arm_floatn_mode (int, bool);
static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
static bool arm_modes_tieable_p (machine_mode, machine_mode);
static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  false, NULL, NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  false, NULL, NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  false, arm_handle_pcs_attribute,
    NULL },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "interrupt",    0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "naked",        0, 0, true,  false, false, false,
    arm_handle_fndecl_attribute, NULL },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, false, NULL, NULL },
  { "dllexport",    0, 0, true,  false, false, false, NULL, NULL },
  { "interfacearm", 0, 0, true,  false, false, false,
    arm_handle_fndecl_attribute, NULL },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "dllexport",    0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "notshared",    0, 0, false, true,  false, false,
    arm_handle_notshared_attribute, NULL },
#endif
  /* ARMv8-M Security Extensions support.  */
  { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
    arm_handle_cmse_nonsecure_entry, NULL },
  { "cmse_nonsecure_call", 0, 0, true, false, false, true,
    arm_handle_cmse_nonsecure_call, NULL },
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
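
/* Usage sketch (illustrative, not part of the original file): the "isr"
   and "interrupt" entries above take an optional argument naming the
   interrupt kind, e.g.

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   arm_handle_isr_attribute is the handler responsible for validating
   that argument.  */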
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P arm_can_inline_p

#undef TARGET_RELAYOUT_FUNCTION
#define TARGET_RELAYOUT_FUNCTION arm_relayout_function

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE arm_option_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE arm_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT arm_option_print

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE arm_floatn_mode

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#endif /* ARM_UNWIND_INFO */
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
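
/* Worked arithmetic for the comment above (illustrative): the block
   spans offsets -4088 .. +4095, i.e. 4088 + 1 + 4095 = 8184 bytes,
   and 8184 = 8 * 1023, hence the natural eight-byte spacing.  */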

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset

#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
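
/* Note (added for clarity): a Thumb-2 IT instruction can predicate up to
   four following instructions, but ARMv8-A deprecates IT blocks that
   contain more than a single 16-bit instruction; -mrestrict-it (which
   sets arm_restrict_it) therefore caps the block at one insn.  */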

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION arm_function_section

#undef TARGET_ASM_ELF_FLAGS_NUMERIC
#define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION arm_excess_precision

/* Although the architecture reserves bits 0 and 1, only the former is
   used for ARM/Thumb ISA selection in v7 and earlier versions.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
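
/* A reading of the value above (an inference from the comment, not taken
   from GCC's internals documentation): with bit 0 occupied by ARM/Thumb
   selection, bit 1 (mask value 2) is the low address bit left free to
   tag pointers to custom function descriptors.  */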

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P arm_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char * minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = TARGET_CPU_arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned int tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;

/* Active target architecture and tuning.  */

struct arm_build_target arm_active_target;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5T extensions.  */
int arm_arch5t = 0;

/* Nonzero if this chip supports the ARM Architecture 5TE extensions.  */
int arm_arch5te = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 6KZ extensions.  */
int arm_arch6kz = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if this chip supports the Large Physical Address Extension.  */
int arm_arch_lpae = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip supports the ARMv8.1 extensions.  */
int arm_arch8_1 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
int arm_arch8_2 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.3 extensions.  */
int arm_arch8_3 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.4 extensions.  */
int arm_arch8_4 = 0;

/* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2.  */
int arm_fp16_inst = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 1.  */
int arm_arch_thumb1;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */

/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;
rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if chip supports the AdvSIMD Dot Product instructions.  */
int arm_arch_dotprod = 0;

/* Nonzero if chip supports the ARMv8-M security extensions.  */
int arm_arch_cmse = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME "lsl"
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
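
/* Worked example (illustrative, assuming the usual regno assignments:
   Thumb hard frame pointer r7, SP 13, PC 15, and e.g. r9 as the PIC
   register): 0xff selects r0-r7; clearing the frame-pointer bit leaves
   r0-r6, while the SP, PC and r9 bits already lie outside the 0xff
   mask.  */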

/* Initialization code.  */

struct cpu_tune
{
  enum processor_type scheduler;
  unsigned int tune_flags;
  const struct tune_params *tune;
};

#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  { num_slots, l1_size, l1_line_size }
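
/* Reading of the initializers above (the field order is an assumption
   based on the prefetch tuning record: number of slots, L1 cache size,
   L1 line size): ARM_PREFETCH_NOT_BENEFICIAL is { 0, -1, -1 }, i.e. no
   prefetch slots and unknown cache geometry, -1 acting as the "unknown"
   sentinel.  */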

/* arm generic vectorizer costs.  */
static const
struct cpu_vec_costs arm_default_vec_cost = {
  1, /* scalar_stmt_cost.  */
  1, /* scalar_load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  1, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  1, /* vec_unalign_load_cost.  */
  1, /* vec_unalign_store_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};

/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"

const struct cpu_cost_table cortexa9_extra_costs =
  COSTS_N_INSNS (1), /* shift_reg.  */
  COSTS_N_INSNS (1), /* arith_shift.  */
  COSTS_N_INSNS (2), /* arith_shift_reg.  */
  COSTS_N_INSNS (1), /* log_shift_reg.  */
  COSTS_N_INSNS (1), /* extend.  */
  COSTS_N_INSNS (2), /* extend_arith.  */
  COSTS_N_INSNS (1), /* bfi.  */
  COSTS_N_INSNS (1), /* bfx.  */
  true /* non_exec_costs_exec.  */
  COSTS_N_INSNS (3), /* simple.  */
  COSTS_N_INSNS (3), /* flag_setting.  */
  COSTS_N_INSNS (2), /* extend.  */
  COSTS_N_INSNS (3), /* add.  */
  COSTS_N_INSNS (2), /* extend_add.  */
  COSTS_N_INSNS (30) /* idiv.  No HW div on Cortex A9.  */
  0, /* simple (N/A).  */
  0, /* flag_setting (N/A).  */
  COSTS_N_INSNS (4), /* extend.  */
  COSTS_N_INSNS (4), /* extend_add.  */
  COSTS_N_INSNS (2), /* load.  */
  COSTS_N_INSNS (2), /* load_sign_extend.  */
  COSTS_N_INSNS (2), /* ldrd.  */
  COSTS_N_INSNS (2), /* ldm_1st.  */
  1, /* ldm_regs_per_insn_1st.  */
  2, /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (5), /* loadf.  */
  COSTS_N_INSNS (5), /* loadd.  */
  COSTS_N_INSNS (1), /* load_unaligned.  */
  COSTS_N_INSNS (2), /* store.  */
  COSTS_N_INSNS (2), /* strd.  */
  COSTS_N_INSNS (2), /* stm_1st.  */
  1, /* stm_regs_per_insn_1st.  */
  2, /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1), /* storef.  */
  COSTS_N_INSNS (1), /* stored.  */
  COSTS_N_INSNS (1), /* store_unaligned.  */
  COSTS_N_INSNS (1), /* loadv.  */
  COSTS_N_INSNS (1) /* storev.  */
  COSTS_N_INSNS (14), /* div.  */
  COSTS_N_INSNS (4), /* mult.  */
  COSTS_N_INSNS (7), /* mult_addsub.  */
  COSTS_N_INSNS (30), /* fma.  */
  COSTS_N_INSNS (3), /* addsub.  */
  COSTS_N_INSNS (1), /* fpconst.  */
  COSTS_N_INSNS (1), /* neg.  */
  COSTS_N_INSNS (3), /* compare.  */
  COSTS_N_INSNS (3), /* widen.  */
  COSTS_N_INSNS (3), /* narrow.  */
  COSTS_N_INSNS (3), /* toint.  */
  COSTS_N_INSNS (3), /* fromint.  */
  COSTS_N_INSNS (3) /* roundint.  */
  COSTS_N_INSNS (24), /* div.  */
  COSTS_N_INSNS (5), /* mult.  */
  COSTS_N_INSNS (8), /* mult_addsub.  */
  COSTS_N_INSNS (30), /* fma.  */
  COSTS_N_INSNS (3), /* addsub.  */
  COSTS_N_INSNS (1), /* fpconst.  */
  COSTS_N_INSNS (1), /* neg.  */
  COSTS_N_INSNS (3), /* compare.  */
  COSTS_N_INSNS (3), /* widen.  */
  COSTS_N_INSNS (3), /* narrow.  */
  COSTS_N_INSNS (3), /* toint.  */
  COSTS_N_INSNS (3), /* fromint.  */
  COSTS_N_INSNS (3) /* roundint.  */
  COSTS_N_INSNS (1) /* alu.  */

const struct cpu_cost_table cortexa8_extra_costs =
  COSTS_N_INSNS (1), /* shift.  */
  COSTS_N_INSNS (1), /* arith_shift.  */
  0, /* arith_shift_reg.  */
  COSTS_N_INSNS (1), /* log_shift.  */
  0, /* log_shift_reg.  */
  0, /* extend_arith.  */
  true /* non_exec_costs_exec.  */
  COSTS_N_INSNS (1), /* simple.  */
  COSTS_N_INSNS (1), /* flag_setting.  */
  COSTS_N_INSNS (1), /* extend.  */
  COSTS_N_INSNS (1), /* add.  */
  COSTS_N_INSNS (1), /* extend_add.  */
  COSTS_N_INSNS (30) /* idiv.  No HW div on Cortex A8.  */
  0, /* simple (N/A).  */
  0, /* flag_setting (N/A).  */
  COSTS_N_INSNS (2), /* extend.  */
  COSTS_N_INSNS (2), /* extend_add.  */
  COSTS_N_INSNS (1), /* load.  */
  COSTS_N_INSNS (1), /* load_sign_extend.  */
  COSTS_N_INSNS (1), /* ldrd.  */
  COSTS_N_INSNS (1), /* ldm_1st.  */
  1, /* ldm_regs_per_insn_1st.  */
  2, /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1), /* loadf.  */
  COSTS_N_INSNS (1), /* loadd.  */
  COSTS_N_INSNS (1), /* load_unaligned.  */
  COSTS_N_INSNS (1), /* store.  */
  COSTS_N_INSNS (1), /* strd.  */
  COSTS_N_INSNS (1), /* stm_1st.  */
  1, /* stm_regs_per_insn_1st.  */
  2, /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1), /* storef.  */
  COSTS_N_INSNS (1), /* stored.  */
  COSTS_N_INSNS (1), /* store_unaligned.  */
  COSTS_N_INSNS (1), /* loadv.  */
  COSTS_N_INSNS (1) /* storev.  */
  COSTS_N_INSNS (36), /* div.  */
  COSTS_N_INSNS (11), /* mult.  */
  COSTS_N_INSNS (20), /* mult_addsub.  */
  COSTS_N_INSNS (30), /* fma.  */
  COSTS_N_INSNS (9), /* addsub.  */
  COSTS_N_INSNS (3), /* fpconst.  */
  COSTS_N_INSNS (3), /* neg.  */
  COSTS_N_INSNS (6), /* compare.  */
  COSTS_N_INSNS (4), /* widen.  */
  COSTS_N_INSNS (4), /* narrow.  */
  COSTS_N_INSNS (8), /* toint.  */
  COSTS_N_INSNS (8), /* fromint.  */
  COSTS_N_INSNS (8) /* roundint.  */
  COSTS_N_INSNS (64), /* div.  */
  COSTS_N_INSNS (16), /* mult.  */
  COSTS_N_INSNS (25), /* mult_addsub.  */
  COSTS_N_INSNS (30), /* fma.  */
  COSTS_N_INSNS (9), /* addsub.  */
  COSTS_N_INSNS (3), /* fpconst.  */
  COSTS_N_INSNS (3), /* neg.  */
  COSTS_N_INSNS (6), /* compare.  */
  COSTS_N_INSNS (6), /* widen.  */
  COSTS_N_INSNS (6), /* narrow.  */
  COSTS_N_INSNS (8), /* toint.  */
  COSTS_N_INSNS (8), /* fromint.  */
  COSTS_N_INSNS (8) /* roundint.  */
  COSTS_N_INSNS (1) /* alu.  */

const struct cpu_cost_table cortexa5_extra_costs =
  COSTS_N_INSNS (1), /* shift.  */
  COSTS_N_INSNS (1), /* shift_reg.  */
  COSTS_N_INSNS (1), /* arith_shift.  */
  COSTS_N_INSNS (1), /* arith_shift_reg.  */
  COSTS_N_INSNS (1), /* log_shift.  */
  COSTS_N_INSNS (1), /* log_shift_reg.  */
  COSTS_N_INSNS (1), /* extend.  */
  COSTS_N_INSNS (1), /* extend_arith.  */
  COSTS_N_INSNS (1), /* bfi.  */
  COSTS_N_INSNS (1), /* bfx.  */
  COSTS_N_INSNS (1), /* clz.  */
  COSTS_N_INSNS (1), /* rev.  */
  true /* non_exec_costs_exec.  */
  COSTS_N_INSNS (1), /* flag_setting.  */
  COSTS_N_INSNS (1), /* extend.  */
  COSTS_N_INSNS (1), /* add.  */
  COSTS_N_INSNS (1), /* extend_add.  */
  COSTS_N_INSNS (7) /* idiv.  */
  0, /* simple (N/A).  */
  0, /* flag_setting (N/A).  */
  COSTS_N_INSNS (1), /* extend.  */
  COSTS_N_INSNS (2), /* extend_add.  */
  COSTS_N_INSNS (1), /* load.  */
  COSTS_N_INSNS (1), /* load_sign_extend.  */
  COSTS_N_INSNS (6), /* ldrd.  */
  COSTS_N_INSNS (1), /* ldm_1st.  */
  1, /* ldm_regs_per_insn_1st.  */
  2, /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2), /* loadf.  */
  COSTS_N_INSNS (4), /* loadd.  */
  COSTS_N_INSNS (1), /* load_unaligned.  */
  COSTS_N_INSNS (1), /* store.  */
  COSTS_N_INSNS (3), /* strd.  */
  COSTS_N_INSNS (1), /* stm_1st.  */
  1, /* stm_regs_per_insn_1st.  */
  2, /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2), /* storef.  */
  COSTS_N_INSNS (2), /* stored.  */
  COSTS_N_INSNS (1), /* store_unaligned.  */
  COSTS_N_INSNS (1), /* loadv.  */
  COSTS_N_INSNS (1) /* storev.  */
  COSTS_N_INSNS (15), /* div.  */
  COSTS_N_INSNS (3), /* mult.  */
  COSTS_N_INSNS (7), /* mult_addsub.  */
  COSTS_N_INSNS (7), /* fma.  */
  COSTS_N_INSNS (3), /* addsub.  */
  COSTS_N_INSNS (3), /* fpconst.  */
  COSTS_N_INSNS (3), /* neg.  */
  COSTS_N_INSNS (3), /* compare.  */
  COSTS_N_INSNS (3), /* widen.  */
  COSTS_N_INSNS (3), /* narrow.  */
  COSTS_N_INSNS (3), /* toint.  */
  COSTS_N_INSNS (3), /* fromint.  */
  COSTS_N_INSNS (3) /* roundint.  */
  COSTS_N_INSNS (30), /* div.  */
  COSTS_N_INSNS (6), /* mult.  */
  COSTS_N_INSNS (10), /* mult_addsub.  */
  COSTS_N_INSNS (7), /* fma.  */
  COSTS_N_INSNS (3), /* addsub.  */
  COSTS_N_INSNS (3), /* fpconst.  */
  COSTS_N_INSNS (3), /* neg.  */
  COSTS_N_INSNS (3), /* compare.  */
  COSTS_N_INSNS (3), /* widen.  */
  COSTS_N_INSNS (3), /* narrow.  */
  COSTS_N_INSNS (3), /* toint.  */
  COSTS_N_INSNS (3), /* fromint.  */
  COSTS_N_INSNS (3) /* roundint.  */
  COSTS_N_INSNS (1) /* alu.  */

const struct cpu_cost_table cortexa7_extra_costs =
  COSTS_N_INSNS (1), /* shift.  */
  COSTS_N_INSNS (1), /* shift_reg.  */
  COSTS_N_INSNS (1), /* arith_shift.  */
  COSTS_N_INSNS (1), /* arith_shift_reg.  */
  COSTS_N_INSNS (1), /* log_shift.  */
  COSTS_N_INSNS (1), /* log_shift_reg.  */
  COSTS_N_INSNS (1), /* extend.  */
  COSTS_N_INSNS (1), /* extend_arith.  */
  COSTS_N_INSNS (1), /* bfi.  */
  COSTS_N_INSNS (1), /* bfx.  */
  COSTS_N_INSNS (1), /* clz.  */
  COSTS_N_INSNS (1), /* rev.  */
  true /* non_exec_costs_exec.  */
  COSTS_N_INSNS (1), /* flag_setting.  */
  COSTS_N_INSNS (1), /* extend.  */
  COSTS_N_INSNS (1), /* add.  */
  COSTS_N_INSNS (1), /* extend_add.  */
  COSTS_N_INSNS (7) /* idiv.  */
  0, /* simple (N/A).  */
  0, /* flag_setting (N/A).  */
  COSTS_N_INSNS (1), /* extend.  */
  COSTS_N_INSNS (2), /* extend_add.  */
  COSTS_N_INSNS (1), /* load.  */
  COSTS_N_INSNS (1), /* load_sign_extend.  */
  COSTS_N_INSNS (3), /* ldrd.  */
  COSTS_N_INSNS (1), /* ldm_1st.  */
  1, /* ldm_regs_per_insn_1st.  */
  2, /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2), /* loadf.  */
  COSTS_N_INSNS (2), /* loadd.  */
  COSTS_N_INSNS (1), /* load_unaligned.  */
  COSTS_N_INSNS (1), /* store.  */
  COSTS_N_INSNS (3), /* strd.  */
  COSTS_N_INSNS (1), /* stm_1st.  */
  1, /* stm_regs_per_insn_1st.  */
  2, /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2), /* storef.  */
  COSTS_N_INSNS (2), /* stored.  */
  COSTS_N_INSNS (1), /* store_unaligned.  */
  COSTS_N_INSNS (1), /* loadv.  */
  COSTS_N_INSNS (1) /* storev.  */
  COSTS_N_INSNS (15), /* div.  */
  COSTS_N_INSNS (3), /* mult.  */
  COSTS_N_INSNS (7), /* mult_addsub.  */
  COSTS_N_INSNS (7), /* fma.  */
  COSTS_N_INSNS (3), /* addsub.  */
  COSTS_N_INSNS (3), /* fpconst.  */
  COSTS_N_INSNS (3), /* neg.  */
  COSTS_N_INSNS (3), /* compare.  */
  COSTS_N_INSNS (3), /* widen.  */
  COSTS_N_INSNS (3), /* narrow.  */
  COSTS_N_INSNS (3), /* toint.  */
  COSTS_N_INSNS (3), /* fromint.  */
  COSTS_N_INSNS (3) /* roundint.  */
  COSTS_N_INSNS (30), /* div.  */
  COSTS_N_INSNS (6), /* mult.  */
  COSTS_N_INSNS (10), /* mult_addsub.  */
  COSTS_N_INSNS (7), /* fma.  */
  COSTS_N_INSNS (3), /* addsub.  */
  COSTS_N_INSNS (3), /* fpconst.  */
  COSTS_N_INSNS (3), /* neg.  */
  COSTS_N_INSNS (3), /* compare.  */
  COSTS_N_INSNS (3), /* widen.  */
  COSTS_N_INSNS (3), /* narrow.  */
  COSTS_N_INSNS (3), /* toint.  */
  COSTS_N_INSNS (3), /* fromint.  */
  COSTS_N_INSNS (3) /* roundint.  */
  COSTS_N_INSNS (1) /* alu.  */

const struct cpu_cost_table cortexa12_extra_costs =
  COSTS_N_INSNS (1), /* shift_reg.  */
  COSTS_N_INSNS (1), /* arith_shift.  */
  COSTS_N_INSNS (1), /* arith_shift_reg.  */
  COSTS_N_INSNS (1), /* log_shift.  */
  COSTS_N_INSNS (1), /* log_shift_reg.  */
  COSTS_N_INSNS (1), /* extend_arith.  */
  COSTS_N_INSNS (1), /* bfx.  */
  COSTS_N_INSNS (1), /* clz.  */
  COSTS_N_INSNS (1), /* rev.  */
  true /* non_exec_costs_exec.  */
  COSTS_N_INSNS (2), /* simple.  */
  COSTS_N_INSNS (3), /* flag_setting.  */
  COSTS_N_INSNS (2), /* extend.  */
  COSTS_N_INSNS (3), /* add.  */
  COSTS_N_INSNS (2), /* extend_add.  */
  COSTS_N_INSNS (18) /* idiv.  */
  0, /* simple (N/A).  */
  0, /* flag_setting (N/A).  */
  COSTS_N_INSNS (3), /* extend.  */
  COSTS_N_INSNS (3), /* extend_add.  */
  COSTS_N_INSNS (3), /* load.  */
  COSTS_N_INSNS (3), /* load_sign_extend.  */
  COSTS_N_INSNS (3), /* ldrd.  */
  COSTS_N_INSNS (3), /* ldm_1st.  */
  1, /* ldm_regs_per_insn_1st.  */
  2, /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (3), /* loadf.  */
  COSTS_N_INSNS (3), /* loadd.  */
  0, /* load_unaligned.  */
  1, /* stm_regs_per_insn_1st.  */
  2, /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2), /* storef.  */
  COSTS_N_INSNS (2), /* stored.  */
  0, /* store_unaligned.  */
  COSTS_N_INSNS (1), /* loadv.  */
  COSTS_N_INSNS (1) /* storev.  */
  COSTS_N_INSNS (17), /* div.  */
  COSTS_N_INSNS (4), /* mult.  */
  COSTS_N_INSNS (8), /* mult_addsub.  */
  COSTS_N_INSNS (8), /* fma.  */
  COSTS_N_INSNS (4), /* addsub.  */
  COSTS_N_INSNS (2), /* fpconst.  */
  COSTS_N_INSNS (2), /* neg.  */
  COSTS_N_INSNS (2), /* compare.  */
  COSTS_N_INSNS (4), /* widen.  */
  COSTS_N_INSNS (4), /* narrow.  */
  COSTS_N_INSNS (4), /* toint.  */
  COSTS_N_INSNS (4), /* fromint.  */
  COSTS_N_INSNS (4) /* roundint.  */
  COSTS_N_INSNS (31), /* div.  */
  COSTS_N_INSNS (4), /* mult.  */
  COSTS_N_INSNS (8), /* mult_addsub.  */
  COSTS_N_INSNS (8), /* fma.  */
  COSTS_N_INSNS (4), /* addsub.  */
  COSTS_N_INSNS (2), /* fpconst.  */
  COSTS_N_INSNS (2), /* neg.  */
  COSTS_N_INSNS (2), /* compare.  */
  COSTS_N_INSNS (4), /* widen.  */
  COSTS_N_INSNS (4), /* narrow.  */
  COSTS_N_INSNS (4), /* toint.  */
  COSTS_N_INSNS (4), /* fromint.  */
  COSTS_N_INSNS (4) /* roundint.  */
  COSTS_N_INSNS (1) /* alu.  */

const struct cpu_cost_table cortexa15_extra_costs =
  COSTS_N_INSNS (1), /* arith_shift.  */
  COSTS_N_INSNS (1), /* arith_shift_reg.  */
  COSTS_N_INSNS (1), /* log_shift.  */
  COSTS_N_INSNS (1), /* log_shift_reg.  */
  COSTS_N_INSNS (1), /* extend_arith.  */
  COSTS_N_INSNS (1), /* bfi.  */
  true /* non_exec_costs_exec.  */
  COSTS_N_INSNS (2), /* simple.  */
  COSTS_N_INSNS (3), /* flag_setting.  */
  COSTS_N_INSNS (2), /* extend.  */
  COSTS_N_INSNS (2), /* add.  */
  COSTS_N_INSNS (2), /* extend_add.  */
  COSTS_N_INSNS (18) /* idiv.  */
  0, /* simple (N/A).  */
  0, /* flag_setting (N/A).  */
  COSTS_N_INSNS (3), /* extend.  */
  COSTS_N_INSNS (3), /* extend_add.  */
  COSTS_N_INSNS (3), /* load.  */
  COSTS_N_INSNS (3), /* load_sign_extend.  */
  COSTS_N_INSNS (3), /* ldrd.  */
  COSTS_N_INSNS (4), /* ldm_1st.  */
  1, /* ldm_regs_per_insn_1st.  */
  2, /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (4), /* loadf.  */
  COSTS_N_INSNS (4), /* loadd.  */
  0, /* load_unaligned.  */
  COSTS_N_INSNS (1), /* stm_1st.  */
  1, /* stm_regs_per_insn_1st.  */
  2, /* stm_regs_per_insn_subsequent.  */
  0, /* store_unaligned.  */
  COSTS_N_INSNS (1), /* loadv.  */
  COSTS_N_INSNS (1) /* storev.  */
  COSTS_N_INSNS (17), /* div.  */
  COSTS_N_INSNS (4), /* mult.  */
  COSTS_N_INSNS (8), /* mult_addsub.  */
  COSTS_N_INSNS (8), /* fma.  */
  COSTS_N_INSNS (4), /* addsub.  */
  COSTS_N_INSNS (2), /* fpconst.  */
  COSTS_N_INSNS (2), /* neg.  */
  COSTS_N_INSNS (5), /* compare.  */
  COSTS_N_INSNS (4), /* widen.  */
  COSTS_N_INSNS (4), /* narrow.  */
  COSTS_N_INSNS (4), /* toint.  */
  COSTS_N_INSNS (4), /* fromint.  */
  COSTS_N_INSNS (4) /* roundint.  */
  COSTS_N_INSNS (31), /* div.  */
  COSTS_N_INSNS (4), /* mult.  */
  COSTS_N_INSNS (8), /* mult_addsub.  */
  COSTS_N_INSNS (8), /* fma.  */
  COSTS_N_INSNS (4), /* addsub.  */
  COSTS_N_INSNS (2), /* fpconst.  */
  COSTS_N_INSNS (2), /* neg.  */
  COSTS_N_INSNS (2), /* compare.  */
  COSTS_N_INSNS (4), /* widen.  */
  COSTS_N_INSNS (4), /* narrow.  */
  COSTS_N_INSNS (4), /* toint.  */
  COSTS_N_INSNS (4), /* fromint.  */
  COSTS_N_INSNS (4) /* roundint.  */
  COSTS_N_INSNS (1) /* alu.  */

const struct cpu_cost_table v7m_extra_costs =
  0, /* arith_shift.  */
  COSTS_N_INSNS (1), /* arith_shift_reg.  */
  COSTS_N_INSNS (1), /* log_shift_reg.  */
  COSTS_N_INSNS (1), /* extend_arith.  */
  COSTS_N_INSNS (1), /* non_exec.  */
  false /* non_exec_costs_exec.  */
  COSTS_N_INSNS (1), /* simple.  */
  COSTS_N_INSNS (1), /* flag_setting.  */
  COSTS_N_INSNS (2), /* extend.  */
  COSTS_N_INSNS (1), /* add.  */
  COSTS_N_INSNS (3), /* extend_add.  */
  COSTS_N_INSNS (8) /* idiv.  */
  0, /* simple (N/A).  */
  0, /* flag_setting (N/A).  */
  COSTS_N_INSNS (2), /* extend.  */
  COSTS_N_INSNS (3), /* extend_add.  */
  COSTS_N_INSNS (2), /* load.  */
  0, /* load_sign_extend.  */
  COSTS_N_INSNS (3), /* ldrd.  */
  COSTS_N_INSNS (2), /* ldm_1st.  */
  1, /* ldm_regs_per_insn_1st.  */
  1, /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2), /* loadf.  */
  COSTS_N_INSNS (3), /* loadd.  */
  COSTS_N_INSNS (1), /* load_unaligned.  */
  COSTS_N_INSNS (2), /* store.  */
  COSTS_N_INSNS (3), /* strd.  */
  COSTS_N_INSNS (2), /* stm_1st.  */
  1, /* stm_regs_per_insn_1st.  */
  1, /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2), /* storef.  */
  COSTS_N_INSNS (3), /* stored.  */
  COSTS_N_INSNS (1), /* store_unaligned.  */
  COSTS_N_INSNS (1), /* loadv.  */
  COSTS_N_INSNS (1) /* storev.  */
  COSTS_N_INSNS (7), /* div.  */
  COSTS_N_INSNS (2), /* mult.  */
  COSTS_N_INSNS (5), /* mult_addsub.  */
  COSTS_N_INSNS (3), /* fma.  */
  COSTS_N_INSNS (1), /* addsub.  */
  COSTS_N_INSNS (15), /* div.  */
  COSTS_N_INSNS (5), /* mult.  */
  COSTS_N_INSNS (7), /* mult_addsub.  */
  COSTS_N_INSNS (7), /* fma.  */
  COSTS_N_INSNS (3), /* addsub.  */
  COSTS_N_INSNS (1) /* alu.  */

const struct addr_mode_cost_table generic_addr_mode_costs =
{
  /* int.  */
  {
    COSTS_N_INSNS (0), /* AMO_DEFAULT.  */
    COSTS_N_INSNS (0), /* AMO_NO_WB.  */
    COSTS_N_INSNS (0) /* AMO_WB.  */
  },
  /* float.  */
  {
    COSTS_N_INSNS (0), /* AMO_DEFAULT.  */
    COSTS_N_INSNS (0), /* AMO_NO_WB.  */
    COSTS_N_INSNS (0) /* AMO_WB.  */
  },
  /* vector.  */
  {
    COSTS_N_INSNS (0), /* AMO_DEFAULT.  */
    COSTS_N_INSNS (0), /* AMO_NO_WB.  */
    COSTS_N_INSNS (0) /* AMO_WB.  */
  }
};

const struct tune_params arm_slowmul_tune =
{
  &generic_extra_costs, /* Insn extra costs.  */
  &generic_addr_mode_costs, /* Addressing mode costs.  */
  NULL, /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  3, /* Constant limit.  */
  5, /* Max cond insns.  */
  8, /* Memset max inline.  */
  1, /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_fastmul_tune =
{
  &generic_extra_costs, /* Insn extra costs.  */
  &generic_addr_mode_costs, /* Addressing mode costs.  */
  NULL, /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1, /* Constant limit.  */
  5, /* Max cond insns.  */
  8, /* Memset max inline.  */
  1, /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
1836 /* StrongARM has early execution of branches, so a sequence that is worth
1837 skipping is shorter. Set max_insns_skipped to a lower value. */
1839 const struct tune_params arm_strongarm_tune =
1841 &generic_extra_costs, /* Insn extra costs. */
1842 &generic_addr_mode_costs, /* Addressing mode costs. */
1843 NULL, /* Sched adj cost. */
1844 arm_default_branch_cost,
1845 &arm_default_vec_cost,
1846 1, /* Constant limit. */
1847 3, /* Max cond insns. */
1848 8, /* Memset max inline. */
1849 1, /* Issue rate. */
1850 ARM_PREFETCH_NOT_BENEFICIAL,
1851 tune_params::PREF_CONST_POOL_TRUE,
1852 tune_params::PREF_LDRD_FALSE,
1853 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1854 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1855 tune_params::DISPARAGE_FLAGS_NEITHER,
1856 tune_params::PREF_NEON_STRINGOPS_FALSE,
1857 tune_params::FUSE_NOTHING,
1858 tune_params::SCHED_AUTOPREF_OFF
1861 const struct tune_params arm_xscale_tune =
1863 &generic_extra_costs, /* Insn extra costs. */
1864 &generic_addr_mode_costs, /* Addressing mode costs. */
1865 xscale_sched_adjust_cost,
1866 arm_default_branch_cost,
1867 &arm_default_vec_cost,
1868 2, /* Constant limit. */
1869 3, /* Max cond insns. */
1870 8, /* Memset max inline. */
1871 1, /* Issue rate. */
1872 ARM_PREFETCH_NOT_BENEFICIAL,
1873 tune_params::PREF_CONST_POOL_TRUE,
1874 tune_params::PREF_LDRD_FALSE,
1875 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1876 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1877 tune_params::DISPARAGE_FLAGS_NEITHER,
1878 tune_params::PREF_NEON_STRINGOPS_FALSE,
1879 tune_params::FUSE_NOTHING,
1880 tune_params::SCHED_AUTOPREF_OFF
1883 const struct tune_params arm_9e_tune =
1885 &generic_extra_costs, /* Insn extra costs. */
1886 &generic_addr_mode_costs, /* Addressing mode costs. */
1887 NULL, /* Sched adj cost. */
1888 arm_default_branch_cost,
1889 &arm_default_vec_cost,
1890 1, /* Constant limit. */
1891 5, /* Max cond insns. */
1892 8, /* Memset max inline. */
1893 1, /* Issue rate. */
1894 ARM_PREFETCH_NOT_BENEFICIAL,
1895 tune_params::PREF_CONST_POOL_TRUE,
1896 tune_params::PREF_LDRD_FALSE,
1897 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1898 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1899 tune_params::DISPARAGE_FLAGS_NEITHER,
1900 tune_params::PREF_NEON_STRINGOPS_FALSE,
1901 tune_params::FUSE_NOTHING,
1902 tune_params::SCHED_AUTOPREF_OFF
1905 const struct tune_params arm_marvell_pj4_tune =
1907 &generic_extra_costs, /* Insn extra costs. */
1908 &generic_addr_mode_costs, /* Addressing mode costs. */
1909 NULL, /* Sched adj cost. */
1910 arm_default_branch_cost,
1911 &arm_default_vec_cost,
1912 1, /* Constant limit. */
1913 5, /* Max cond insns. */
1914 8, /* Memset max inline. */
1915 2, /* Issue rate. */
1916 ARM_PREFETCH_NOT_BENEFICIAL,
1917 tune_params::PREF_CONST_POOL_TRUE,
1918 tune_params::PREF_LDRD_FALSE,
1919 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1920 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1921 tune_params::DISPARAGE_FLAGS_NEITHER,
1922 tune_params::PREF_NEON_STRINGOPS_FALSE,
1923 tune_params::FUSE_NOTHING,
1924 tune_params::SCHED_AUTOPREF_OFF
1927 const struct tune_params arm_v6t2_tune =
1929 &generic_extra_costs, /* Insn extra costs. */
1930 &generic_addr_mode_costs, /* Addressing mode costs. */
1931 NULL, /* Sched adj cost. */
1932 arm_default_branch_cost,
1933 &arm_default_vec_cost,
1934 1, /* Constant limit. */
1935 5, /* Max cond insns. */
1936 8, /* Memset max inline. */
1937 1, /* Issue rate. */
1938 ARM_PREFETCH_NOT_BENEFICIAL,
1939 tune_params::PREF_CONST_POOL_FALSE,
1940 tune_params::PREF_LDRD_FALSE,
1941 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1942 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1943 tune_params::DISPARAGE_FLAGS_NEITHER,
1944 tune_params::PREF_NEON_STRINGOPS_FALSE,
1945 tune_params::FUSE_NOTHING,
1946 tune_params::SCHED_AUTOPREF_OFF
1950 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1951 const struct tune_params arm_cortex_tune =
1953 &generic_extra_costs,
1954 &generic_addr_mode_costs, /* Addressing mode costs. */
1955 NULL, /* Sched adj cost. */
1956 arm_default_branch_cost,
1957 &arm_default_vec_cost,
1958 1, /* Constant limit. */
1959 5, /* Max cond insns. */
1960 8, /* Memset max inline. */
1961 2, /* Issue rate. */
1962 ARM_PREFETCH_NOT_BENEFICIAL,
1963 tune_params::PREF_CONST_POOL_FALSE,
1964 tune_params::PREF_LDRD_FALSE,
1965 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1966 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1967 tune_params::DISPARAGE_FLAGS_NEITHER,
1968 tune_params::PREF_NEON_STRINGOPS_FALSE,
1969 tune_params::FUSE_NOTHING,
1970 tune_params::SCHED_AUTOPREF_OFF
1973 const struct tune_params arm_cortex_a8_tune =
1975 &cortexa8_extra_costs,
1976 &generic_addr_mode_costs, /* Addressing mode costs. */
1977 NULL, /* Sched adj cost. */
1978 arm_default_branch_cost,
1979 &arm_default_vec_cost,
1980 1, /* Constant limit. */
1981 5, /* Max cond insns. */
1982 8, /* Memset max inline. */
1983 2, /* Issue rate. */
1984 ARM_PREFETCH_NOT_BENEFICIAL,
1985 tune_params::PREF_CONST_POOL_FALSE,
1986 tune_params::PREF_LDRD_FALSE,
1987 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1988 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1989 tune_params::DISPARAGE_FLAGS_NEITHER,
1990 tune_params::PREF_NEON_STRINGOPS_TRUE,
1991 tune_params::FUSE_NOTHING,
1992 tune_params::SCHED_AUTOPREF_OFF
1995 const struct tune_params arm_cortex_a7_tune =
1997 &cortexa7_extra_costs,
1998 &generic_addr_mode_costs, /* Addressing mode costs. */
1999 NULL, /* Sched adj cost. */
2000 arm_default_branch_cost,
2001 &arm_default_vec_cost,
2002 1, /* Constant limit. */
2003 5, /* Max cond insns. */
2004 8, /* Memset max inline. */
2005 2, /* Issue rate. */
2006 ARM_PREFETCH_NOT_BENEFICIAL,
2007 tune_params::PREF_CONST_POOL_FALSE,
2008 tune_params::PREF_LDRD_FALSE,
2009 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2010 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2011 tune_params::DISPARAGE_FLAGS_NEITHER,
2012 tune_params::PREF_NEON_STRINGOPS_TRUE,
2013 tune_params::FUSE_NOTHING,
2014 tune_params::SCHED_AUTOPREF_OFF
2017 const struct tune_params arm_cortex_a15_tune =
2019 &cortexa15_extra_costs,
2020 &generic_addr_mode_costs, /* Addressing mode costs. */
2021 NULL, /* Sched adj cost. */
2022 arm_default_branch_cost,
2023 &arm_default_vec_cost,
2024 1, /* Constant limit. */
2025 2, /* Max cond insns. */
2026 8, /* Memset max inline. */
2027 3, /* Issue rate. */
2028 ARM_PREFETCH_NOT_BENEFICIAL,
2029 tune_params::PREF_CONST_POOL_FALSE,
2030 tune_params::PREF_LDRD_TRUE,
2031 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2032 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2033 tune_params::DISPARAGE_FLAGS_ALL,
2034 tune_params::PREF_NEON_STRINGOPS_TRUE,
2035 tune_params::FUSE_NOTHING,
2036 tune_params::SCHED_AUTOPREF_FULL
2039 const struct tune_params arm_cortex_a35_tune =
2041 &cortexa53_extra_costs,
2042 &generic_addr_mode_costs, /* Addressing mode costs. */
2043 NULL, /* Sched adj cost. */
2044 arm_default_branch_cost,
2045 &arm_default_vec_cost,
2046 1, /* Constant limit. */
2047 5, /* Max cond insns. */
2048 8, /* Memset max inline. */
2049 1, /* Issue rate. */
2050 ARM_PREFETCH_NOT_BENEFICIAL,
2051 tune_params::PREF_CONST_POOL_FALSE,
2052 tune_params::PREF_LDRD_FALSE,
2053 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2054 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2055 tune_params::DISPARAGE_FLAGS_NEITHER,
2056 tune_params::PREF_NEON_STRINGOPS_TRUE,
2057 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2058 tune_params::SCHED_AUTOPREF_OFF
2061 const struct tune_params arm_cortex_a53_tune =
2063 &cortexa53_extra_costs,
2064 &generic_addr_mode_costs, /* Addressing mode costs. */
2065 NULL, /* Sched adj cost. */
2066 arm_default_branch_cost,
2067 &arm_default_vec_cost,
2068 1, /* Constant limit. */
2069 5, /* Max cond insns. */
2070 8, /* Memset max inline. */
2071 2, /* Issue rate. */
2072 ARM_PREFETCH_NOT_BENEFICIAL,
2073 tune_params::PREF_CONST_POOL_FALSE,
2074 tune_params::PREF_LDRD_FALSE,
2075 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2076 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2077 tune_params::DISPARAGE_FLAGS_NEITHER,
2078 tune_params::PREF_NEON_STRINGOPS_TRUE,
2079 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2080 tune_params::SCHED_AUTOPREF_OFF
2083 const struct tune_params arm_cortex_a57_tune =
2085 &cortexa57_extra_costs,
2086 &generic_addr_mode_costs, /* Addressing mode costs. */
2087 NULL, /* Sched adj cost. */
2088 arm_default_branch_cost,
2089 &arm_default_vec_cost,
2090 1, /* Constant limit. */
2091 2, /* Max cond insns. */
2092 8, /* Memset max inline. */
2093 3, /* Issue rate. */
2094 ARM_PREFETCH_NOT_BENEFICIAL,
2095 tune_params::PREF_CONST_POOL_FALSE,
2096 tune_params::PREF_LDRD_TRUE,
2097 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2098 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2099 tune_params::DISPARAGE_FLAGS_ALL,
2100 tune_params::PREF_NEON_STRINGOPS_TRUE,
2101 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2102 tune_params::SCHED_AUTOPREF_FULL
2105 const struct tune_params arm_exynosm1_tune =
2107 &exynosm1_extra_costs,
2108 &generic_addr_mode_costs, /* Addressing mode costs. */
2109 NULL, /* Sched adj cost. */
2110 arm_default_branch_cost,
2111 &arm_default_vec_cost,
2112 1, /* Constant limit. */
2113 2, /* Max cond insns. */
2114 8, /* Memset max inline. */
2115 3, /* Issue rate. */
2116 ARM_PREFETCH_NOT_BENEFICIAL,
2117 tune_params::PREF_CONST_POOL_FALSE,
2118 tune_params::PREF_LDRD_TRUE,
2119 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2120 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2121 tune_params::DISPARAGE_FLAGS_ALL,
2122 tune_params::PREF_NEON_STRINGOPS_TRUE,
2123 tune_params::FUSE_NOTHING,
2124 tune_params::SCHED_AUTOPREF_OFF
2127 const struct tune_params arm_xgene1_tune =
2129 &xgene1_extra_costs,
2130 &generic_addr_mode_costs, /* Addressing mode costs. */
2131 NULL, /* Sched adj cost. */
2132 arm_default_branch_cost,
2133 &arm_default_vec_cost,
2134 1, /* Constant limit. */
2135 2, /* Max cond insns. */
2136 32, /* Memset max inline. */
2137 4, /* Issue rate. */
2138 ARM_PREFETCH_NOT_BENEFICIAL,
2139 tune_params::PREF_CONST_POOL_FALSE,
2140 tune_params::PREF_LDRD_TRUE,
2141 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2142 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2143 tune_params::DISPARAGE_FLAGS_ALL,
2144 tune_params::PREF_NEON_STRINGOPS_FALSE,
2145 tune_params::FUSE_NOTHING,
2146 tune_params::SCHED_AUTOPREF_OFF
2149 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2150 less appealing. Set max_insns_skipped to a low value. */
2152 const struct tune_params arm_cortex_a5_tune =
2154 &cortexa5_extra_costs,
2155 &generic_addr_mode_costs, /* Addressing mode costs. */
2156 NULL, /* Sched adj cost. */
2157 arm_cortex_a5_branch_cost,
2158 &arm_default_vec_cost,
2159 1, /* Constant limit. */
2160 1, /* Max cond insns. */
2161 8, /* Memset max inline. */
2162 2, /* Issue rate. */
2163 ARM_PREFETCH_NOT_BENEFICIAL,
2164 tune_params::PREF_CONST_POOL_FALSE,
2165 tune_params::PREF_LDRD_FALSE,
2166 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2167 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2168 tune_params::DISPARAGE_FLAGS_NEITHER,
2169 tune_params::PREF_NEON_STRINGOPS_TRUE,
2170 tune_params::FUSE_NOTHING,
2171 tune_params::SCHED_AUTOPREF_OFF
2174 const struct tune_params arm_cortex_a9_tune =
2176 &cortexa9_extra_costs,
2177 &generic_addr_mode_costs, /* Addressing mode costs. */
2178 cortex_a9_sched_adjust_cost,
2179 arm_default_branch_cost,
2180 &arm_default_vec_cost,
2181 1, /* Constant limit. */
2182 5, /* Max cond insns. */
2183 8, /* Memset max inline. */
2184 2, /* Issue rate. */
2185 ARM_PREFETCH_BENEFICIAL(4,32,32),
2186 tune_params::PREF_CONST_POOL_FALSE,
2187 tune_params::PREF_LDRD_FALSE,
2188 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2189 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2190 tune_params::DISPARAGE_FLAGS_NEITHER,
2191 tune_params::PREF_NEON_STRINGOPS_FALSE,
2192 tune_params::FUSE_NOTHING,
2193 tune_params::SCHED_AUTOPREF_OFF
2196 const struct tune_params arm_cortex_a12_tune =
2198 &cortexa12_extra_costs,
2199 &generic_addr_mode_costs, /* Addressing mode costs. */
2200 NULL, /* Sched adj cost. */
2201 arm_default_branch_cost,
2202 &arm_default_vec_cost, /* Vectorizer costs. */
2203 1, /* Constant limit. */
2204 2, /* Max cond insns. */
2205 8, /* Memset max inline. */
2206 2, /* Issue rate. */
2207 ARM_PREFETCH_NOT_BENEFICIAL,
2208 tune_params::PREF_CONST_POOL_FALSE,
2209 tune_params::PREF_LDRD_TRUE,
2210 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2211 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2212 tune_params::DISPARAGE_FLAGS_ALL,
2213 tune_params::PREF_NEON_STRINGOPS_TRUE,
2214 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2215 tune_params::SCHED_AUTOPREF_OFF
2218 const struct tune_params arm_cortex_a73_tune =
2220 &cortexa57_extra_costs,
2221 &generic_addr_mode_costs, /* Addressing mode costs. */
2222 NULL, /* Sched adj cost. */
2223 arm_default_branch_cost,
2224 &arm_default_vec_cost, /* Vectorizer costs. */
2225 1, /* Constant limit. */
2226 2, /* Max cond insns. */
2227 8, /* Memset max inline. */
2228 2, /* Issue rate. */
2229 ARM_PREFETCH_NOT_BENEFICIAL,
2230 tune_params::PREF_CONST_POOL_FALSE,
2231 tune_params::PREF_LDRD_TRUE,
2232 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2233 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2234 tune_params::DISPARAGE_FLAGS_ALL,
2235 tune_params::PREF_NEON_STRINGOPS_TRUE,
2236 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2237 tune_params::SCHED_AUTOPREF_FULL
2240 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2241 cycle to execute each. An LDR from the constant pool also takes two cycles
2242 to execute, but mildly increases pipelining opportunity (consecutive
2243 loads/stores can be pipelined together, saving one cycle), and may also
2244 improve icache utilisation. Hence we prefer the constant pool for such
2245 processors. */
2247 const struct tune_params arm_v7m_tune =
2249 &v7m_extra_costs, /* Insn extra costs. */
2250 &generic_addr_mode_costs, /* Addressing mode costs. */
2251 NULL, /* Sched adj cost. */
2252 arm_cortex_m_branch_cost,
2253 &arm_default_vec_cost,
2254 1, /* Constant limit. */
2255 2, /* Max cond insns. */
2256 8, /* Memset max inline. */
2257 1, /* Issue rate. */
2258 ARM_PREFETCH_NOT_BENEFICIAL,
2259 tune_params::PREF_CONST_POOL_TRUE,
2260 tune_params::PREF_LDRD_FALSE,
2261 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2262 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2263 tune_params::DISPARAGE_FLAGS_NEITHER,
2264 tune_params::PREF_NEON_STRINGOPS_FALSE,
2265 tune_params::FUSE_NOTHING,
2266 tune_params::SCHED_AUTOPREF_OFF
2269 /* Cortex-M7 tuning. */
2271 const struct tune_params arm_cortex_m7_tune =
2273 &v7m_extra_costs, /* Insn extra costs. */
2274 &generic_addr_mode_costs, /* Addressing mode costs. */
2275 NULL, /* Sched adj cost. */
2276 arm_cortex_m7_branch_cost,
2277 &arm_default_vec_cost,
2278 0, /* Constant limit. */
2279 1, /* Max cond insns. */
2280 8, /* Memset max inline. */
2281 2, /* Issue rate. */
2282 ARM_PREFETCH_NOT_BENEFICIAL,
2283 tune_params::PREF_CONST_POOL_TRUE,
2284 tune_params::PREF_LDRD_FALSE,
2285 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2286 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2287 tune_params::DISPARAGE_FLAGS_NEITHER,
2288 tune_params::PREF_NEON_STRINGOPS_FALSE,
2289 tune_params::FUSE_NOTHING,
2290 tune_params::SCHED_AUTOPREF_OFF
2293 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2294 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2295 cortex-m1.small-multiply. */
2296 const struct tune_params arm_v6m_tune =
2298 &generic_extra_costs, /* Insn extra costs. */
2299 &generic_addr_mode_costs, /* Addressing mode costs. */
2300 NULL, /* Sched adj cost. */
2301 arm_default_branch_cost,
2302 &arm_default_vec_cost, /* Vectorizer costs. */
2303 1, /* Constant limit. */
2304 5, /* Max cond insns. */
2305 8, /* Memset max inline. */
2306 1, /* Issue rate. */
2307 ARM_PREFETCH_NOT_BENEFICIAL,
2308 tune_params::PREF_CONST_POOL_FALSE,
2309 tune_params::PREF_LDRD_FALSE,
2310 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2311 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2312 tune_params::DISPARAGE_FLAGS_NEITHER,
2313 tune_params::PREF_NEON_STRINGOPS_FALSE,
2314 tune_params::FUSE_NOTHING,
2315 tune_params::SCHED_AUTOPREF_OFF
2318 const struct tune_params arm_fa726te_tune =
2320 &generic_extra_costs, /* Insn extra costs. */
2321 &generic_addr_mode_costs, /* Addressing mode costs. */
2322 fa726te_sched_adjust_cost,
2323 arm_default_branch_cost,
2324 &arm_default_vec_cost,
2325 1, /* Constant limit. */
2326 5, /* Max cond insns. */
2327 8, /* Memset max inline. */
2328 2, /* Issue rate. */
2329 ARM_PREFETCH_NOT_BENEFICIAL,
2330 tune_params::PREF_CONST_POOL_TRUE,
2331 tune_params::PREF_LDRD_FALSE,
2332 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2333 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2334 tune_params::DISPARAGE_FLAGS_NEITHER,
2335 tune_params::PREF_NEON_STRINGOPS_FALSE,
2336 tune_params::FUSE_NOTHING,
2337 tune_params::SCHED_AUTOPREF_OFF
2340 /* Auto-generated CPU, FPU and architecture tables. */
2341 #include "arm-cpu-data.h"
2343 /* The name of the preprocessor macro to define for this architecture. PROFILE
2344 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2345 is thus chosen to be big enough to hold the longest architecture name. */
2347 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
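/* Editor's illustration, grounded in arm_option_reconfigure_globals ()
   later in this file, which overwrites the buffer with
     sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
   so an armv8-a target ends up defining "__ARM_ARCH_8A__"; the string
   "PROFILE" merely reserves space for the longest name.  */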
2349 /* Supported TLS relocations. */
2360 TLS_DESCSEQ /* GNU scheme */
2363 /* The maximum number of insns to be used when loading a constant. */
2365 arm_constant_limit (bool size_p)
2367 return size_p ? 1 : current_tune->constant_limit;
2370 /* Emit an insn that's a simple single-set. Both the operands must be known
2371    to be valid. */
2372 inline static rtx_insn *
2373 emit_set_insn (rtx x, rtx y)
2375 return emit_insn (gen_rtx_SET (x, y));
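/* Editor's example (assumption; TARGET_REG is a hypothetical pseudo):
     emit_set_insn (target_reg, GEN_INT (42));
   emits a single SET insn assigning the constant 42 to TARGET_REG.  */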
2378 /* Return the number of bits set in VALUE. */
2379 static unsigned
2380 bit_count (unsigned long value)
2382   unsigned long count = 0;
2384   while (value)
2386       count++;
2387       value &= value - 1; /* Clear the least-significant set bit. */
2390   return count;
2393 /* Return the number of bits set in BMAP. */
2395 bitmap_popcount (const sbitmap bmap)
2397   unsigned int count = 0;
2398   unsigned int n = 0;
2399   sbitmap_iterator sbi;
2401   EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2402     count++;
2403   return count;
2406 typedef struct
2408   machine_mode mode;
2409   const char *name;
2410 } arm_fixed_mode_set;
2412 /* A small helper for setting fixed-point library libfuncs. */
2415 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2416 const char *funcname, const char *modename,
2421 if (num_suffix == 0)
2422 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2424 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2426 set_optab_libfunc (optable, mode, buffer);
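/* Editor's example, derived from the format strings above: FUNCNAME
   "usadd" with MODENAME "usa" and NUM_SUFFIX 3 yields "__gnu_usaddusa3",
   while NUM_SUFFIX 0 drops the digit and yields "__gnu_usaddusa".  */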
2430 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2431 machine_mode from, const char *funcname,
2432 const char *toname, const char *fromname)
2435 const char *maybe_suffix_2 = "";
2437 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2438 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2439 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2440 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2441 maybe_suffix_2 = "2";
2443 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2446 set_conv_libfunc (optable, to, from, buffer);
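/* Editor's example, derived from the suffix rule above: a conversion
   from UQQmode to UHQmode keeps both signedness and fract-ness, so it
   takes the "2" suffix ("__gnu_fractuqquhq2"); UQQmode to UHAmode mixes
   fract and accum and stays plain ("__gnu_fractuqquha").  */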
2449 static GTY(()) rtx speculation_barrier_libfunc;
2451 /* Set up library functions unique to ARM. */
2453 arm_init_libfuncs (void)
2455 /* For Linux, we have access to kernel support for atomic operations. */
2456 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2457 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2459 /* There are no special library functions unless we are using the
2460    ARM BPABI. */
2461 if (!TARGET_BPABI)
2462   return;
2464 /* The functions below are described in Section 4 of the "Run-Time
2465 ABI for the ARM architecture", Version 1.0. */
2467 /* Double-precision floating-point arithmetic. Table 2. */
2468 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2469 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2470 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2471 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2472 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2474 /* Double-precision comparisons. Table 3. */
2475 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2476 set_optab_libfunc (ne_optab, DFmode, NULL);
2477 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2478 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2479 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2480 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2481 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2483 /* Single-precision floating-point arithmetic. Table 4. */
2484 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2485 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2486 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2487 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2488 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2490 /* Single-precision comparisons. Table 5. */
2491 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2492 set_optab_libfunc (ne_optab, SFmode, NULL);
2493 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2494 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2495 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2496 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2497 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2499 /* Floating-point to integer conversions. Table 6. */
2500 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2501 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2502 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2503 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2504 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2505 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2506 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2507 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2509 /* Conversions between floating types. Table 7. */
2510 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2511 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2513 /* Integer to floating-point conversions. Table 8. */
2514 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2515 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2516 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2517 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2518 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2519 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2520 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2521 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2523 /* Long long. Table 9. */
2524 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2525 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2526 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2527 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2528 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2529 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2530 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2531 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2533 /* Integer (32/32->32) division. \S 4.3.1. */
2534 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2535 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2537 /* The divmod functions are designed so that they can be used for
2538 plain division, even though they return both the quotient and the
2539 remainder. The quotient is returned in the usual location (i.e.,
2540 r0 for SImode, {r0, r1} for DImode), just as would be expected
2541 for an ordinary division routine. Because the AAPCS calling
2542 conventions specify that all of { r0, r1, r2, r3 } are
2543 callee-clobbered registers, there is no need to tell the compiler
2544 explicitly that those registers are clobbered by these
2545 functions. */
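/* Editor's illustration (assumption): for a plain division such as
     long long q = n / d;
   the compiler can thus call __aeabi_ldivmod and ignore the remainder,
   which the run-time ABI returns in {r2, r3} beside the quotient in
   {r0, r1}.  */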
2546 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2547 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2549 /* For SImode division the ABI provides div-without-mod routines,
2550 which are faster. */
2551 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2552 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2554 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2555 divmod libcalls instead. */
2556 set_optab_libfunc (smod_optab, DImode, NULL);
2557 set_optab_libfunc (umod_optab, DImode, NULL);
2558 set_optab_libfunc (smod_optab, SImode, NULL);
2559 set_optab_libfunc (umod_optab, SImode, NULL);
2561 /* Half-precision float operations. The compiler handles all operations
2562    with NULL libfuncs by converting to SFmode. */
2563 switch (arm_fp16_format)
2565 case ARM_FP16_FORMAT_IEEE:
2566 case ARM_FP16_FORMAT_ALTERNATIVE:
2569 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2570 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2572 : "__gnu_f2h_alternative"));
2573 set_conv_libfunc (sext_optab, SFmode, HFmode,
2574 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2576 : "__gnu_h2f_alternative"));
2578 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2579 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2581 : "__gnu_d2h_alternative"));
2584 set_optab_libfunc (add_optab, HFmode, NULL);
2585 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2586 set_optab_libfunc (smul_optab, HFmode, NULL);
2587 set_optab_libfunc (neg_optab, HFmode, NULL);
2588 set_optab_libfunc (sub_optab, HFmode, NULL);
2591 set_optab_libfunc (eq_optab, HFmode, NULL);
2592 set_optab_libfunc (ne_optab, HFmode, NULL);
2593 set_optab_libfunc (lt_optab, HFmode, NULL);
2594 set_optab_libfunc (le_optab, HFmode, NULL);
2595 set_optab_libfunc (ge_optab, HFmode, NULL);
2596 set_optab_libfunc (gt_optab, HFmode, NULL);
2597 set_optab_libfunc (unord_optab, HFmode, NULL);
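/* Editor's note (hedged): with the HFmode entries above left NULL, an
   expression such as
     __fp16 a, b;  ...  a + b
   is evaluated by widening both operands to SFmode (via the __gnu_h2f_*
   helper selected above) and narrowing only on an explicit __fp16
   store.  */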
2604 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2606 const arm_fixed_mode_set fixed_arith_modes[] =
2609 { E_UQQmode, "uqq" },
2611 { E_UHQmode, "uhq" },
2613 { E_USQmode, "usq" },
2615 { E_UDQmode, "udq" },
2617 { E_UTQmode, "utq" },
2619 { E_UHAmode, "uha" },
2621 { E_USAmode, "usa" },
2623 { E_UDAmode, "uda" },
2625 { E_UTAmode, "uta" }
2627 const arm_fixed_mode_set fixed_conv_modes[] =
2630 { E_UQQmode, "uqq" },
2632 { E_UHQmode, "uhq" },
2634 { E_USQmode, "usq" },
2636 { E_UDQmode, "udq" },
2638 { E_UTQmode, "utq" },
2640 { E_UHAmode, "uha" },
2642 { E_USAmode, "usa" },
2644 { E_UDAmode, "uda" },
2646 { E_UTAmode, "uta" },
2657 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2659 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2660 "add", fixed_arith_modes[i].name, 3);
2661 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2662 "ssadd", fixed_arith_modes[i].name, 3);
2663 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2664 "usadd", fixed_arith_modes[i].name, 3);
2665 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2666 "sub", fixed_arith_modes[i].name, 3);
2667 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2668 "sssub", fixed_arith_modes[i].name, 3);
2669 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2670 "ussub", fixed_arith_modes[i].name, 3);
2671 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2672 "mul", fixed_arith_modes[i].name, 3);
2673 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2674 "ssmul", fixed_arith_modes[i].name, 3);
2675 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2676 "usmul", fixed_arith_modes[i].name, 3);
2677 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2678 "div", fixed_arith_modes[i].name, 3);
2679 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2680 "udiv", fixed_arith_modes[i].name, 3);
2681 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2682 "ssdiv", fixed_arith_modes[i].name, 3);
2683 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2684 "usdiv", fixed_arith_modes[i].name, 3);
2685 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2686 "neg", fixed_arith_modes[i].name, 2);
2687 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2688 "ssneg", fixed_arith_modes[i].name, 2);
2689 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2690 "usneg", fixed_arith_modes[i].name, 2);
2691 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2692 "ashl", fixed_arith_modes[i].name, 3);
2693 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2694 "ashr", fixed_arith_modes[i].name, 3);
2695 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2696 "lshr", fixed_arith_modes[i].name, 3);
2697 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2698 "ssashl", fixed_arith_modes[i].name, 3);
2699 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2700 "usashl", fixed_arith_modes[i].name, 3);
2701 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2702 "cmp", fixed_arith_modes[i].name, 2);
2705 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2706 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2708 	if (i == j
2709 	    || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2710 		&& !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2711 	  continue;
2713 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2714 fixed_conv_modes[j].mode, "fract",
2715 fixed_conv_modes[i].name,
2716 fixed_conv_modes[j].name);
2717 arm_set_fixed_conv_libfunc (satfract_optab,
2718 fixed_conv_modes[i].mode,
2719 fixed_conv_modes[j].mode, "satfract",
2720 fixed_conv_modes[i].name,
2721 fixed_conv_modes[j].name);
2722 arm_set_fixed_conv_libfunc (fractuns_optab,
2723 fixed_conv_modes[i].mode,
2724 fixed_conv_modes[j].mode, "fractuns",
2725 fixed_conv_modes[i].name,
2726 fixed_conv_modes[j].name);
2727 arm_set_fixed_conv_libfunc (satfractuns_optab,
2728 fixed_conv_modes[i].mode,
2729 fixed_conv_modes[j].mode, "satfractuns",
2730 fixed_conv_modes[i].name,
2731 fixed_conv_modes[j].name);
2735 if (TARGET_AAPCS_BASED)
2736 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2738 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2741 /* On AAPCS systems, this is the "struct __va_list". */
2742 static GTY(()) tree va_list_type;
2744 /* Return the type to use as __builtin_va_list. */
2746 arm_build_builtin_va_list (void)
2751 if (!TARGET_AAPCS_BASED)
2752 return std_build_builtin_va_list ();
2754 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2762    The C Library ABI further reinforces this definition in \S
2763    4.1.
2765 We must follow this definition exactly. The structure tag
2766 name is visible in C++ mangled names, and thus forms a part
2767 of the ABI. The field name may be used by people who
2768 #include <stdarg.h>. */
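/* Editor's sketch (hedged): per AAPCS \S 7.1.4 the type built below is
   equivalent to the C declaration
     struct __va_list { void *__ap; };
   with both the tag and the field name mandated by the ABI.  */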
2769 /* Create the type. */
2770 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2771 /* Give it the required name. */
2772 va_list_name = build_decl (BUILTINS_LOCATION,
2774 get_identifier ("__va_list"),
2776 DECL_ARTIFICIAL (va_list_name) = 1;
2777 TYPE_NAME (va_list_type) = va_list_name;
2778 TYPE_STUB_DECL (va_list_type) = va_list_name;
2779 /* Create the __ap field. */
2780 ap_field = build_decl (BUILTINS_LOCATION,
2782 get_identifier ("__ap"),
2784 DECL_ARTIFICIAL (ap_field) = 1;
2785 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2786 TYPE_FIELDS (va_list_type) = ap_field;
2787 /* Compute its layout. */
2788 layout_type (va_list_type);
2790 return va_list_type;
2793 /* Return an expression of type "void *" pointing to the next
2794 available argument in a variable-argument list. VALIST is the
2795 user-level va_list object, of type __builtin_va_list. */
2797 arm_extract_valist_ptr (tree valist)
2799 if (TREE_TYPE (valist) == error_mark_node)
2800 return error_mark_node;
2802 /* On an AAPCS target, the pointer is stored within "struct
2803    va_list". */
2804 if (TARGET_AAPCS_BASED)
2806 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2807 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2808 valist, ap_field, NULL_TREE);
2814 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2816 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2818 valist = arm_extract_valist_ptr (valist);
2819 std_expand_builtin_va_start (valist, nextarg);
2822 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2824 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2827 valist = arm_extract_valist_ptr (valist);
2828 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2831 /* Check any incompatible options that the user has specified. */
2833 arm_option_check_internal (struct gcc_options *opts)
2835 int flags = opts->x_target_flags;
2837 /* iWMMXt and NEON are incompatible. */
2838 if (TARGET_IWMMXT
2839     && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2840 error ("iWMMXt and NEON are incompatible");
2842 /* Make sure that the processor choice does not conflict with any of the
2843 other command line choices. */
2844 if (TARGET_ARM_P (flags)
2845 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2846 error ("target CPU does not support ARM mode");
2848 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2849 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2850 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2852 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2853 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2855 /* If this target is normally configured to use APCS frames, warn if they
2856 are turned off and debugging is turned on. */
2857 if (TARGET_ARM_P (flags)
2858 && write_symbols != NO_DEBUG
2859 && !TARGET_APCS_FRAME
2860 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2861 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2862 	    "debugging");
2864 /* iWMMXt unsupported under Thumb mode. */
2865 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2866 error ("iWMMXt unsupported under Thumb mode");
2868 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2869 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2871 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2873 error ("RTP PIC is incompatible with Thumb");
2877 if (target_pure_code || target_slow_flash_data)
2879 const char *flag = (target_pure_code ? "-mpure-code" :
2880 "-mslow-flash-data");
2882 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2883    with MOVT. */
2884 if (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON)
2885 error ("%s only supports non-pic code on M-profile targets with the "
2886 "MOVT instruction", flag);
2888 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
2889 -mword-relocations forbids relocation of MOVT/MOVW. */
2890 if (target_word_relocations)
2891 error ("%s incompatible with %<-mword-relocations%>", flag);
2895 /* Recompute the global settings depending on target attribute options. */
2898 arm_option_params_internal (void)
2900 /* If we are not using the default (ARM mode) section anchor offset
2901 ranges, then set the correct ranges now. */
2904 /* Thumb-1 LDR instructions cannot have negative offsets.
2905 Permissible positive offset ranges are 5-bit (for byte loads),
2906 6-bit (for halfword loads), or 7-bit (for word loads).
2907 Empirical results suggest a 7-bit anchor range gives the best
2908 overall code size. */
2909 targetm.min_anchor_offset = 0;
2910 targetm.max_anchor_offset = 127;
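/* Editor's worked example (hedged): the Thumb-1 offset field is 5 bits
   scaled by the access size, so byte loads reach offsets 0-31, halfword
   loads 0-62 and word loads 0-124; the 0..127 range above is the 7-bit
   span referred to in the comment.  */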
2912 else if (TARGET_THUMB2)
2914 /* The minimum is set such that the total size of the block
2915 for a particular anchor is 248 + 1 + 4095 bytes, which is
2916 divisible by eight, ensuring natural spacing of anchors. */
2917 targetm.min_anchor_offset = -248;
2918 targetm.max_anchor_offset = 4095;
2922 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2923 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2926 /* Increase the number of conditional instructions with -Os. */
2927 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2929 /* For THUMB2, we limit the conditional sequence to one IT block. */
2931 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
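/* Editor's note (hedged): an IT instruction predicates at most four
   following instructions (e.g. ITTTT), or just one under the restricted
   IT rules, which is what MAX_INSN_PER_IT_BLOCK appears to encode.  */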
2934 /* True if -mflip-thumb should next add an attribute for the default
2935 mode, false if it should next add an attribute for the opposite mode. */
2936 static GTY(()) bool thumb_flipper;
2938 /* Options after initial target override. */
2939 static GTY(()) tree init_optimize;
2942 arm_override_options_after_change_1 (struct gcc_options *opts)
2944 /* -falign-functions without argument: supply one. */
2945 if (opts->x_flag_align_functions && !opts->x_str_align_functions)
2946 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2947 && opts->x_optimize_size ? "2" : "4";
2950 /* Implement targetm.override_options_after_change. */
2953 arm_override_options_after_change (void)
2955 arm_configure_build_target (&arm_active_target,
2956 TREE_TARGET_OPTION (target_option_default_node),
2957 &global_options_set, false);
2959 arm_override_options_after_change_1 (&global_options);
2962 /* Implement TARGET_OPTION_SAVE. */
2964 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2966 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2967 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2968 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2971 /* Implement TARGET_OPTION_RESTORE. */
2973 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2975 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2976 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2977 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2978 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2982 /* Reset options between modes that the user has specified. */
2984 arm_option_override_internal (struct gcc_options *opts,
2985 struct gcc_options *opts_set)
2987 arm_override_options_after_change_1 (opts);
2989 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2991 /* The default is to enable interworking, so this warning message would
2992 be confusing to users who have just compiled with
2993    e.g., -march=armv4. */
2994 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2995 opts->x_target_flags &= ~MASK_INTERWORK;
2998 if (TARGET_THUMB_P (opts->x_target_flags)
2999 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3001 warning (0, "target CPU does not support THUMB instructions");
3002 opts->x_target_flags &= ~MASK_THUMB;
3005 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3007 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3008 opts->x_target_flags &= ~MASK_APCS_FRAME;
3011 /* Callee super interworking implies thumb interworking. Adding
3012 this to the flags here simplifies the logic elsewhere. */
3013 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3014 opts->x_target_flags |= MASK_INTERWORK;
3016 /* Need to remember initial values so combinations of options like
3017 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3018 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3020 if (! opts_set->x_arm_restrict_it)
3021 opts->x_arm_restrict_it = arm_arch8;
3023 /* ARM execution state and M profile don't have [restrict] IT. */
3024 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3025 opts->x_arm_restrict_it = 0;
3027 /* Enable -munaligned-access by default for
3028 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3029 i.e. Thumb2 and ARM state only.
3030 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3031    - ARMv8 architecture-based processors.
3033 Disable -munaligned-access by default for
3034 - all pre-ARMv6 architecture-based processors
3035 - ARMv6-M architecture-based processors
3036 - ARMv8-M Baseline processors. */
3038 if (! opts_set->x_unaligned_access)
3040 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3041 && arm_arch6 && (arm_arch_notm || arm_arch7));
3043 else if (opts->x_unaligned_access == 1
3044 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3046 warning (0, "target CPU does not support unaligned accesses");
3047 opts->x_unaligned_access = 0;
3050 /* Don't warn since it's on by default in -O2. */
3051 if (TARGET_THUMB1_P (opts->x_target_flags))
3052 opts->x_flag_schedule_insns = 0;
3054 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3056 /* Disable shrink-wrap when optimizing function for size, since it tends to
3057 generate additional returns. */
3058 if (optimize_function_for_size_p (cfun)
3059 && TARGET_THUMB2_P (opts->x_target_flags))
3060 opts->x_flag_shrink_wrap = false;
3062 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3064 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3065 - epilogue_insns - does not accurately model the corresponding insns
3066 emitted in the asm file. In particular, see the comment in thumb_exit
3067 'Find out how many of the (return) argument registers we can corrupt'.
3068 As a consequence, the epilogue may clobber registers without fipa-ra
3069 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3070 TODO: Accurately model clobbers for epilogue_insns and reenable
3072 if (TARGET_THUMB1_P (opts->x_target_flags))
3073 opts->x_flag_ipa_ra = 0;
3075 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3077 /* Thumb2 inline assembly code should always use unified syntax.
3078 This will apply to ARM and Thumb1 eventually. */
3079 if (TARGET_THUMB2_P (opts->x_target_flags))
3080 opts->x_inline_asm_unified = true;
3082 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3083 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3087 static sbitmap isa_all_fpubits;
3088 static sbitmap isa_quirkbits;
3090 /* Configure a build target TARGET from the user-specified options OPTS and
3091 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3092 architecture have been specified, but the two are not identical. */
3094 arm_configure_build_target (struct arm_build_target *target,
3095 struct cl_target_option *opts,
3096 struct gcc_options *opts_set,
3097 bool warn_compatible)
3099 const cpu_option *arm_selected_tune = NULL;
3100 const arch_option *arm_selected_arch = NULL;
3101 const cpu_option *arm_selected_cpu = NULL;
3102 const arm_fpu_desc *arm_selected_fpu = NULL;
3103 const char *tune_opts = NULL;
3104 const char *arch_opts = NULL;
3105 const char *cpu_opts = NULL;
3107 bitmap_clear (target->isa);
3108 target->core_name = NULL;
3109 target->arch_name = NULL;
3111 if (opts_set->x_arm_arch_string)
3113 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3115 opts->x_arm_arch_string);
3116 arch_opts = strchr (opts->x_arm_arch_string, '+');
3119 if (opts_set->x_arm_cpu_string)
3121 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3122 opts->x_arm_cpu_string);
3123 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3124 arm_selected_tune = arm_selected_cpu;
3125 /* If taking the tuning from -mcpu, we don't need to rescan the
3126 options for tuning. */
3129 if (opts_set->x_arm_tune_string)
3131 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3132 opts->x_arm_tune_string);
3133 tune_opts = strchr (opts->x_arm_tune_string, '+');
3136 if (arm_selected_arch)
3138 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3139 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3142 if (arm_selected_cpu)
3144 auto_sbitmap cpu_isa (isa_num_bits);
3145 auto_sbitmap isa_delta (isa_num_bits);
3147 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3148 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3150 bitmap_xor (isa_delta, cpu_isa, target->isa);
3151 /* Ignore any bits that are quirk bits. */
3152 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3153 /* Ignore (for now) any bits that might be set by -mfpu. */
3154 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3156 if (!bitmap_empty_p (isa_delta))
3158 if (warn_compatible)
3159 warning (0, "switch %<-mcpu=%s%> conflicts "
3160 "with %<-march=%s%> switch",
3161 arm_selected_cpu->common.name,
3162 arm_selected_arch->common.name);
3163 /* -march wins for code generation.
3164 -mcpu wins for default tuning. */
3165 if (!arm_selected_tune)
3166 arm_selected_tune = arm_selected_cpu;
3168 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3169 target->arch_name = arm_selected_arch->common.name;
3173 /* Architecture and CPU are essentially the same.
3174 Prefer the CPU setting. */
3175 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3176 target->core_name = arm_selected_cpu->common.name;
3177 /* Copy the CPU's capabilities, so that we inherit the
3178 appropriate extensions and quirks. */
3179 bitmap_copy (target->isa, cpu_isa);
3184 /* Pick a CPU based on the architecture. */
3185 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3186 target->arch_name = arm_selected_arch->common.name;
3187 /* Note: target->core_name is left unset in this path. */
3190 else if (arm_selected_cpu)
3192 target->core_name = arm_selected_cpu->common.name;
3193 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3194 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3196 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3198 /* If the user did not specify a processor or architecture, choose
3202 const cpu_option *sel;
3203 auto_sbitmap sought_isa (isa_num_bits);
3204 bitmap_clear (sought_isa);
3205 auto_sbitmap default_isa (isa_num_bits);
3207 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3208 TARGET_CPU_DEFAULT);
3209 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3210 gcc_assert (arm_selected_cpu->common.name);
3212 /* RWE: All of the selection logic below (to the end of this
3213 'if' clause) looks somewhat suspect. It appears to be mostly
3214 there to support forcing thumb support when the default CPU
3215 does not have thumb (somewhat dubious in terms of what the
3216 user might be expecting). I think it should be removed once
3217 support for the pre-thumb era cores is removed. */
3218 sel = arm_selected_cpu;
3219 arm_initialize_isa (default_isa, sel->common.isa_bits);
3220 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3223 /* Now check to see if the user has specified any command line
3224 switches that require certain abilities from the cpu. */
3226 if (TARGET_INTERWORK || TARGET_THUMB)
3227 bitmap_set_bit (sought_isa, isa_bit_thumb);
3229 /* If there are such requirements and the default CPU does not
3230 satisfy them, we need to run over the complete list of
3231 cores looking for one that is satisfactory. */
3232 if (!bitmap_empty_p (sought_isa)
3233 && !bitmap_subset_p (sought_isa, default_isa))
3235 auto_sbitmap candidate_isa (isa_num_bits);
3236 /* We're only interested in a CPU with at least the
3237 capabilities of the default CPU and the required
3238 additional features. */
3239 bitmap_ior (default_isa, default_isa, sought_isa);
3241 /* Try to locate a CPU type that supports all of the abilities
3242 of the default CPU, plus the extra abilities requested by
3244 for (sel = all_cores; sel->common.name != NULL; sel++)
3246 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3247 /* An exact match? */
3248 if (bitmap_equal_p (default_isa, candidate_isa))
3252 if (sel->common.name == NULL)
3254 unsigned current_bit_count = isa_num_bits;
3255 const cpu_option *best_fit = NULL;
3257 /* Ideally we would like to issue an error message here
3258 saying that it was not possible to find a CPU compatible
3259 with the default CPU, but which also supports the command
3260 line options specified by the programmer, and so they
3261 ought to use the -mcpu=<name> command line option to
3262 override the default CPU type.
3264 If we cannot find a CPU that has exactly the
3265 characteristics of the default CPU and the given
3266 command line options we scan the array again looking
3267 for a best match. The best match must have at least
3268 the capabilities of the perfect match. */
3269 for (sel = all_cores; sel->common.name != NULL; sel++)
3271 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3273 if (bitmap_subset_p (default_isa, candidate_isa))
3277 bitmap_and_compl (candidate_isa, candidate_isa,
3279 count = bitmap_popcount (candidate_isa);
3281 		if (count < current_bit_count)
3283 		    best_fit = sel;
3284 		    current_bit_count = count;
3288 	  gcc_assert (best_fit);
3289 	  sel = best_fit;
3292 arm_selected_cpu = sel;
3295 /* Now we know the CPU, we can finally initialize the target
3297 target->core_name = arm_selected_cpu->common.name;
3298 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3299 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3301 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3304 gcc_assert (arm_selected_cpu);
3305 gcc_assert (arm_selected_arch);
3307 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3309 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3310 auto_sbitmap fpu_bits (isa_num_bits);
3312 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3313 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3314 bitmap_ior (target->isa, target->isa, fpu_bits);
3317 if (!arm_selected_tune)
3318 arm_selected_tune = arm_selected_cpu;
3319 else /* Validate the features passed to -mtune. */
3320 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3322 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3324 /* Finish initializing the target structure. */
3325 target->arch_pp_name = arm_selected_arch->arch;
3326 target->base_arch = arm_selected_arch->base_arch;
3327 target->profile = arm_selected_arch->profile;
3329 target->tune_flags = tune_data->tune_flags;
3330 target->tune = tune_data->tune;
3331 target->tune_core = tune_data->scheduler;
3332 arm_option_reconfigure_globals ();
3335 /* Fix up any incompatible options that the user has specified. */
3337 arm_option_override (void)
3339 static const enum isa_feature fpu_bitlist[]
3340 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3341 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3342 cl_target_option opts;
3344 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3345 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3347 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3348 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3350 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3352 if (!global_options_set.x_arm_fpu_index)
3357 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3358 			     CL_TARGET);
3359 gcc_assert (ok);
3360 arm_fpu_index = (enum fpu_type) fpu_index;
3363 cl_target_option_save (&opts, &global_options);
3364 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3367 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3368 SUBTARGET_OVERRIDE_OPTIONS;
3371 /* Initialize boolean versions of the architectural flags, for use
3372 in the arm.md file and for enabling feature flags. */
3373 arm_option_reconfigure_globals ();
3375 arm_tune = arm_active_target.tune_core;
3376 tune_flags = arm_active_target.tune_flags;
3377 current_tune = arm_active_target.tune;
3379 /* TBD: Dwarf info for apcs frame is not handled yet. */
3380 if (TARGET_APCS_FRAME)
3381 flag_shrink_wrap = false;
3383 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3385 warning (0, "%<-mapcs-stack-check%> incompatible with "
3386 "%<-mno-apcs-frame%>");
3387 target_flags |= MASK_APCS_FRAME;
3390 if (TARGET_POKE_FUNCTION_NAME)
3391 target_flags |= MASK_APCS_FRAME;
3393 if (TARGET_APCS_REENT && flag_pic)
3394 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3396 if (TARGET_APCS_REENT)
3397 warning (0, "APCS reentrant code not supported. Ignored");
3399 /* Set up some tuning parameters. */
3400 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3401 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3402 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3403 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3404 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3405 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3407 /* For arm2/3 there is no need to do any scheduling if we are doing
3408 software floating-point. */
3409 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3410 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3412 /* Override the default structure alignment for AAPCS ABI. */
3413 if (!global_options_set.x_arm_structure_size_boundary)
3415 if (TARGET_AAPCS_BASED)
3416 arm_structure_size_boundary = 8;
3420 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3422 if (arm_structure_size_boundary != 8
3423 && arm_structure_size_boundary != 32
3424 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3426 if (ARM_DOUBLEWORD_ALIGN)
3428 "structure size boundary can only be set to 8, 32 or 64");
3430 warning (0, "structure size boundary can only be set to 8 or 32");
3431 arm_structure_size_boundary
3432 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
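/* Editor's illustration (assumption): with a boundary of 32, the size of
     struct s { char c; };
   is rounded up to 4 bytes, whereas the AAPCS value of 8 leaves it a
   single byte.  */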
3436 if (TARGET_VXWORKS_RTP)
3438 if (!global_options_set.x_arm_pic_data_is_text_relative)
3439 arm_pic_data_is_text_relative = 0;
3441 else if (flag_pic
3442 	 && !arm_pic_data_is_text_relative
3443 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3444 /* When text & data segments don't have a fixed displacement, the
3445 intended use is with a single, read only, pic base register.
3446 Unless the user explicitly requested not to do that, set
3448 target_flags |= MASK_SINGLE_PIC_BASE;
3450 /* If stack checking is disabled, we can use r10 as the PIC register,
3451 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3452 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3454 if (TARGET_VXWORKS_RTP)
3455 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3456 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3459 if (flag_pic && TARGET_VXWORKS_RTP)
3460 arm_pic_register = 9;
3462 /* If in FDPIC mode then force arm_pic_register to be r9. */
3463 if (TARGET_FDPIC)
3465     arm_pic_register = FDPIC_REGNUM;
3466     if (TARGET_THUMB1)
3467       sorry ("FDPIC mode is not supported in Thumb-1 mode");
3470 if (arm_pic_register_string != NULL)
3472 int pic_register = decode_reg_name (arm_pic_register_string);
3475 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3477 /* Prevent the user from choosing an obviously stupid PIC register. */
3478 else if (pic_register < 0 || call_used_regs[pic_register]
3479 || pic_register == HARD_FRAME_POINTER_REGNUM
3480 || pic_register == STACK_POINTER_REGNUM
3481 || pic_register >= PC_REGNUM
3482 || (TARGET_VXWORKS_RTP
3483 && (unsigned int) pic_register != arm_pic_register))
3484 error ("unable to use %qs for PIC register", arm_pic_register_string);
3486 arm_pic_register = pic_register;
3490 target_word_relocations = 1;
3492 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3493 if (fix_cm3_ldrd == 2)
3495 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3501 /* Hot/Cold partitioning is not currently supported, since we can't
3502 handle literal pool placement in that case. */
3503 if (flag_reorder_blocks_and_partition)
3505 inform (input_location,
3506 "%<-freorder-blocks-and-partition%> not supported "
3507 "on this architecture");
3508 flag_reorder_blocks_and_partition = 0;
3509 flag_reorder_blocks = 1;
3513 /* Hoisting PIC address calculations more aggressively provides a small,
3514 but measurable, size reduction for PIC code. Therefore, we decrease
3515 the bar for unrestricted expression hoisting to the cost of PIC address
3516 calculation, which is 2 instructions. */
3517 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3518 global_options.x_param_values,
3519 global_options_set.x_param_values);
3521 /* ARM EABI defaults to strict volatile bitfields. */
3522 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3523 && abi_version_at_least(2))
3524 flag_strict_volatile_bitfields = 1;
3526 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3527 have deemed it beneficial (signified by setting
3528 prefetch.num_slots to 1 or more). */
3529 if (flag_prefetch_loop_arrays < 0
3532 && current_tune->prefetch.num_slots > 0)
3533 flag_prefetch_loop_arrays = 1;
3535 /* Set up parameters to be used in the prefetching algorithm. Do not
3536 override the defaults unless we are tuning for a core we have
3537 researched values for. */
3538 if (current_tune->prefetch.num_slots > 0)
3539 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3540 current_tune->prefetch.num_slots,
3541 global_options.x_param_values,
3542 global_options_set.x_param_values);
3543 if (current_tune->prefetch.l1_cache_line_size >= 0)
3544 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3545 current_tune->prefetch.l1_cache_line_size,
3546 global_options.x_param_values,
3547 global_options_set.x_param_values);
3548 if (current_tune->prefetch.l1_cache_size >= 0)
3549 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3550 current_tune->prefetch.l1_cache_size,
3551 global_options.x_param_values,
3552 global_options_set.x_param_values);
3554 /* Use the alternative scheduling-pressure algorithm by default. */
3555 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3556 global_options.x_param_values,
3557 global_options_set.x_param_values);
3559 /* Look through the ready list and all of the queue for instructions
3560 relevant to the L2 auto-prefetcher. */
3561 int param_sched_autopref_queue_depth;
3563 switch (current_tune->sched_autopref)
3565 case tune_params::SCHED_AUTOPREF_OFF:
3566 param_sched_autopref_queue_depth = -1;
3567 break;
3569 case tune_params::SCHED_AUTOPREF_RANK:
3570 param_sched_autopref_queue_depth = 0;
3571 break;
3573 case tune_params::SCHED_AUTOPREF_FULL:
3574 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3575 break;
3577 default:
3578 gcc_unreachable ();
3581 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3582 param_sched_autopref_queue_depth,
3583 global_options.x_param_values,
3584 global_options_set.x_param_values);
3586 /* Currently, for slow flash data, we just disable literal pools. We also
3587 disable them for pure-code. */
3588 if (target_slow_flash_data || target_pure_code)
3589 arm_disable_literal_pool = true;
3591 /* Disable scheduling fusion by default if the target is not an ARMv7
3592 processor or does not prefer ldrd/strd. */
3593 if (flag_schedule_fusion == 2
3594 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3595 flag_schedule_fusion = 0;
3597 /* Need to remember the initial options before they are overridden. */
3598 init_optimize = build_optimization_node (&global_options);
3600 arm_options_perform_arch_sanity_checks ();
3601 arm_option_override_internal (&global_options, &global_options_set);
3602 arm_option_check_internal (&global_options);
3603 arm_option_params_internal ();
3605 /* Create the default target_options structure. */
3606 target_option_default_node = target_option_current_node
3607 = build_target_option_node (&global_options);
3609 /* Register global variables with the garbage collector. */
3610 arm_add_gc_roots ();
3612 /* Record the initial mode for testing. */
3613 thumb_flipper = TARGET_THUMB;
3617 /* Reconfigure global status flags from the active_target.isa. */
3619 arm_option_reconfigure_globals (void)
3621 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3622 arm_base_arch = arm_active_target.base_arch;
3624 /* Initialize boolean versions of the architectural flags, for use
3625 in the arm.md file. */
3626 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3627 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3628 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3629 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3630 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3631 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3632 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3633 arm_arch6m = arm_arch6 && !arm_arch_notm;
3634 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3635 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3636 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3637 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3638 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3639 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3640 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3641 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3642 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3643 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3644 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3645 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3646 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3647 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3648 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3649 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3650 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3651 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3652 if (arm_fp16_inst)
3654 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3655 error ("selected fp16 options are incompatible");
3656 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3659 /* And finally, set up some quirks. */
3660 arm_arch_no_volatile_ce
3661 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3662 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3663 isa_bit_quirk_armv6kz);
3665 /* Use the cp15 method if it is available. */
3666 if (target_thread_pointer == TP_AUTO)
3668 if (arm_arch6k && !TARGET_THUMB1)
3669 target_thread_pointer = TP_CP15;
3670 else
3671 target_thread_pointer = TP_SOFT;
3675 /* Perform some validation between the desired architecture and the rest of the options. */
3678 arm_options_perform_arch_sanity_checks (void)
3680 /* V5T code we generate is completely interworking capable, so we turn off
3681 TARGET_INTERWORK here to avoid many tests later on. */
3683 /* XXX However, we must pass the right pre-processor defines to CPP
3684 or GLD can get confused. This is a hack. */
3685 if (TARGET_INTERWORK)
3686 arm_cpp_interwork = 1;
3689 target_flags &= ~MASK_INTERWORK;
3691 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3692 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3694 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3695 error ("iwmmxt abi requires an iwmmxt capable cpu");
3697 /* BPABI targets use linker tricks to allow interworking on cores
3698 without thumb support. */
3699 if (TARGET_INTERWORK
3701 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3703 warning (0, "target CPU does not support interworking");
3704 target_flags &= ~MASK_INTERWORK;
3707 /* If soft-float is specified then don't use the FPU. */
3708 if (TARGET_SOFT_FLOAT)
3709 arm_fpu_attr = FPU_NONE;
3710 else
3711 arm_fpu_attr = FPU_VFP;
3713 if (TARGET_AAPCS_BASED)
3715 if (TARGET_CALLER_INTERWORKING)
3716 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3718 if (TARGET_CALLEE_INTERWORKING)
3719 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3722 /* __fp16 support currently assumes the core has ldrh. */
3723 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3724 sorry ("__fp16 and no ldrh");
3726 if (use_cmse && !arm_arch_cmse)
3727 error ("target CPU does not support ARMv8-M Security Extensions");
3729 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions,
3730 and ARMv8-M Baseline and Mainline do not allow such a configuration. */
3731 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3732 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3735 if (TARGET_AAPCS_BASED)
3737 if (arm_abi == ARM_ABI_IWMMXT)
3738 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3739 else if (TARGET_HARD_FLOAT_ABI)
3741 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3742 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3743 error ("%<-mfloat-abi=hard%>: selected processor lacks an FPU");
3745 else
3746 arm_pcs_default = ARM_PCS_AAPCS;
3748 else
3750 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3751 sorry ("%<-mfloat-abi=hard%> and VFP");
3753 if (arm_abi == ARM_ABI_APCS)
3754 arm_pcs_default = ARM_PCS_APCS;
3755 else
3756 arm_pcs_default = ARM_PCS_ATPCS;
3760 /* Test whether a local function descriptor is canonical, i.e.,
3761 whether we can use GOTOFFFUNCDESC to compute the address of the function. */
3764 arm_fdpic_local_funcdesc_p (rtx fnx)
3767 enum symbol_visibility vis;
3773 if (! SYMBOL_REF_LOCAL_P (fnx))
3774 return FALSE;
3776 fn = SYMBOL_REF_DECL (fnx);
3778 if (! fn)
3779 return FALSE;
3781 vis = DECL_VISIBILITY (fn);
3783 if (vis == VISIBILITY_PROTECTED)
3784 /* Private function descriptors for protected functions are not
3785 canonical. Temporarily change the visibility to global so that
3786 we can ensure uniqueness of funcdesc pointers. */
3787 DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
3789 ret = default_binds_local_p_1 (fn, flag_pic);
3791 DECL_VISIBILITY (fn) = vis;
3793 return ret;
3797 arm_add_gc_roots (void)
3799 gcc_obstack_init(&minipool_obstack);
3800 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3803 /* A table of known ARM exception types.
3804 For use with the interrupt function attribute. */
3808 const char *const arg;
3809 const unsigned long return_value;
3813 static const isr_attribute_arg isr_attribute_args [] =
3815 { "IRQ", ARM_FT_ISR },
3816 { "irq", ARM_FT_ISR },
3817 { "FIQ", ARM_FT_FIQ },
3818 { "fiq", ARM_FT_FIQ },
3819 { "ABORT", ARM_FT_ISR },
3820 { "abort", ARM_FT_ISR },
3821 { "ABORT", ARM_FT_ISR },
3822 { "abort", ARM_FT_ISR },
3823 { "UNDEF", ARM_FT_EXCEPTION },
3824 { "undef", ARM_FT_EXCEPTION },
3825 { "SWI", ARM_FT_EXCEPTION },
3826 { "swi", ARM_FT_EXCEPTION },
3827 { NULL, ARM_FT_NORMAL }
3830 /* Returns the (interrupt) function type of the current
3831 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3833 static unsigned long
3834 arm_isr_value (tree argument)
3836 const isr_attribute_arg * ptr;
3837 const char * arg;
3839 if (!arm_arch_notm)
3840 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3842 /* No argument - default to IRQ. */
3843 if (argument == NULL_TREE)
3844 return ARM_FT_ISR;
3846 /* Get the value of the argument. */
3847 if (TREE_VALUE (argument) == NULL_TREE
3848 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3849 return ARM_FT_UNKNOWN;
3851 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3853 /* Check it against the list of known arguments. */
3854 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3855 if (streq (arg, ptr->arg))
3856 return ptr->return_value;
3858 /* An unrecognized interrupt type. */
3859 return ARM_FT_UNKNOWN;
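/* Illustration only: the table above is consulted for declarations such as

     void uart_rx_handler (void) __attribute__ ((interrupt ("IRQ")));

   (uart_rx_handler is a made-up name).  The string argument selects an
   entry ("IRQ" -> ARM_FT_ISR); an unknown string yields ARM_FT_UNKNOWN.  */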
3862 /* Computes the type of the current function. */
3864 static unsigned long
3865 arm_compute_func_type (void)
3867 unsigned long type = ARM_FT_UNKNOWN;
3871 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3873 /* Decide if the current function is volatile. Such functions
3874 never return, and many memory cycles can be saved by not storing
3875 register values that will never be needed again. This optimization
3876 was added to speed up context switching in a kernel application. */
3877 if (optimize > 0
3878 && (TREE_NOTHROW (current_function_decl)
3879 || !(flag_unwind_tables
3880 || (flag_exceptions
3881 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3882 && TREE_THIS_VOLATILE (current_function_decl))
3883 type |= ARM_FT_VOLATILE;
3885 if (cfun->static_chain_decl != NULL)
3886 type |= ARM_FT_NESTED;
3888 attr = DECL_ATTRIBUTES (current_function_decl);
3890 a = lookup_attribute ("naked", attr);
3892 type |= ARM_FT_NAKED;
3894 a = lookup_attribute ("isr", attr);
3896 a = lookup_attribute ("interrupt", attr);
3899 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3900 else
3901 type |= arm_isr_value (TREE_VALUE (a));
3903 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3904 type |= ARM_FT_CMSE_ENTRY;
3909 /* Returns the type of the current function. */
3912 arm_current_func_type (void)
3914 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3915 cfun->machine->func_type = arm_compute_func_type ();
3917 return cfun->machine->func_type;
3921 arm_allocate_stack_slots_for_args (void)
3923 /* Naked functions should not allocate stack slots for arguments. */
3924 return !IS_NAKED (arm_current_func_type ());
3928 arm_warn_func_return (tree decl)
3930 /* Naked functions are implemented entirely in assembly, including the
3931 return sequence, so suppress warnings about this. */
3932 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
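/* Illustration only: a declaration such as

     void __attribute__ ((naked)) reset_handler (void);

   (reset_handler is a made-up name) promises that the body supplies its
   own prologue, epilogue and return sequence in inline asm, which is why
   both stack-slot allocation and the missing-return warning are
   suppressed above.  */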
3936 /* Output assembler code for a block containing the constant parts
3937 of a trampoline, leaving space for the variable parts.
3939 On the ARM, (if r8 is the static chain regnum, and remembering that
3940 referencing pc adds an offset of 8) the trampoline looks like:
3941 ldr r8, [pc, #0]
3942 ldr pc, [pc, #0]
3943 .word static chain value
3944 .word function's address
3945 XXX FIXME: When the trampoline returns, r8 will be clobbered.
3947 In FDPIC mode, the trampoline looks like:
3948 .word trampoline address
3949 .word trampoline GOT address
3950 ldr r12, [pc, #8] ; #4 for Arm mode
3951 ldr r9, [pc, #8] ; #4 for Arm mode
3952 ldr pc, [pc, #8] ; #4 for Arm mode
3953 .word static chain value
3954 .word GOT address
3955 .word function's address
3959 arm_asm_trampoline_template (FILE *f)
3961 fprintf (f, "\t.syntax unified\n");
3965 /* The first two words are a function descriptor pointing to the
3966 trampoline code just below. */
3967 if (TARGET_ARM)
3968 fprintf (f, "\t.arm\n");
3969 else if (TARGET_THUMB2)
3970 fprintf (f, "\t.thumb\n");
3972 /* Only ARM and Thumb-2 are supported. */
3975 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3976 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3977 /* Trampoline code which sets the static chain register but also
3978 PIC register before jumping into real code. */
3979 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
3980 STATIC_CHAIN_REGNUM, PC_REGNUM,
3981 TARGET_THUMB2 ? 8 : 4);
3982 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
3983 PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
3984 TARGET_THUMB2 ? 8 : 4);
3985 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
3986 PC_REGNUM, PC_REGNUM,
3987 TARGET_THUMB2 ? 8 : 4);
3988 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3990 else if (TARGET_ARM)
3992 fprintf (f, "\t.arm\n");
3993 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3994 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3996 else if (TARGET_THUMB2)
3998 fprintf (f, "\t.thumb\n");
3999 /* The Thumb-2 trampoline is similar to the ARM implementation.
4000 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4001 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4002 STATIC_CHAIN_REGNUM, PC_REGNUM);
4003 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4007 ASM_OUTPUT_ALIGN (f, 2);
4008 fprintf (f, "\t.code\t16\n");
4009 fprintf (f, ".Ltrampoline_start:\n");
4010 asm_fprintf (f, "\tpush\t{r0, r1}\n");
4011 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4012 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4013 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4014 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4015 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4017 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4018 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
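/* Illustration only: a trampoline is materialized when the address of a
   nested function escapes, as in

     int outer (int k)
     {
       int inner (int x) { return x + k; }
       return apply (inner);
     }

   (apply is a made-up consumer).  The template emitted above is copied
   onto the stack and patched with the static chain value and the target
   address by arm_trampoline_init below.  */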
4021 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4024 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4026 rtx fnaddr, mem, a_tramp;
4028 emit_block_move (m_tramp, assemble_trampoline_template (),
4029 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4033 rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4034 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4035 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4036 /* The function start address is at offset 8, but in Thumb mode
4037 we want bit 0 set to 1 to indicate Thumb-ness, hence 9 below. */
4039 rtx trampoline_code_start
4040 = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4042 /* Write initial funcdesc which points to the trampoline. */
4043 mem = adjust_address (m_tramp, SImode, 0);
4044 emit_move_insn (mem, trampoline_code_start);
4045 mem = adjust_address (m_tramp, SImode, 4);
4046 emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4047 /* Set up the static chain. */
4048 mem = adjust_address (m_tramp, SImode, 20);
4049 emit_move_insn (mem, chain_value);
4050 /* GOT + real function entry point. */
4051 mem = adjust_address (m_tramp, SImode, 24);
4052 emit_move_insn (mem, gotaddr);
4053 mem = adjust_address (m_tramp, SImode, 28);
4054 emit_move_insn (mem, fnaddr);
4058 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4059 emit_move_insn (mem, chain_value);
4061 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4062 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4063 emit_move_insn (mem, fnaddr);
4066 a_tramp = XEXP (m_tramp, 0);
4067 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4068 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
4069 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
4072 /* Thumb trampolines should be entered in Thumb mode, so set
4073 the bottom bit of the address. */
4076 arm_trampoline_adjust_address (rtx addr)
4078 /* For FDPIC don't fix trampoline address since it's a function
4079 descriptor and not a function address. */
4080 if (TARGET_THUMB && !TARGET_FDPIC)
4081 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4082 NULL, 0, OPTAB_LIB_WIDEN);
4086 /* Return 1 if it is possible to return using a single instruction.
4087 If SIBLING is non-null, this is a test for a return before a sibling
4088 call. SIBLING is the call insn, so we can examine its register usage. */
4091 use_return_insn (int iscond, rtx sibling)
4094 unsigned int func_type;
4095 unsigned long saved_int_regs;
4096 unsigned HOST_WIDE_INT stack_adjust;
4097 arm_stack_offsets *offsets;
4099 /* Never use a return instruction before reload has run. */
4100 if (!reload_completed)
4101 return 0;
4103 func_type = arm_current_func_type ();
4105 /* Naked, volatile and stack alignment functions need special consideration. */
4107 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4110 /* So do interrupt functions that use the frame pointer and Thumb
4111 interrupt functions. */
4112 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4113 return 0;
4115 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4116 && !optimize_function_for_size_p (cfun))
4119 offsets = arm_get_frame_offsets ();
4120 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4122 /* As do variadic functions. */
4123 if (crtl->args.pretend_args_size
4124 || cfun->machine->uses_anonymous_args
4125 /* Or if the function calls __builtin_eh_return () */
4126 || crtl->calls_eh_return
4127 /* Or if the function calls alloca */
4128 || cfun->calls_alloca
4129 /* Or if there is a stack adjustment. However, if the stack pointer
4130 is saved on the stack, we can use a pre-incrementing stack load. */
4131 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4132 && stack_adjust == 4))
4133 /* Or if the static chain register was saved above the frame, under the
4134 assumption that the stack pointer isn't saved on the stack. */
4135 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4136 && arm_compute_static_chain_stack_bytes() != 0))
4139 saved_int_regs = offsets->saved_regs_mask;
4141 /* Unfortunately, the insn
4143 ldmib sp, {..., sp, ...}
4145 triggers a bug on most SA-110 based devices, such that the stack
4146 pointer won't be correctly restored if the instruction takes a
4147 page fault. We work around this problem by popping r3 along with
4148 the other registers, since that is never slower than executing
4149 another instruction.
4151 We test for !arm_arch5t here, because code for any architecture
4152 less than this could potentially be run on one of the buggy chips. */
4154 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4156 /* Validate that r3 is a call-clobbered register (always true in
4157 the default abi) ... */
4158 if (!call_used_regs[3])
4159 return 0;
4161 /* ... that it isn't being used for a return value ... */
4162 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4163 return 0;
4165 /* ... or for a tail-call argument ... */
4168 gcc_assert (CALL_P (sibling));
4170 if (find_regno_fusage (sibling, USE, 3))
4171 return 0;
4174 /* ... and that there are no call-saved registers in r0-r2
4175 (always true in the default ABI). */
4176 if (saved_int_regs & 0x7)
4180 /* Can't be done if interworking with Thumb, and any registers have been stacked. */
4182 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4183 return 0;
4185 /* On StrongARM, conditional returns are expensive if they aren't
4186 taken and multiple registers have been stacked. */
4187 if (iscond && arm_tune_strongarm)
4189 /* Conditional return when just the LR is stored is a simple
4190 conditional-load instruction, that's not expensive. */
4191 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4192 return 0;
4194 if (flag_pic
4195 && arm_pic_register != INVALID_REGNUM
4196 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4200 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4201 several instructions if anything needs to be popped. */
4202 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4205 /* If there are saved registers but the LR isn't saved, then we need
4206 two instructions for the return. */
4207 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4210 /* Can't be done if any of the VFP regs are pushed,
4211 since this also requires an insn. */
4212 if (TARGET_HARD_FLOAT)
4213 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4214 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4217 if (TARGET_REALLY_IWMMXT)
4218 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4219 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4225 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4226 shrink-wrapping if possible. This is the case if we need to emit a
4227 prologue, which we can test by looking at the offsets. */
4229 use_simple_return_p (void)
4231 arm_stack_offsets *offsets;
4233 /* Note this function can be called before or after reload. */
4234 if (!reload_completed)
4235 arm_compute_frame_layout ();
4237 offsets = arm_get_frame_offsets ();
4238 return offsets->outgoing_args != 0;
4241 /* Return TRUE if int I is a valid immediate ARM constant. */
4244 const_ok_for_arm (HOST_WIDE_INT i)
4248 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4249 be all zero, or all one. */
4250 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4251 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4252 != ((~(unsigned HOST_WIDE_INT) 0)
4253 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4256 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4258 /* Fast return for 0 and small values. We must do this for zero, since
4259 the code below can't handle that one case. */
4260 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4263 /* Get the number of trailing zeros. */
4264 lowbit = ffs((int) i) - 1;
4266 /* Only even shifts are allowed in ARM mode so round down to the
4267 nearest even number. */
4268 if (TARGET_ARM)
4269 lowbit &= ~1;
4271 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4276 /* Allow rotated constants in ARM mode. */
4277 if (TARGET_ARM
4278 && ((i & ~0xc000003f) == 0
4279 || (i & ~0xf000000f) == 0
4280 || (i & ~0xfc000003) == 0))
4283 else if (TARGET_THUMB2)
4287 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4288 v = i & 0xff;
4289 v |= v << 16;
4290 if (i == v || i == (v | (v << 8)))
4293 /* Allow repeated pattern 0xXY00XY00. */
4294 v = i & 0xff00;
4295 v |= v << 16;
4296 if (i == v)
4297 return TRUE;
4299 else if (TARGET_HAVE_MOVT)
4301 /* Thumb-1 Targets with MOVT. */
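/* A hedged illustration of the ARM-mode rule used by const_ok_for_arm
   above: a valid data-processing immediate is an 8-bit value rotated
   right by an even amount.  The sketch below is self-contained, not part
   of the compiler, and its name is made up.  */
#if 0
static int
arm_mode_immediate_p (unsigned int i)
{
  unsigned int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotate I left by ROT bits; if the result fits in eight bits,
	 then I is that 8-bit value rotated right by ROT.  */
      unsigned int v = (i << rot) | (rot ? i >> (32 - rot) : 0);
      if ((v & ~0xffu) == 0)
	return 1;	/* e.g. 0xff000000 or 0x000003fc.  */
    }
  return 0;		/* e.g. 0x00000101 or 0x12345678.  */
}
#endif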
4311 /* Return true if I is a valid constant for the operation CODE. */
4313 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4315 if (const_ok_for_arm (i))
4316 return 1;
4318 switch (code)
4320 case SET:
4321 /* See if we can use movw. */
4322 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4325 /* Otherwise, try mvn. */
4326 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4328 case PLUS:
4329 /* See if we can use addw or subw. */
4330 if (TARGET_THUMB2
4331 && ((i & 0xfffff000) == 0
4332 || ((-i) & 0xfffff000) == 0))
4353 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4355 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4356 return 1;
4358 case IOR:
4359 if (TARGET_THUMB2)
4361 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4362 return 0;
4364 case AND:
4365 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4372 /* Return true if I is a valid di mode constant for the operation CODE. */
4374 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4376 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4377 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4378 rtx hi = GEN_INT (hi_val);
4379 rtx lo = GEN_INT (lo_val);
4389 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4390 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4392 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4399 /* Emit a sequence of insns to handle a large constant.
4400 CODE is the code of the operation required, it can be any of SET, PLUS,
4401 IOR, AND, XOR, MINUS;
4402 MODE is the mode in which the operation is being performed;
4403 VAL is the integer to operate on;
4404 SOURCE is the other operand (a register, or a null-pointer for SET);
4405 SUBTARGETS means it is safe to create scratch registers if that will
4406 either produce a simpler sequence, or we will want to cse the values.
4407 Return value is the number of insns emitted. */
4409 /* ??? Tweak this for thumb2. */
4411 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4412 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4416 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4417 cond = COND_EXEC_TEST (PATTERN (insn));
4421 if (subtargets || code == SET
4422 || (REG_P (target) && REG_P (source)
4423 && REGNO (target) != REGNO (source)))
4425 /* After arm_reorg has been called, we can't fix up expensive
4426 constants by pushing them into memory so we must synthesize
4427 them in-line, regardless of the cost. This is only likely to
4428 be more costly on chips that have load delay slots and we are
4429 compiling without running the scheduler (so no splitting
4430 occurred before the final instruction emission).
4432 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4434 if (!cfun->machine->after_arm_reorg
4436 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4438 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4443 /* Currently SET is the only monadic value for CODE; all
4444 the rest are dyadic. */
4445 if (TARGET_USE_MOVT)
4446 arm_emit_movpair (target, GEN_INT (val));
4448 emit_set_insn (target, GEN_INT (val));
4454 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4456 if (TARGET_USE_MOVT)
4457 arm_emit_movpair (temp, GEN_INT (val));
4459 emit_set_insn (temp, GEN_INT (val));
4461 /* For MINUS, the constant is the minuend (SOURCE is subtracted from
4462 it), since we never have subtraction of a constant. */
4464 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4466 emit_set_insn (target,
4467 gen_rtx_fmt_ee (code, mode, source, temp));
4473 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4477 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4478 ARM/THUMB2 immediates and add up to VAL.
4479 The function return value gives the number of insns required. */
4481 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4482 struct four_ints *return_sequence)
4484 int best_consecutive_zeros = 0;
4488 struct four_ints tmp_sequence;
4490 /* If we aren't targeting ARM, the best place to start is always at
4491 the bottom, otherwise look more closely. */
4494 for (i = 0; i < 32; i += 2)
4496 int consecutive_zeros = 0;
4498 if (!(val & (3 << i)))
4500 while ((i < 32) && !(val & (3 << i)))
4502 consecutive_zeros += 2;
4505 if (consecutive_zeros > best_consecutive_zeros)
4507 best_consecutive_zeros = consecutive_zeros;
4508 best_start = i - consecutive_zeros;
4515 /* So long as it won't require any more insns to do so, it's
4516 desirable to emit a small constant (in bits 0...9) in the last
4517 insn. This way there is more chance that it can be combined with
4518 a later addressing insn to form a pre-indexed load or store
4519 operation. Consider:
4521 *((volatile int *)0xe0000100) = 1;
4522 *((volatile int *)0xe0000110) = 2;
4524 We want this to wind up as:
4528 str rB, [rA, #0x100]
4530 str rB, [rA, #0x110]
4532 rather than having to synthesize both large constants from scratch.
4534 Therefore, we calculate how many insns would be required to emit
4535 the constant starting from `best_start', and also starting from
4536 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4537 yield a shorter sequence, we may as well use zero. */
4538 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4539 if (best_start != 0
4540 && ((HOST_WIDE_INT_1U << best_start) < val))
4542 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4543 if (insns2 <= insns1)
4545 *return_sequence = tmp_sequence;
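/* A hedged sketch (not compiler code) of the greedy ARM-mode splitting
   that optimal_immediate_sequence_1 below performs: peel off one 8-bit
   chunk at an even bit position per instruction.  The name is made up,
   and the wrap-around at the top of the word that the real code handles
   via best_start is ignored here for brevity.  */
#if 0
static int
split_into_arm_immediates (unsigned int val, unsigned int out[4])
{
  int n = 0;

  while (val && n < 4)
    {
      /* Lowest set bit, rounded down to an even position.  */
      int low = __builtin_ctz (val) & ~1;
      /* Take up to eight bits starting there; each chunk is a valid
	 rotated immediate, so it costs one instruction.  */
      unsigned int chunk = val & (0xffu << low);

      out[n++] = chunk;
      val &= ~chunk;
    }
  return n;	/* e.g. 0x0012ff00 -> 0x0000ff00 + 0x00120000.  */
}
#endif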
4553 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4555 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4556 struct four_ints *return_sequence, int i)
4558 int remainder = val & 0xffffffff;
4561 /* Try and find a way of doing the job in either two or three instructions.
4564 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4565 location. We start at position I. This may be the MSB, or
4566 optimal_immediate_sequence may have positioned it at the largest block
4567 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4568 wrapping around to the top of the word when we drop off the bottom.
4569 In the worst case this code should produce no more than four insns.
4571 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4572 constants, shifted to any arbitrary location. We should always start at 0. */
4577 unsigned int b1, b2, b3, b4;
4578 unsigned HOST_WIDE_INT result;
4581 gcc_assert (insns < 4);
4586 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4587 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4590 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4591 /* We can use addw/subw for the last 12 bits. */
4595 /* Use an 8-bit shifted/rotated immediate. */
4599 result = remainder & ((0x0ff << end)
4600 | ((i < end) ? (0xff >> (32 - end))
4607 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4608 arbitrary shifts. */
4609 i -= TARGET_ARM ? 2 : 1;
4613 /* Next, see if we can do a better job with a thumb2 replicated
4616 We do it this way around to catch the cases like 0x01F001E0 where
4617 two 8-bit immediates would work, but a replicated constant would not.
4620 TODO: 16-bit constants that don't clear all the bits, but still win.
4621 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4624 b1 = (remainder & 0xff000000) >> 24;
4625 b2 = (remainder & 0x00ff0000) >> 16;
4626 b3 = (remainder & 0x0000ff00) >> 8;
4627 b4 = remainder & 0xff;
4631 /* The 8-bit immediate already found clears b1 (and maybe b2),
4632 but must leave b3 and b4 alone. */
4634 /* First try to find a 32-bit replicated constant that clears
4635 almost everything. We can assume that we can't do it in one,
4636 or else we wouldn't be here. */
4637 unsigned int tmp = b1 & b2 & b3 & b4;
4638 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16) + (tmp << 24);
4640 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4641 + (tmp == b3) + (tmp == b4);
4642 if (tmp
4643 && (matching_bytes >= 3
4644 || (matching_bytes == 2
4645 && const_ok_for_op (remainder & ~tmp2, code))))
4647 /* At least 3 of the bytes match, and the fourth has at
4648 least as many bits set, or two of the bytes match
4649 and it will only require one more insn to finish. */
4657 /* Second, try to find a 16-bit replicated constant that can
4658 leave three of the bytes clear. If b2 or b4 is already
4659 zero, then we can. If the 8-bit from above would not
4660 clear b2 anyway, then we still win. */
4661 else if (b1 == b3 && (!b2 || !b4
4662 || (remainder & 0x00ff0000 & ~result)))
4664 result = remainder & 0xff00ff00;
4670 /* The 8-bit immediate already found clears b2 (and maybe b3)
4671 and we don't get here unless b1 is already clear, but it will
4672 leave b4 unchanged. */
4674 /* If we can clear b2 and b4 at once, then we win, since the
4675 8-bits couldn't possibly reach that far. */
4677 if (b2 == b4)
4678 result = remainder & 0x00ff00ff;
4684 return_sequence->i[insns++] = result;
4685 remainder &= ~result;
4687 if (code == SET || code == MINUS)
4695 /* Emit an instruction with the indicated PATTERN. If COND is
4696 non-NULL, conditionalize the execution of the instruction on COND being true. */
4700 emit_constant_insn (rtx cond, rtx pattern)
4702 if (cond)
4703 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4704 emit_insn (pattern);
4707 /* As above, but extra parameter GENERATE which, if clear, suppresses RTL generation. */
4711 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4712 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4713 int subtargets, int generate)
4717 int final_invert = 0;
4719 int set_sign_bit_copies = 0;
4720 int clear_sign_bit_copies = 0;
4721 int clear_zero_bit_copies = 0;
4722 int set_zero_bit_copies = 0;
4723 int insns = 0, neg_insns, inv_insns;
4724 unsigned HOST_WIDE_INT temp1, temp2;
4725 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4726 struct four_ints *immediates;
4727 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4729 /* Find out which operations are safe for a given CODE. Also do a quick
4730 check for degenerate cases; these can occur when DImode operations are split. */
4743 if (remainder == 0xffffffff)
4746 emit_constant_insn (cond,
4747 gen_rtx_SET (target,
4748 GEN_INT (ARM_SIGN_EXTEND (val))));
4754 if (reload_completed && rtx_equal_p (target, source))
4758 emit_constant_insn (cond, gen_rtx_SET (target, source));
4767 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4770 if (remainder == 0xffffffff)
4772 if (reload_completed && rtx_equal_p (target, source))
4775 emit_constant_insn (cond, gen_rtx_SET (target, source));
4784 if (reload_completed && rtx_equal_p (target, source))
4787 emit_constant_insn (cond, gen_rtx_SET (target, source));
4791 if (remainder == 0xffffffff)
4794 emit_constant_insn (cond,
4795 gen_rtx_SET (target,
4796 gen_rtx_NOT (mode, source)));
4803 /* We treat MINUS as (val - source), since (source - val) is always
4804 passed as (source + (-val)). */
4808 emit_constant_insn (cond,
4809 gen_rtx_SET (target,
4810 gen_rtx_NEG (mode, source)));
4813 if (const_ok_for_arm (val))
4816 emit_constant_insn (cond,
4817 gen_rtx_SET (target,
4818 gen_rtx_MINUS (mode, GEN_INT (val),
4829 /* If we can do it in one insn get out quickly. */
4830 if (const_ok_for_op (val, code))
4833 emit_constant_insn (cond,
4834 gen_rtx_SET (target,
4836 ? gen_rtx_fmt_ee (code, mode, source,
4842 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single insn. */
4844 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4845 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4849 if (mode == SImode && i == 16)
4850 /* Use UXTH in preference to UBFX, since on Thumb2 it's a 16-bit instruction. */
4852 emit_constant_insn (cond,
4853 gen_zero_extendhisi2
4854 (target, gen_lowpart (HImode, source)));
4856 /* Extz only supports SImode, but we can coerce the operands to that mode. */
4858 emit_constant_insn (cond,
4859 gen_extzv_t2 (gen_lowpart (SImode, target),
4860 gen_lowpart (SImode, source),
4861 GEN_INT (i), const0_rtx));
4867 /* Calculate a few attributes that may be useful for specific optimizations. */
4869 /* Count number of leading zeros. */
4870 for (i = 31; i >= 0; i--)
4872 if ((remainder & (1 << i)) == 0)
4873 clear_sign_bit_copies++;
4874 else
4875 break;
4878 /* Count number of leading 1's. */
4879 for (i = 31; i >= 0; i--)
4881 if ((remainder & (1 << i)) != 0)
4882 set_sign_bit_copies++;
4883 else
4884 break;
4887 /* Count number of trailing zeros. */
4888 for (i = 0; i <= 31; i++)
4890 if ((remainder & (1 << i)) == 0)
4891 clear_zero_bit_copies++;
4892 else
4893 break;
4896 /* Count number of trailing 1's. */
4897 for (i = 0; i <= 31; i++)
4899 if ((remainder & (1 << i)) != 0)
4900 set_zero_bit_copies++;
4901 else
4902 break;
4908 /* See if we can do this by sign_extending a constant that is known
4909 to be negative. This is a good way of doing it, since the shift
4910 may well merge into a subsequent insn. */
4911 if (set_sign_bit_copies > 1)
4913 if (const_ok_for_arm
4914 (temp1 = ARM_SIGN_EXTEND (remainder
4915 << (set_sign_bit_copies - 1))))
4919 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4920 emit_constant_insn (cond,
4921 gen_rtx_SET (new_src, GEN_INT (temp1)));
4922 emit_constant_insn (cond,
4923 gen_ashrsi3 (target, new_src,
4924 GEN_INT (set_sign_bit_copies - 1)));
4928 /* For an inverted constant, we will need to set the low bits;
4929 these will be shifted out of harm's way. */
4930 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4931 if (const_ok_for_arm (~temp1))
4935 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4936 emit_constant_insn (cond,
4937 gen_rtx_SET (new_src, GEN_INT (temp1)));
4938 emit_constant_insn (cond,
4939 gen_ashrsi3 (target, new_src,
4940 GEN_INT (set_sign_bit_copies - 1)));
4946 /* See if we can calculate the value as the difference between two
4947 valid immediates. */
4948 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4950 int topshift = clear_sign_bit_copies & ~1;
4952 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4953 & (0xff000000 >> topshift));
4955 /* If temp1 is zero, then that means the 9 most significant
4956 bits of remainder were 1 and we've caused it to overflow.
4957 When topshift is 0 we don't need to do anything since we
4958 can borrow from 'bit 32'. */
4959 if (temp1 == 0 && topshift != 0)
4960 temp1 = 0x80000000 >> (topshift - 1);
4962 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4964 if (const_ok_for_arm (temp2))
4968 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4969 emit_constant_insn (cond,
4970 gen_rtx_SET (new_src, GEN_INT (temp1)));
4971 emit_constant_insn (cond,
4972 gen_addsi3 (target, new_src,
4973 GEN_INT (-temp2)));
4980 /* See if we can generate this by setting the bottom (or the top)
4981 16 bits, and then shifting these into the other half of the
4982 word. We only look for the simplest cases; to do more would cost
4983 too much. Be careful, however, not to generate this when the
4984 alternative would take fewer insns. */
4985 if (val & 0xffff0000)
4987 temp1 = remainder & 0xffff0000;
4988 temp2 = remainder & 0x0000ffff;
4990 /* Overlaps outside this range are best done using other methods. */
4991 for (i = 9; i < 24; i++)
4993 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4994 && !const_ok_for_arm (temp2))
4996 rtx new_src = (subtargets
4997 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4999 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5000 source, subtargets, generate);
5008 gen_rtx_ASHIFT (mode, source,
5015 /* Don't duplicate cases already considered. */
5016 for (i = 17; i < 24; i++)
5018 if (((temp1 | (temp1 >> i)) == remainder)
5019 && !const_ok_for_arm (temp1))
5021 rtx new_src = (subtargets
5022 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5024 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5025 source, subtargets, generate);
5030 gen_rtx_SET (target,
5033 gen_rtx_LSHIFTRT (mode, source,
5044 /* If we have IOR or XOR, and the constant can be loaded in a
5045 single instruction, and we can find a temporary to put it in,
5046 then this can be done in two instructions instead of 3-4. */
5047 if (subtargets
5048 /* TARGET can't be NULL if SUBTARGETS is 0 */
5049 || (reload_completed && !reg_mentioned_p (target, source)))
5051 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5055 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5057 emit_constant_insn (cond,
5058 gen_rtx_SET (sub, GEN_INT (val)));
5059 emit_constant_insn (cond,
5060 gen_rtx_SET (target,
5061 gen_rtx_fmt_ee (code, mode,
5072 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
5073 and the remainder 0s, e.g. 0xfff00000)
5074 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
5076 This can be done in 2 instructions by using shifts with mov or mvn.
5077 e.g. for
5078 x = x | 0xfff00000;
5079 we generate:
5080 mvn r0, r0, asl #12
5081 mvn r0, r0, lsr #12 */
5082 if (set_sign_bit_copies > 8
5083 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5087 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5088 rtx shift = GEN_INT (set_sign_bit_copies);
5094 gen_rtx_ASHIFT (mode,
5099 gen_rtx_SET (target,
5101 gen_rtx_LSHIFTRT (mode, sub,
5108 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5110 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5112 For example, for r0 = r0 | 0xfff we generate:
5113 mvn r0, r0, lsr #12
5114 mvn r0, r0, asl #12 */
5117 if (set_zero_bit_copies > 8
5118 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5122 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5123 rtx shift = GEN_INT (set_zero_bit_copies);
5129 gen_rtx_LSHIFTRT (mode,
5134 gen_rtx_SET (target,
5136 gen_rtx_ASHIFT (mode, sub,
5142 /* This will never be reached for Thumb-2 because orn is a valid
5143 instruction. This is for Thumb-1 and the 32-bit ARM cases.
5145 x = y | constant (such that ~constant is a valid constant)
5146 Transform this to
5147 x = ~(~y & ~constant).
5149 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5153 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5154 emit_constant_insn (cond,
5156 gen_rtx_NOT (mode, source)));
5159 sub = gen_reg_rtx (mode);
5160 emit_constant_insn (cond,
5162 gen_rtx_AND (mode, source,
5164 emit_constant_insn (cond,
5165 gen_rtx_SET (target,
5166 gen_rtx_NOT (mode, sub)));
5173 /* See if two shifts will do 2 or more insn's worth of work. */
5174 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5176 HOST_WIDE_INT shift_mask = ((0xffffffff
5177 << (32 - clear_sign_bit_copies))
5180 if ((remainder | shift_mask) != 0xffffffff)
5182 HOST_WIDE_INT new_val
5183 = ARM_SIGN_EXTEND (remainder | shift_mask);
5187 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5188 insns = arm_gen_constant (AND, SImode, cond, new_val,
5189 new_src, source, subtargets, 1);
5194 rtx targ = subtargets ? NULL_RTX : target;
5195 insns = arm_gen_constant (AND, mode, cond, new_val,
5196 targ, source, subtargets, 0);
5202 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5203 rtx shift = GEN_INT (clear_sign_bit_copies);
5205 emit_insn (gen_ashlsi3 (new_src, source, shift));
5206 emit_insn (gen_lshrsi3 (target, new_src, shift));
5212 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5214 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5216 if ((remainder | shift_mask) != 0xffffffff)
5218 HOST_WIDE_INT new_val
5219 = ARM_SIGN_EXTEND (remainder | shift_mask);
5222 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5224 insns = arm_gen_constant (AND, mode, cond, new_val,
5225 new_src, source, subtargets, 1);
5230 rtx targ = subtargets ? NULL_RTX : target;
5232 insns = arm_gen_constant (AND, mode, cond, new_val,
5233 targ, source, subtargets, 0);
5239 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5240 rtx shift = GEN_INT (clear_zero_bit_copies);
5242 emit_insn (gen_lshrsi3 (new_src, source, shift));
5243 emit_insn (gen_ashlsi3 (target, new_src, shift));
5255 /* Calculate what the instruction sequences would be if we generated it
5256 normally, negated, or inverted. */
5258 /* AND cannot be split into multiple insns, so invert and use BIC. */
5261 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5264 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5269 if (can_invert || final_invert)
5270 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5275 immediates = &pos_immediates;
5277 /* Is the negated immediate sequence more efficient? */
5278 if (neg_insns < insns && neg_insns <= inv_insns)
5281 immediates = &neg_immediates;
5286 /* Is the inverted immediate sequence more efficient?
5287 We must allow for an extra NOT instruction for XOR operations, although
5288 there is some chance that the final 'mvn' will get optimized later. */
5289 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5292 immediates = &inv_immediates;
5300 /* Now output the chosen sequence as instructions. */
5303 for (i = 0; i < insns; i++)
5305 rtx new_src, temp1_rtx;
5307 temp1 = immediates->i[i];
5309 if (code == SET || code == MINUS)
5310 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5311 else if ((final_invert || i < (insns - 1)) && subtargets)
5312 new_src = gen_reg_rtx (mode);
5318 else if (can_negate)
5321 temp1 = trunc_int_for_mode (temp1, mode);
5322 temp1_rtx = GEN_INT (temp1);
5326 else if (code == MINUS)
5327 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5329 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5331 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5336 can_negate = can_invert;
5340 else if (code == MINUS)
5348 emit_constant_insn (cond, gen_rtx_SET (target,
5349 gen_rtx_NOT (mode, source)));
5356 /* Canonicalize a comparison so that we are more likely to recognize it.
5357 This can be done for a few constant compares, where we can make the
5358 immediate value easier to load. */
5361 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5362 bool op0_preserve_value)
5365 unsigned HOST_WIDE_INT i, maxval;
5367 mode = GET_MODE (*op0);
5368 if (mode == VOIDmode)
5369 mode = GET_MODE (*op1);
5371 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5373 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5374 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5375 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5376 for GTU/LEU in Thumb mode. */
5380 if (*code == GT || *code == LE
5381 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5383 /* Missing comparison. First try to use an available comparison. */
5385 if (CONST_INT_P (*op1))
5393 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5395 *op1 = GEN_INT (i + 1);
5396 *code = *code == GT ? GE : LT;
5402 if (i != ~((unsigned HOST_WIDE_INT) 0)
5403 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5405 *op1 = GEN_INT (i + 1);
5406 *code = *code == GTU ? GEU : LTU;
5415 /* If that did not work, reverse the condition. */
5416 if (!op0_preserve_value)
5418 std::swap (*op0, *op1);
5419 *code = (int)swap_condition ((enum rtx_code)*code);
5425 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5426 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5427 to facilitate possible combining with a cmp into 'ands'. */
5429 && GET_CODE (*op0) == ZERO_EXTEND
5430 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5431 && GET_MODE (XEXP (*op0, 0)) == QImode
5432 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5433 && subreg_lowpart_p (XEXP (*op0, 0))
5434 && *op1 == const0_rtx)
5435 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5436 GEN_INT (255));
5438 /* Comparisons smaller than DImode. Only adjust comparisons against
5439 an out-of-range constant. */
5440 if (!CONST_INT_P (*op1)
5441 || const_ok_for_arm (INTVAL (*op1))
5442 || const_ok_for_arm (- INTVAL (*op1)))
5443 return;
5456 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5458 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5459 *code = *code == GT ? GE : LT;
5467 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5469 *op1 = GEN_INT (i - 1);
5470 *code = *code == GE ? GT : LE;
5477 if (i != ~((unsigned HOST_WIDE_INT) 0)
5478 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5480 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5481 *code = *code == GTU ? GEU : LTU;
5489 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5491 *op1 = GEN_INT (i - 1);
5492 *code = *code == GEU ? GTU : LEU;
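/* Illustration only: the adjustments above rely on the identities
   (x > C) == (x >= C + 1) and (x >= C) == (x > C - 1) (likewise for the
   unsigned codes), valid whenever C +/- 1 does not wrap.  For example
   "x > 0x3ff" becomes "x >= 0x400": 0x3ff is not a valid ARM immediate,
   but 0x400 is.  */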
5503 /* Define how to find the value returned by a function. */
5506 arm_function_value (const_tree type, const_tree func,
5507 bool outgoing ATTRIBUTE_UNUSED)
5510 int unsignedp ATTRIBUTE_UNUSED;
5511 rtx r ATTRIBUTE_UNUSED;
5513 mode = TYPE_MODE (type);
5515 if (TARGET_AAPCS_BASED)
5516 return aapcs_allocate_return_reg (mode, type, func);
5518 /* Promote integer types. */
5519 if (INTEGRAL_TYPE_P (type))
5520 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5522 /* Promote small structs returned in a register to full-word size
5523 for big-endian AAPCS. */
5524 if (arm_return_in_msb (type))
5526 HOST_WIDE_INT size = int_size_in_bytes (type);
5527 if (size % UNITS_PER_WORD != 0)
5529 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5530 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5534 return arm_libcall_value_1 (mode);
5537 /* libcall hashtable helpers. */
5539 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5541 static inline hashval_t hash (const rtx_def *);
5542 static inline bool equal (const rtx_def *, const rtx_def *);
5543 static inline void remove (rtx_def *);
5547 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5549 return rtx_equal_p (p1, p2);
5553 libcall_hasher::hash (const rtx_def *p1)
5555 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5558 typedef hash_table<libcall_hasher> libcall_table_type;
5561 add_libcall (libcall_table_type *htab, rtx libcall)
5563 *htab->find_slot (libcall, INSERT) = libcall;
5567 arm_libcall_uses_aapcs_base (const_rtx libcall)
5569 static bool init_done = false;
5570 static libcall_table_type *libcall_htab = NULL;
5572 if (!init_done)
5574 init_done = true;
5576 libcall_htab = new libcall_table_type (31);
5577 add_libcall (libcall_htab,
5578 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5579 add_libcall (libcall_htab,
5580 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5581 add_libcall (libcall_htab,
5582 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5583 add_libcall (libcall_htab,
5584 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5586 add_libcall (libcall_htab,
5587 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5588 add_libcall (libcall_htab,
5589 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5590 add_libcall (libcall_htab,
5591 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5592 add_libcall (libcall_htab,
5593 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5595 add_libcall (libcall_htab,
5596 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5597 add_libcall (libcall_htab,
5598 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5599 add_libcall (libcall_htab,
5600 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5601 add_libcall (libcall_htab,
5602 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5603 add_libcall (libcall_htab,
5604 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5605 add_libcall (libcall_htab,
5606 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5607 add_libcall (libcall_htab,
5608 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5609 add_libcall (libcall_htab,
5610 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5612 /* Values from double-precision helper functions are returned in core
5613 registers if the selected core only supports single-precision
5614 arithmetic, even if we are using the hard-float ABI. The same is
5615 true for single-precision helpers, but we will never be using the
5616 hard-float ABI on a CPU which doesn't support single-precision
5617 operations in hardware. */
5618 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5619 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5620 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5621 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5622 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5623 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5624 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5625 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5626 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5627 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5628 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5629 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5631 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5633 add_libcall (libcall_htab,
5634 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5637 return libcall && libcall_htab->find (libcall) != NULL;
5641 arm_libcall_value_1 (machine_mode mode)
5643 if (TARGET_AAPCS_BASED)
5644 return aapcs_libcall_value (mode);
5645 else if (TARGET_IWMMXT_ABI
5646 && arm_vector_mode_supported_p (mode))
5647 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5649 return gen_rtx_REG (mode, ARG_REGISTER (1));
5652 /* Define how to find the value returned by a library function
5653 assuming the value has mode MODE. */
5656 arm_libcall_value (machine_mode mode, const_rtx libcall)
5658 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5659 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5661 /* The following libcalls return their result in integer registers,
5662 even though they return a floating point value. */
5663 if (arm_libcall_uses_aapcs_base (libcall))
5664 return gen_rtx_REG (mode, ARG_REGISTER(1));
5668 return arm_libcall_value_1 (mode);
5671 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5674 arm_function_value_regno_p (const unsigned int regno)
5676 if (regno == ARG_REGISTER (1)
5677 || (TARGET_32BIT
5678 && TARGET_AAPCS_BASED
5679 && TARGET_HARD_FLOAT
5680 && regno == FIRST_VFP_REGNUM)
5681 || (TARGET_IWMMXT_ABI
5682 && regno == FIRST_IWMMXT_REGNUM))
5688 /* Determine the amount of memory needed to store the possible return
5689 registers of an untyped call. */
5691 arm_apply_result_size (void)
5697 if (TARGET_HARD_FLOAT_ABI)
5698 size += 32;
5699 if (TARGET_IWMMXT_ABI)
5700 size += 8;
5706 /* Decide whether TYPE should be returned in memory (true)
5707 or in a register (false). FNTYPE is the type of the function making the call. */
5710 arm_return_in_memory (const_tree type, const_tree fntype)
5714 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5716 if (TARGET_AAPCS_BASED)
5718 /* Simple, non-aggregate types (i.e. not including vectors and
5719 complex) are always returned in a register (or registers).
5720 We don't care about which register here, so we can short-cut
5721 some of the detail. */
5722 if (!AGGREGATE_TYPE_P (type)
5723 && TREE_CODE (type) != VECTOR_TYPE
5724 && TREE_CODE (type) != COMPLEX_TYPE)
5727 /* Any return value that is no larger than one word can be returned in r0. */
5729 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5732 /* Check any available co-processors to see if they accept the
5733 type as a register candidate (VFP, for example, can return
5734 some aggregates in consecutive registers). These aren't
5735 available if the call is variadic. */
5736 if (aapcs_select_return_coproc (type, fntype) >= 0)
5739 /* Vector values should be returned using ARM registers, not
5740 memory (unless they're over 16 bytes, which will break since
5741 we only have four call-clobbered registers to play with). */
5742 if (TREE_CODE (type) == VECTOR_TYPE)
5743 return (size < 0 || size > (4 * UNITS_PER_WORD));
5745 /* The rest go in memory. */
5749 if (TREE_CODE (type) == VECTOR_TYPE)
5750 return (size < 0 || size > (4 * UNITS_PER_WORD));
5752 if (!AGGREGATE_TYPE_P (type) &&
5753 (TREE_CODE (type) != VECTOR_TYPE))
5754 /* All simple types are returned in registers. */
5757 if (arm_abi != ARM_ABI_APCS)
5759 /* ATPCS and later return aggregate types in memory only if they are
5760 larger than a word (or are variable size). */
5761 return (size < 0 || size > UNITS_PER_WORD);
5764 /* For the arm-wince targets we choose to be compatible with Microsoft's
5765 ARM and Thumb compilers, which always return aggregates in memory. */
5767 /* All structures/unions bigger than one word are returned in memory.
5768 Also catch the case where int_size_in_bytes returns -1. In this case
5769 the aggregate is either huge or of variable size, and in either case
5770 we will want to return it via memory and not in a register. */
5771 if (size < 0 || size > UNITS_PER_WORD)
5774 if (TREE_CODE (type) == RECORD_TYPE)
5778 /* For a struct the APCS says that we only return in a register
5779 if the type is 'integer like' and every addressable element
5780 has an offset of zero. For practical purposes this means
5781 that the structure can have at most one non bit-field element
5782 and that this element must be the first one in the structure. */
5784 /* Find the first field, ignoring non FIELD_DECL things which will
5785 have been created by C++. */
5786 for (field = TYPE_FIELDS (type);
5787 field && TREE_CODE (field) != FIELD_DECL;
5788 field = DECL_CHAIN (field))
5789 continue;
5791 if (field == NULL)
5792 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5794 /* Check that the first field is valid for returning in a register. */
5796 /* ... Floats are not allowed. */
5797 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5798 return true;
5800 /* ... Aggregates that are not themselves valid for returning in
5801 a register are not allowed. */
5802 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5803 return true;
5805 /* Now check the remaining fields, if any. Only bitfields are allowed,
5806 since they are not addressable. */
5807 for (field = DECL_CHAIN (field);
5809 field = DECL_CHAIN (field))
5811 if (TREE_CODE (field) != FIELD_DECL)
5814 if (!DECL_BIT_FIELD_TYPE (field))
5821 if (TREE_CODE (type) == UNION_TYPE)
5825 /* Unions can be returned in registers if every element is
5826 integral, or can be returned in an integer register. */
5827 for (field = TYPE_FIELDS (type);
5829 field = DECL_CHAIN (field))
5831 if (TREE_CODE (field) != FIELD_DECL)
5834 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5837 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5843 #endif /* not ARM_WINCE */
5845 /* Return all other types in memory. */
5849 const struct pcs_attribute_arg
5853 } pcs_attribute_args[] =
5855 {"aapcs", ARM_PCS_AAPCS},
5856 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5858 /* We could recognize these, but changes would be needed elsewhere
5859 to implement them. */
5860 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5861 {"atpcs", ARM_PCS_ATPCS},
5862 {"apcs", ARM_PCS_APCS},
5864 {NULL, ARM_PCS_UNKNOWN}
5868 arm_pcs_from_attribute (tree attr)
5870 const struct pcs_attribute_arg *ptr;
5873 /* Get the value of the argument. */
5874 if (TREE_VALUE (attr) == NULL_TREE
5875 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5876 return ARM_PCS_UNKNOWN;
5878 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5880 /* Check it against the list of known arguments. */
5881 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5882 if (streq (arg, ptr->arg))
5885 /* An unrecognized PCS name. */
5886 return ARM_PCS_UNKNOWN;
5889 /* Get the PCS variant to use for this call. TYPE is the function's type
5890 specification, DECL is the specific declaration. DECL may be null if
5891 the call could be indirect or if this is a library call. */
5893 arm_get_pcs_model (const_tree type, const_tree decl)
5895 bool user_convention = false;
5896 enum arm_pcs user_pcs = arm_pcs_default;
5901 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5904 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5905 user_convention = true;
5908 if (TARGET_AAPCS_BASED)
5910 /* Detect varargs functions. These always use the base rules
5911 (no argument is ever a candidate for a co-processor register). */
5913 bool base_rules = stdarg_p (type);
5915 if (user_convention)
5917 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5918 sorry ("non-AAPCS derived PCS variant");
5919 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5920 error ("variadic functions must use the base AAPCS variant");
5924 return ARM_PCS_AAPCS;
5925 else if (user_convention)
5927 else if (decl && flag_unit_at_a_time)
5929 /* Local functions never leak outside this compilation unit,
5930 so we are free to use whatever conventions are appropriate. */
5932 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5933 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5935 return ARM_PCS_AAPCS_LOCAL;
5938 else if (user_convention && user_pcs != arm_pcs_default)
5939 sorry ("PCS variant");
5941 /* For everything else we use the target's default. */
5942 return arm_pcs_default;
5947 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5948 const_tree fntype ATTRIBUTE_UNUSED,
5949 rtx libcall ATTRIBUTE_UNUSED,
5950 const_tree fndecl ATTRIBUTE_UNUSED)
5952 /* Record the unallocated VFP registers. */
5953 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5954 pcum->aapcs_vfp_reg_alloc = 0;
5957 /* Walk down the type tree of TYPE counting consecutive base elements.
5958 If *MODEP is VOIDmode, then set it to the first valid floating point
5959 type. If a non-floating point type is found, or if a floating point
5960 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5961 otherwise return the count in the sub-tree. */
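/* For example (an illustrative sketch): given

     struct hfa { double a, b, c; };

   the walk sets *MODEP to DFmode and returns 3; adding an 'int' member
   (or mixing 'float' fields with 'double' ones) would make it
   return -1.  */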
5963 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5968 switch (TREE_CODE (type))
5971 mode = TYPE_MODE (type);
5972 if (mode != DFmode && mode != SFmode && mode != HFmode)
5975 if (*modep == VOIDmode)
5984 mode = TYPE_MODE (TREE_TYPE (type));
5985 if (mode != DFmode && mode != SFmode)
5988 if (*modep == VOIDmode)
5997 /* Use V2SImode and V4SImode as representatives of all 64-bit
5998 and 128-bit vector types, whether or not those modes are
5999 supported with the present options. */
6000 size = int_size_in_bytes (type);
6013 if (*modep == VOIDmode)
6016 /* Vector modes are considered to be opaque: two vectors are
6017 equivalent for the purposes of being homogeneous aggregates
6018 if they are the same size. */
6027 tree index = TYPE_DOMAIN (type);
6029 /* Can't handle incomplete types nor sizes that are not fixed. */
6031 if (!COMPLETE_TYPE_P (type)
6032 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6035 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6038 || !TYPE_MAX_VALUE (index)
6039 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6040 || !TYPE_MIN_VALUE (index)
6041 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6045 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6046 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6048 /* There must be no padding. */
6049 if (wi::to_wide (TYPE_SIZE (type))
6050 != count * GET_MODE_BITSIZE (*modep))
6062 /* Can't handle incomplete types nor sizes that are not fixed. */
6064 if (!COMPLETE_TYPE_P (type)
6065 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6068 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6070 if (TREE_CODE (field) != FIELD_DECL)
6073 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6079 /* There must be no padding. */
6080 if (wi::to_wide (TYPE_SIZE (type))
6081 != count * GET_MODE_BITSIZE (*modep))
6088 case QUAL_UNION_TYPE:
6090 /* These aren't very interesting except in a degenerate case. */
6095 /* Can't handle incomplete types nor sizes that are not fixed. */
6097 if (!COMPLETE_TYPE_P (type)
6098 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6101 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6103 if (TREE_CODE (field) != FIELD_DECL)
6106 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6109 count = count > sub_count ? count : sub_count;
6112 /* There must be no padding. */
6113 if (wi::to_wide (TYPE_SIZE (type))
6114 != count * GET_MODE_BITSIZE (*modep))
6127 /* Return true if PCS_VARIANT should use VFP registers. */
6129 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6131 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6133 static bool seen_thumb1_vfp = false;
6135 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6137 sorry ("Thumb-1 hard-float VFP ABI");
6138 /* sorry() is not immediately fatal, so only display this once. */
6139 seen_thumb1_vfp = true;
6145 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6148 return (TARGET_32BIT && TARGET_HARD_FLOAT
6149 && (TARGET_VFP_DOUBLE || !is_double));
6152 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6153 suitable for passing or returning in VFP registers for the PCS
6154 variant selected. If it is, then *BASE_MODE is updated to contain
6155 a machine mode describing each element of the argument's type and
6156 *COUNT to hold the number of such elements. */
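/* For instance (illustrative): for

     struct pt { float x, y; };

   under the VFP variant *BASE_MODE becomes SFmode and *COUNT becomes 2,
   so the argument is a candidate for s0-s1.  */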
6158 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6159 machine_mode mode, const_tree type,
6160 machine_mode *base_mode, int *count)
6162 machine_mode new_mode = VOIDmode;
6164 /* If we have the type information, prefer that to working things
6165 out from the mode. */
6168 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6170 if (ag_count > 0 && ag_count <= 4)
6175 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6176 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6177 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6182 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6185 new_mode = (mode == DCmode ? DFmode : SFmode);
6191 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6194 *base_mode = new_mode;
6196 if (TARGET_GENERAL_REGS_ONLY)
6197 error ("argument of type %qT not permitted with -mgeneral-regs-only",
6204 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6205 machine_mode mode, const_tree type)
6207 int count ATTRIBUTE_UNUSED;
6208 machine_mode ag_mode ATTRIBUTE_UNUSED;
6210 if (!use_vfp_abi (pcs_variant, false))
6212 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6217 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6220 if (!use_vfp_abi (pcum->pcs_variant, false))
6223 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6224 &pcum->aapcs_vfp_rmode,
6225 &pcum->aapcs_vfp_rcount);
6228 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6229 for the behaviour of this function. */
6232 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6233 const_tree type ATTRIBUTE_UNUSED)
6236 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6237 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6238 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6241 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6242 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6244 pcum->aapcs_vfp_reg_alloc = mask << regno;
6246 || (mode == TImode && ! TARGET_NEON)
6247 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6250 int rcount = pcum->aapcs_vfp_rcount;
6252 machine_mode rmode = pcum->aapcs_vfp_rmode;
6256 /* Avoid using unsupported vector modes. */
6257 if (rmode == V2SImode)
6259 else if (rmode == V4SImode)
6266 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6267 for (i = 0; i < rcount; i++)
6269 rtx tmp = gen_rtx_REG (rmode,
6270 FIRST_VFP_REGNUM + regno + i * rshift);
6271 tmp = gen_rtx_EXPR_LIST
6273 GEN_INT (i * GET_MODE_SIZE (rmode)));
6274 XVECEXP (par, 0, i) = tmp;
6277 pcum->aapcs_reg = par;
6280 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6286 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6287 comment there for the behaviour of this function. */
6290 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6292 const_tree type ATTRIBUTE_UNUSED)
6294 if (!use_vfp_abi (pcs_variant, false))
6298 || (GET_MODE_CLASS (mode) == MODE_INT
6299 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6303 machine_mode ag_mode;
6308 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6313 if (ag_mode == V2SImode)
6315 else if (ag_mode == V4SImode)
6321 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6322 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6323 for (i = 0; i < count; i++)
6325 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6326 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6327 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6328 XVECEXP (par, 0, i) = tmp;
6334 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6338 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6339 machine_mode mode ATTRIBUTE_UNUSED,
6340 const_tree type ATTRIBUTE_UNUSED)
6342 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6343 pcum->aapcs_vfp_reg_alloc = 0;
6347 #define AAPCS_CP(X) \
6349 aapcs_ ## X ## _cum_init, \
6350 aapcs_ ## X ## _is_call_candidate, \
6351 aapcs_ ## X ## _allocate, \
6352 aapcs_ ## X ## _is_return_candidate, \
6353 aapcs_ ## X ## _allocate_return_reg, \
6354 aapcs_ ## X ## _advance \
6357 /* Table of co-processors that can be used to pass arguments in
6358 registers. Ideally no argument should be a candidate for more than
6359 one co-processor table entry, but the table is processed in order
6360 and stops after the first match. If that entry then fails to put
6361 the argument into a co-processor register, the argument will go on the stack. */
6365 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6366 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6368 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6369 BLKmode) is a candidate for this co-processor's registers; this
6370 function should ignore any position-dependent state in
6371 CUMULATIVE_ARGS and only use call-type dependent information. */
6372 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6374 /* Return true if the argument does get a co-processor register; it
6375 should set aapcs_reg to an RTX of the register allocated as is
6376 required for a return from FUNCTION_ARG. */
6377 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6379 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6380 be returned in this co-processor's registers. */
6381 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6383 /* Allocate and return an RTX element to hold the return type of a call. This
6384 routine must not fail and will only be called if is_return_candidate
6385 returned true with the same parameters. */
6386 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6388 /* Finish processing this argument and prepare to start processing
6390 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6391 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6399 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6404 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6405 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6412 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6414 /* We aren't passed a decl, so we can't check that a call is local.
6415 However, it isn't clear that that would be a win anyway, since it
6416 might limit some tail-calling opportunities. */
6417 enum arm_pcs pcs_variant;
6421 const_tree fndecl = NULL_TREE;
6423 if (TREE_CODE (fntype) == FUNCTION_DECL)
6426 fntype = TREE_TYPE (fntype);
6429 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6432 pcs_variant = arm_pcs_default;
6434 if (pcs_variant != ARM_PCS_AAPCS)
6438 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6439 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6448 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6451 /* We aren't passed a decl, so we can't check that a call is local.
6452 However, it isn't clear that that would be a win anyway, since it
6453 might limit some tail-calling opportunities. */
6454 enum arm_pcs pcs_variant;
6455 int unsignedp ATTRIBUTE_UNUSED;
6459 const_tree fndecl = NULL_TREE;
6461 if (TREE_CODE (fntype) == FUNCTION_DECL)
6464 fntype = TREE_TYPE (fntype);
6467 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6470 pcs_variant = arm_pcs_default;
6472 /* Promote integer types. */
6473 if (type && INTEGRAL_TYPE_P (type))
6474 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6476 if (pcs_variant != ARM_PCS_AAPCS)
6480 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6481 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6483 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6487 /* Promotes small structs returned in a register to full-word size
6488 for big-endian AAPCS. */
6489 if (type && arm_return_in_msb (type))
6491 HOST_WIDE_INT size = int_size_in_bytes (type);
6492 if (size % UNITS_PER_WORD != 0)
6494 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6495 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6499 return gen_rtx_REG (mode, R0_REGNUM);
6503 aapcs_libcall_value (machine_mode mode)
6505 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6506 && GET_MODE_SIZE (mode) <= 4)
6509 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6512 /* Lay out a function argument using the AAPCS rules. The rule
6513 numbers referred to here are those in the AAPCS. */
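/* A worked example (illustrative, for the base variant where no VFP
   argument registers are used): for

     void f (int a, double b);

   'a' is assigned r0 by rule C4; rule C3 then rounds the NCRN up from 1
   to 2 for the doubleword-aligned 'b', which takes r2-r3 (C4 again).  */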
6515 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6516 const_tree type, bool named)
6521 /* We only need to do this once per argument. */
6522 if (pcum->aapcs_arg_processed)
6525 pcum->aapcs_arg_processed = true;
6527 /* Special case: if named is false then we are handling an incoming
6528 anonymous argument which is on the stack. */
6532 /* Is this a potential co-processor register candidate? */
6533 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6535 int slot = aapcs_select_call_coproc (pcum, mode, type);
6536 pcum->aapcs_cprc_slot = slot;
6538 /* We don't have to apply any of the rules from part B of the
6539 preparation phase, these are handled elsewhere in the compiler. */
6544 /* A Co-processor register candidate goes either in its own
6545 class of registers or on the stack. */
6546 if (!pcum->aapcs_cprc_failed[slot])
6548 /* C1.cp - Try to allocate the argument to co-processor
6550 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6553 /* C2.cp - Put the argument on the stack and note that we
6554 can't assign any more candidates in this slot. We also
6555 need to note that we have allocated stack space, so that
6556 we won't later try to split a non-cprc candidate between
6557 core registers and the stack. */
6558 pcum->aapcs_cprc_failed[slot] = true;
6559 pcum->can_split = false;
6562 /* We didn't get a register, so this argument goes on the stack. */
6564 gcc_assert (pcum->can_split == false);
6569 /* C3 - For double-word aligned arguments, round the NCRN up to the
6570 next even number. */
6571 ncrn = pcum->aapcs_ncrn;
6574 int res = arm_needs_doubleword_align (mode, type);
6575 /* Only warn during RTL expansion of call stmts, otherwise we would
6576 warn e.g. during gimplification even on functions that will be
6577 always inlined, and we'd warn multiple times. Don't warn when
6578 called in expand_function_start either, as we warn instead in
6579 arm_function_arg_boundary in that case. */
6580 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6581 inform (input_location, "parameter passing for argument of type "
6582 "%qT changed in GCC 7.1", type);
6587 nregs = ARM_NUM_REGS2(mode, type);
6589 /* Sigh, this test should really assert that nregs > 0, but a GCC
6590 extension allows empty structs and then gives them empty size; it
6591 then allows such a structure to be passed by value. For some of
6592 the code below we have to pretend that such an argument has
6593 non-zero size so that we 'locate' it correctly either in
6594 registers or on the stack. */
6595 gcc_assert (nregs >= 0);
6597 nregs2 = nregs ? nregs : 1;
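/* E.g. (GNU C, illustrative): an argument of type 'struct {}' has
   size 0, so NREGS is 0 and NREGS2 above is forced to 1 so that the
   argument is still located consistently.  */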
6599 /* C4 - Argument fits entirely in core registers. */
6600 if (ncrn + nregs2 <= NUM_ARG_REGS)
6602 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6603 pcum->aapcs_next_ncrn = ncrn + nregs;
6607 /* C5 - Some core registers left and there are no arguments already
6608 on the stack: split this argument between the remaining core
6609 registers and the stack. */
6610 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6612 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6613 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6614 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6618 /* C6 - NCRN is set to 4. */
6619 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6621 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6625 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6626 for a call to a function whose data type is FNTYPE.
6627 For a library call, FNTYPE is NULL. */
6629 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6631 tree fndecl ATTRIBUTE_UNUSED)
6633 /* Long call handling. */
6635 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6637 pcum->pcs_variant = arm_pcs_default;
6639 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6641 if (arm_libcall_uses_aapcs_base (libname))
6642 pcum->pcs_variant = ARM_PCS_AAPCS;
6644 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6645 pcum->aapcs_reg = NULL_RTX;
6646 pcum->aapcs_partial = 0;
6647 pcum->aapcs_arg_processed = false;
6648 pcum->aapcs_cprc_slot = -1;
6649 pcum->can_split = true;
6651 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6655 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6657 pcum->aapcs_cprc_failed[i] = false;
6658 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6666 /* On the ARM, the offset starts at 0. */
6668 pcum->iwmmxt_nregs = 0;
6669 pcum->can_split = true;
6671 /* Varargs vectors are treated the same as long long.
6672 named_count avoids having to change the way arm handles 'named' */
6673 pcum->named_count = 0;
6676 if (TARGET_REALLY_IWMMXT && fntype)
6680 for (fn_arg = TYPE_ARG_TYPES (fntype);
6682 fn_arg = TREE_CHAIN (fn_arg))
6683 pcum->named_count += 1;
6685 if (! pcum->named_count)
6686 pcum->named_count = INT_MAX;
6690 /* Return 2 if double word alignment is required for argument passing,
6691 but wasn't required before the fix for PR88469.
6692 Return 1 if double word alignment is required for argument passing.
6693 Return -1 if double word alignment used to be required for argument
6694 passing before PR77728 ABI fix, but is not required anymore.
6695 Return 0 if double word alignment is not required and wasn't required before either. */
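/* Illustrative cases (assumed, following the branches in the code below):

     struct s1 { long long x; };  -- a doubleword-aligned FIELD_DECL: returns 1
     a C++ struct whose only doubleword-aligned member is a static data
       member (a VAR_DECL, not a FIELD_DECL): returns -1 (the PR77728 case)
     a struct whose only doubleword alignment comes from the declared type
       of a bit-field, e.g. 'long long b : 8;': returns 2 (the PR88469
       case).  */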
6698 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6701 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6703 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6704 if (!AGGREGATE_TYPE_P (type))
6705 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6707 /* Array types: Use member alignment of element type. */
6708 if (TREE_CODE (type) == ARRAY_TYPE)
6709 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6713 /* Record/aggregate types: Use greatest member alignment of any member. */
6714 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6715 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6717 if (TREE_CODE (field) == FIELD_DECL)
6720 /* Before PR77728 fix, we were incorrectly considering also
6721 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6722 Make sure we can warn about that with -Wpsabi. */
6725 else if (TREE_CODE (field) == FIELD_DECL
6726 && DECL_BIT_FIELD_TYPE (field)
6727 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
6737 /* Determine where to put an argument to a function.
6738 Value is zero to push the argument on the stack,
6739 or a hard register in which to store the argument.
6741 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6742 the preceding args and about the function being called.
6743 ARG is a description of the argument.
6745 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6746 other arguments are passed on the stack. If (NAMED == 0) (which happens
6747 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6748 defined), say it is passed on the stack (function_prologue will
6749 indeed make it be passed on the stack if necessary). */
6752 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
6754 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6757 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6758 a call insn (op3 of a call_value insn). */
6759 if (arg.end_marker_p ())
6762 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6764 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
6765 return pcum->aapcs_reg;
6768 /* Varargs vectors are treated the same as long long.
6769 named_count avoids having to change the way arm handles 'named' */
6770 if (TARGET_IWMMXT_ABI
6771 && arm_vector_mode_supported_p (arg.mode)
6772 && pcum->named_count > pcum->nargs + 1)
6774 if (pcum->iwmmxt_nregs <= 9)
6775 return gen_rtx_REG (arg.mode,
6776 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6779 pcum->can_split = false;
6784 /* Put doubleword aligned quantities in even register pairs. */
6785 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6787 int res = arm_needs_doubleword_align (arg.mode, arg.type);
6788 if (res < 0 && warn_psabi)
6789 inform (input_location, "parameter passing for argument of type "
6790 "%qT changed in GCC 7.1", arg.type);
6794 if (res > 1 && warn_psabi)
6795 inform (input_location, "parameter passing for argument of type "
6796 "%qT changed in GCC 9.1", arg.type);
6800 /* Only allow splitting an arg between regs and memory if all preceding
6801 args were allocated to regs. For args passed by reference we only count
6802 the reference pointer. */
6803 if (pcum->can_split)
6806 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
6808 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
6811 return gen_rtx_REG (arg.mode, pcum->nregs);
6815 arm_function_arg_boundary (machine_mode mode, const_tree type)
6817 if (!ARM_DOUBLEWORD_ALIGN)
6818 return PARM_BOUNDARY;
6820 int res = arm_needs_doubleword_align (mode, type);
6821 if (res < 0 && warn_psabi)
6822 inform (input_location, "parameter passing for argument of type %qT "
6823 "changed in GCC 7.1", type);
6824 if (res > 1 && warn_psabi)
6825 inform (input_location, "parameter passing for argument of type "
6826 "%qT changed in GCC 9.1", type);
6828 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6832 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
6834 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6835 int nregs = pcum->nregs;
6837 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6839 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
6840 return pcum->aapcs_partial;
6843 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
6846 if (NUM_ARG_REGS > nregs
6847 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
6849 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6854 /* Update the data in PCUM to advance over argument ARG. */
6857 arm_function_arg_advance (cumulative_args_t pcum_v,
6858 const function_arg_info &arg)
6860 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6862 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6864 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
6866 if (pcum->aapcs_cprc_slot >= 0)
6868 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
6870 pcum->aapcs_cprc_slot = -1;
6873 /* Generic stuff. */
6874 pcum->aapcs_arg_processed = false;
6875 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6876 pcum->aapcs_reg = NULL_RTX;
6877 pcum->aapcs_partial = 0;
6882 if (arm_vector_mode_supported_p (arg.mode)
6883 && pcum->named_count > pcum->nargs
6884 && TARGET_IWMMXT_ABI)
6885 pcum->iwmmxt_nregs += 1;
6887 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
6891 /* Variable sized types are passed by reference. This is a GCC
6892 extension to the ARM ABI. */
6895 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6897 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
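/* E.g. (GNU C, illustrative): a struct type whose size depends on a
   runtime value, such as one containing a variable-length array member,
   has a TYPE_SIZE that is not an INTEGER_CST and is therefore passed
   by reference.  */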
6900 /* Encode the current state of the #pragma [no_]long_calls. */
6903 OFF, /* No #pragma [no_]long_calls is in effect. */
6904 LONG, /* #pragma long_calls is in effect. */
6905 SHORT /* #pragma no_long_calls is in effect. */
6908 static arm_pragma_enum arm_pragma_long_calls = OFF;
6911 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6913 arm_pragma_long_calls = LONG;
6917 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6919 arm_pragma_long_calls = SHORT;
6923 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6925 arm_pragma_long_calls = OFF;
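/* Typical usage in user code (illustrative; the pragma names are the
   ones these handlers are registered for):

     #pragma long_calls
     void far_away (void);        -- declarations here get long_call
     #pragma long_calls_off

   See arm_set_default_type_attributes below.  */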
6928 /* Handle an attribute requiring a FUNCTION_DECL;
6929 arguments as in struct attribute_spec.handler. */
6931 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6932 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6934 if (TREE_CODE (*node) != FUNCTION_DECL)
6936 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6938 *no_add_attrs = true;
6944 /* Handle an "interrupt" or "isr" attribute;
6945 arguments as in struct attribute_spec.handler. */
6947 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6952 if (TREE_CODE (*node) != FUNCTION_DECL)
6954 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6956 *no_add_attrs = true;
6958 /* FIXME: the argument if any is checked for type attributes;
6959 should it be checked for decl ones? */
6963 if (TREE_CODE (*node) == FUNCTION_TYPE
6964 || TREE_CODE (*node) == METHOD_TYPE)
6966 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6968 warning (OPT_Wattributes, "%qE attribute ignored",
6970 *no_add_attrs = true;
6973 else if (TREE_CODE (*node) == POINTER_TYPE
6974 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6975 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6976 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6978 *node = build_variant_type_copy (*node);
6979 TREE_TYPE (*node) = build_type_attribute_variant
6981 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6982 *no_add_attrs = true;
6986 /* Possibly pass this attribute on from the type to a decl. */
6987 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6988 | (int) ATTR_FLAG_FUNCTION_NEXT
6989 | (int) ATTR_FLAG_ARRAY_NEXT))
6991 *no_add_attrs = true;
6992 return tree_cons (name, args, NULL_TREE);
6996 warning (OPT_Wattributes, "%qE attribute ignored",
7005 /* Handle a "pcs" attribute; arguments as in struct
7006 attribute_spec.handler. */
7008 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7009 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7011 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7013 warning (OPT_Wattributes, "%qE attribute ignored", name);
7014 *no_add_attrs = true;
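/* E.g. (illustrative):

     double sum (double, double) __attribute__ ((pcs ("aapcs-vfp")));

   selects the VFP variant for calls to 'sum'.  */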
7019 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7020 /* Handle the "notshared" attribute. This attribute is another way of
7021 requesting hidden visibility. ARM's compiler supports
7022 "__declspec(notshared)"; we support the same thing via an
7026 arm_handle_notshared_attribute (tree *node,
7027 tree name ATTRIBUTE_UNUSED,
7028 tree args ATTRIBUTE_UNUSED,
7029 int flags ATTRIBUTE_UNUSED,
7032 tree decl = TYPE_NAME (*node);
7036 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7037 DECL_VISIBILITY_SPECIFIED (decl) = 1;
7038 *no_add_attrs = false;
7044 /* This function returns true if a function with declaration FNDECL and type
7045 FNTYPE uses the stack to pass arguments or return variables and false
7046 otherwise. This is used for functions with the attributes
7047 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7048 diagnostic messages if the stack is used. NAME is the name of the attribute used. */
7052 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7054 function_args_iterator args_iter;
7055 CUMULATIVE_ARGS args_so_far_v;
7056 cumulative_args_t args_so_far;
7057 bool first_param = true;
7058 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7060 /* Error out if any argument is passed on the stack. */
7061 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7062 args_so_far = pack_cumulative_args (&args_so_far_v);
7063 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7067 prev_arg_type = arg_type;
7068 if (VOID_TYPE_P (arg_type))
7071 function_arg_info arg (arg_type, /*named=*/true);
7073 /* ??? We should advance after processing the argument and pass
7074 the argument we're advancing past. */
7075 arm_function_arg_advance (args_so_far, arg);
7076 arg_rtx = arm_function_arg (args_so_far, arg);
7077 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7079 error ("%qE attribute not available to functions with arguments "
7080 "passed on the stack", name);
7083 first_param = false;
7086 /* Error out for variadic functions since we cannot control how many
7087 arguments will be passed and thus the stack could be used. stdarg_p () is
7088 not used for the check, to avoid traversing the argument list twice.
7089 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7091 error ("%qE attribute not available to functions with variable number "
7092 "of arguments", name);
7096 /* Error out if return value is passed on the stack. */
7097 ret_type = TREE_TYPE (fntype);
7098 if (arm_return_in_memory (ret_type, fntype))
7100 error ("%qE attribute not available to functions that return value on "
7107 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7108 function will check whether the attribute is allowed here and will add the
7109 attribute to the function declaration tree or otherwise issue a warning. */
7112 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7121 *no_add_attrs = true;
7122 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7127 /* Ignore attribute for function types. */
7128 if (TREE_CODE (*node) != FUNCTION_DECL)
7130 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7132 *no_add_attrs = true;
7138 /* Warn for static linkage functions. */
7139 if (!TREE_PUBLIC (fndecl))
7141 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7142 "with static linkage", name);
7143 *no_add_attrs = true;
7147 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7148 TREE_TYPE (fndecl));
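/* Typical use (illustrative):

     int __attribute__ ((cmse_nonsecure_entry)) gateway (int);

   the checks above reject the attribute if any argument or the return
   value of 'gateway' would have to live on the stack.  */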
7153 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7154 function will check whether the attribute is allowed here and will add the
7155 attribute to the function type tree or otherwise issue a diagnostic. The
7156 reason we check this at declaration time is to only allow the use of the
7157 attribute with declarations of function pointers and not function
7158 declarations. This function checks NODE is of the expected type and issues
7159 diagnostics otherwise using NAME. If it is not of the expected type
7160 *NO_ADD_ATTRS will be set to true. */
7163 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7168 tree decl = NULL_TREE, fntype = NULL_TREE;
7173 *no_add_attrs = true;
7174 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7179 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7182 fntype = TREE_TYPE (decl);
7185 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7186 fntype = TREE_TYPE (fntype);
7188 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7190 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7191 "function pointer", name);
7192 *no_add_attrs = true;
7196 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7201 /* Prevent trees being shared among function types with and without
7202 cmse_nonsecure_call attribute. */
7203 type = TREE_TYPE (decl);
7205 type = build_distinct_type_copy (type);
7206 TREE_TYPE (decl) = type;
7209 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7212 fntype = TREE_TYPE (fntype);
7213 fntype = build_distinct_type_copy (fntype);
7214 TREE_TYPE (type) = fntype;
7217 /* Construct a type attribute and add it to the function type. */
7218 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7219 TYPE_ATTRIBUTES (fntype));
7220 TYPE_ATTRIBUTES (fntype) = attrs;
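/* Typical use (illustrative): the attribute belongs on a
   function-pointer type, not on a function declaration:

     typedef void __attribute__ ((cmse_nonsecure_call)) ns_fn (void);
     ns_fn *callback;  */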
7224 /* Return 0 if the attributes for two types are incompatible, 1 if they
7225 are compatible, and 2 if they are nearly compatible (which causes a
7226 warning to be generated). */
7228 arm_comp_type_attributes (const_tree type1, const_tree type2)
7232 /* Check for mismatch of non-default calling convention. */
7233 if (TREE_CODE (type1) != FUNCTION_TYPE)
7236 /* Check for mismatched call attributes. */
7237 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7238 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7239 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7240 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7242 /* Only bother to check if an attribute is defined. */
7243 if (l1 | l2 | s1 | s2)
7245 /* If one type has an attribute, the other must have the same attribute. */
7246 if ((l1 != l2) || (s1 != s2))
7249 /* Disallow mixed attributes. */
7250 if ((l1 & s2) || (l2 & s1))
7254 /* Check for mismatched ISR attribute. */
7255 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7257 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7258 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7260 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7264 l1 = lookup_attribute ("cmse_nonsecure_call",
7265 TYPE_ATTRIBUTES (type1)) != NULL;
7266 l2 = lookup_attribute ("cmse_nonsecure_call",
7267 TYPE_ATTRIBUTES (type2)) != NULL;
7275 /* Assigns default attributes to a newly defined type. This is used to
7276 set short_call/long_call attributes for function types of
7277 functions defined inside corresponding #pragma scopes. */
7279 arm_set_default_type_attributes (tree type)
7281 /* Add __attribute__ ((long_call)) to all functions, when
7282 inside #pragma long_calls or __attribute__ ((short_call)),
7283 when inside #pragma no_long_calls. */
7284 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7286 tree type_attr_list, attr_name;
7287 type_attr_list = TYPE_ATTRIBUTES (type);
7289 if (arm_pragma_long_calls == LONG)
7290 attr_name = get_identifier ("long_call");
7291 else if (arm_pragma_long_calls == SHORT)
7292 attr_name = get_identifier ("short_call");
7296 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7297 TYPE_ATTRIBUTES (type) = type_attr_list;
7301 /* Return true if DECL is known to be linked into section SECTION. */
7304 arm_function_in_section_p (tree decl, section *section)
7306 /* We can only be certain about the prevailing symbol definition. */
7307 if (!decl_binds_to_current_def_p (decl))
7310 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7311 if (!DECL_SECTION_NAME (decl))
7313 /* Make sure that we will not create a unique section for DECL. */
7314 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7318 return function_section (decl) == section;
7321 /* Return nonzero if a 32-bit "long_call" should be generated for
7322 a call from the current function to DECL. We generate a long_call if the function:
7325 a. has an __attribute__ ((long_call))
7326 or b. is within the scope of a #pragma long_calls
7327 or c. the -mlong-calls command line switch has been specified
7329 However we do not generate a long call if the function:
7331 d. has an __attribute__ ((short_call))
7332 or e. is inside the scope of a #pragma no_long_calls
7333 or f. is defined in the same section as the current function. */
7336 arm_is_long_call_p (tree decl)
7341 return TARGET_LONG_CALLS;
7343 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7344 if (lookup_attribute ("short_call", attrs))
7347 /* For "f", be conservative, and only cater for cases in which the
7348 whole of the current function is placed in the same section. */
7349 if (!flag_reorder_blocks_and_partition
7350 && TREE_CODE (decl) == FUNCTION_DECL
7351 && arm_function_in_section_p (decl, current_function_section ()))
7354 if (lookup_attribute ("long_call", attrs))
7357 return TARGET_LONG_CALLS;
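/* E.g. (illustrative): case "a" above corresponds to

     void far_func (void) __attribute__ ((long_call));

   while the -mlong-calls switch (case "c") applies to every call.  */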
7360 /* Return nonzero if it is ok to make a tail-call to DECL. */
7362 arm_function_ok_for_sibcall (tree decl, tree exp)
7364 unsigned long func_type;
7366 if (cfun->machine->sibcall_blocked)
7371 /* In FDPIC, never tailcall something for which we have no decl:
7372 the target function could be in a different module, requiring
7373 a different FDPIC register value. */
7378 /* Never tailcall something if we are generating code for Thumb-1. */
7382 /* The PIC register is live on entry to VxWorks PLT entries, so we
7383 must make the call before restoring the PIC register. */
7384 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7387 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7388 may be used both as target of the call and base register for restoring
7389 the VFP registers. */
7390 if (TARGET_APCS_FRAME && TARGET_ARM
7391 && TARGET_HARD_FLOAT
7392 && decl && arm_is_long_call_p (decl))
7395 /* If we are interworking and the function is not declared static
7396 then we can't tail-call it unless we know that it exists in this
7397 compilation unit (since it might be a Thumb routine). */
7398 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7399 && !TREE_ASM_WRITTEN (decl))
7402 func_type = arm_current_func_type ();
7403 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7404 if (IS_INTERRUPT (func_type))
7407 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7408 generated for entry functions themselves. */
7409 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7412 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7413 this would complicate matters for later code generation. */
7414 if (TREE_CODE (exp) == CALL_EXPR)
7416 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7417 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7421 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7423 /* Check that the return value locations are the same. For
7424 example that we aren't returning a value from the sibling in
7425 a VFP register but then need to transfer it to a core
7428 tree decl_or_type = decl;
7430 /* If it is an indirect function pointer, get the function type. */
7432 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7434 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7435 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7437 if (!rtx_equal_p (a, b))
7441 /* Never tailcall if function may be called with a misaligned SP. */
7442 if (IS_STACKALIGN (func_type))
7445 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7446 references should become a NOP. Don't convert such calls into
7448 if (TARGET_AAPCS_BASED
7449 && arm_abi == ARM_ABI_AAPCS
7451 && DECL_WEAK (decl))
7454 /* We cannot do a tailcall for an indirect call by descriptor if all the
7455 argument registers are used because the only register left to load the
7456 address is IP and it will already contain the static chain. */
7457 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7459 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7460 CUMULATIVE_ARGS cum;
7461 cumulative_args_t cum_v;
7463 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7464 cum_v = pack_cumulative_args (&cum);
7466 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7468 tree type = TREE_VALUE (t);
7469 if (!VOID_TYPE_P (type))
7471 function_arg_info arg (type, /*named=*/true);
7472 arm_function_arg_advance (cum_v, arg);
7476 function_arg_info arg (integer_type_node, /*named=*/true);
7477 if (!arm_function_arg (cum_v, arg))
7481 /* Everything else is ok. */
7486 /* Addressing mode support functions. */
7488 /* Return nonzero if X is a legitimate immediate operand when compiling
7489 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7491 legitimate_pic_operand_p (rtx x)
7493 if (GET_CODE (x) == SYMBOL_REF
7494 || (GET_CODE (x) == CONST
7495 && GET_CODE (XEXP (x, 0)) == PLUS
7496 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7502 /* Record that the current function needs a PIC register. If PIC_REG is null,
7503 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
7504 both cases cfun->machine->pic_reg is initialized if we have not already done
7505 so. COMPUTE_NOW decides whether and where to set the PIC register. If true,
7506 the PIC register is reloaded at the current position in the instruction stream
7507 regardless of whether it was loaded before. Otherwise, it is only loaded
7508 if not already done so (crtl->uses_pic_offset_table is null). Note that a
7509 nonnull PIC_REG is supported only if COMPUTE_NOW is true and a null PIC_REG
7510 is supported only if COMPUTE_NOW is false. */
7513 require_pic_register (rtx pic_reg, bool compute_now)
7515 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7517 /* A lot of the logic here is made obscure by the fact that this
7518 routine gets called as part of the rtx cost estimation process.
7519 We don't want those calls to affect any assumptions about the real
7520 function; and further, we can't call entry_of_function() until we
7521 start the real expansion process. */
7522 if (!crtl->uses_pic_offset_table || compute_now)
7524 gcc_assert (can_create_pseudo_p ()
7525 || (pic_reg != NULL_RTX
7527 && GET_MODE (pic_reg) == Pmode));
7528 if (arm_pic_register != INVALID_REGNUM
7530 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7532 if (!cfun->machine->pic_reg)
7533 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7535 /* Play games to avoid marking the function as needing pic
7536 if we are being called as part of the cost-estimation
7538 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7539 crtl->uses_pic_offset_table = 1;
7543 rtx_insn *seq, *insn;
7545 if (pic_reg == NULL_RTX)
7546 pic_reg = gen_reg_rtx (Pmode);
7547 if (!cfun->machine->pic_reg)
7548 cfun->machine->pic_reg = pic_reg;
7550 /* Play games to avoid marking the function as needing pic
7551 if we are being called as part of the cost-estimation
7553 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7555 crtl->uses_pic_offset_table = 1;
7558 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7559 && arm_pic_register > LAST_LO_REGNUM
7561 emit_move_insn (cfun->machine->pic_reg,
7562 gen_rtx_REG (Pmode, arm_pic_register));
7564 arm_load_pic_register (0UL, pic_reg);
7569 for (insn = seq; insn; insn = NEXT_INSN (insn))
7571 INSN_LOCATION (insn) = prologue_location;
7573 /* We can be called during expansion of PHI nodes, where
7574 we can't yet emit instructions directly in the final
7575 insn stream. Queue the insns on the entry edge, they will
7576 be committed after everything else is expanded. */
7577 if (currently_expanding_to_rtl)
7578 insert_insn_on_edge (seq,
7580 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7588 /* Generate insns to calculate the address of ORIG in pic mode. */
7590 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
7595 pat = gen_calculate_pic_address (reg, pic_reg, orig);
7597 /* Make the MEM as close to a constant as possible. */
7598 mem = SET_SRC (pat);
7599 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7600 MEM_READONLY_P (mem) = 1;
7601 MEM_NOTRAP_P (mem) = 1;
7603 return emit_insn (pat);
7606 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
7607 created to hold the result of the load. If not NULL, PIC_REG indicates
7608 which register to use as PIC register, otherwise it is decided by register
7609 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
7610 location in the instruction stream, regardless of whether it was loaded
7611 previously. Note that a nonnull PIC_REG is supported only if COMPUTE_NOW is
7612 true and a null PIC_REG is supported only if COMPUTE_NOW is false.
7614 Returns the register REG into which the PIC load is performed. */
7617 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
7620 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7622 if (GET_CODE (orig) == SYMBOL_REF
7623 || GET_CODE (orig) == LABEL_REF)
7627 gcc_assert (can_create_pseudo_p ());
7628 reg = gen_reg_rtx (Pmode);
7631 /* VxWorks does not impose a fixed gap between segments; the run-time
7632 gap can be different from the object-file gap. We therefore can't
7633 use GOTOFF unless we are absolutely sure that the symbol is in the
7634 same segment as the GOT. Unfortunately, the flexibility of linker
7635 scripts means that we can't be sure of that in general, so assume
7636 that GOTOFF is never valid on VxWorks. */
7637 /* References to weak symbols cannot be resolved locally: they
7638 may be overridden by a non-weak definition at link time. */
7640 if ((GET_CODE (orig) == LABEL_REF
7641 || (GET_CODE (orig) == SYMBOL_REF
7642 && SYMBOL_REF_LOCAL_P (orig)
7643 && (SYMBOL_REF_DECL (orig)
7644 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
7645 && (!SYMBOL_REF_FUNCTION_P (orig)
7646 || arm_fdpic_local_funcdesc_p (orig))))
7648 && arm_pic_data_is_text_relative)
7649 insn = arm_pic_static_addr (orig, reg);
7652 /* If this function doesn't have a pic register, create one now. */
7653 require_pic_register (pic_reg, compute_now);
7655 if (pic_reg == NULL_RTX)
7656 pic_reg = cfun->machine->pic_reg;
7658 insn = calculate_pic_address_constant (reg, pic_reg, orig);
7661 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7663 set_unique_reg_note (insn, REG_EQUAL, orig);
7667 else if (GET_CODE (orig) == CONST)
7671 if (GET_CODE (XEXP (orig, 0)) == PLUS
7672 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7675 /* Handle the case where we have: const (UNSPEC_TLS). */
7676 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7677 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7680 /* Handle the case where we have:
7681 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7683 if (GET_CODE (XEXP (orig, 0)) == PLUS
7684 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7685 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7687 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7693 gcc_assert (can_create_pseudo_p ());
7694 reg = gen_reg_rtx (Pmode);
7697 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7699 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
7700 pic_reg, compute_now);
7701 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7702 base == reg ? 0 : reg, pic_reg,
7705 if (CONST_INT_P (offset))
7707 /* The base register doesn't really matter, we only want to
7708 test the index for the appropriate mode. */
7709 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7711 gcc_assert (can_create_pseudo_p ());
7712 offset = force_reg (Pmode, offset);
7715 if (CONST_INT_P (offset))
7716 return plus_constant (Pmode, base, INTVAL (offset));
7719 if (GET_MODE_SIZE (mode) > 4
7720 && (GET_MODE_CLASS (mode) == MODE_INT
7721 || TARGET_SOFT_FLOAT))
7723 emit_insn (gen_addsi3 (reg, base, offset));
7727 return gen_rtx_PLUS (Pmode, base, offset);
7734 /* Whether a register is callee saved or not. This is necessary because high
7735 registers are marked as caller saved when optimizing for size on Thumb-1
7736 targets, despite being callee saved, in order to avoid using them. */
7737 #define callee_saved_reg_p(reg) \
7738 (!call_used_regs[reg] \
7739 || (TARGET_THUMB1 && optimize_size \
7740 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
7742 /* Return a mask for the call-clobbered low registers that are unused
7743 at the end of the prologue. */
7744 static unsigned long
7745 thumb1_prologue_unused_call_clobbered_lo_regs (void)
7747 unsigned long mask = 0;
7748 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7750 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
7751 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
7752 mask |= 1 << (reg - FIRST_LO_REGNUM);
7756 /* Similarly for the start of the epilogue. */
7757 static unsigned long
7758 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
7760 unsigned long mask = 0;
7761 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
7763 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
7764 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
7765 mask |= 1 << (reg - FIRST_LO_REGNUM);
7769 /* Find a spare register to use during the prolog of a function. */
7772 thumb_find_work_register (unsigned long pushed_regs_mask)
7776 unsigned long unused_regs
7777 = thumb1_prologue_unused_call_clobbered_lo_regs ();
7779 /* Check the argument registers first as these are call-used. The
7780 register allocation order means that sometimes r3 might be used
7781 but earlier argument registers might not, so check them all. */
7782 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
7783 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
7786 /* Otherwise look for a call-saved register that is going to be pushed. */
7787 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7788 if (pushed_regs_mask & (1 << reg))
7793 /* Thumb-2 can use high regs. */
7794 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7795 if (pushed_regs_mask & (1 << reg))
7798 /* Something went wrong - thumb_compute_save_reg_mask()
7799 should have arranged for a suitable register to be pushed. */
7803 static GTY(()) int pic_labelno;
7805 /* Generate code to load the PIC register. In thumb mode SCRATCH is a low register. */
7809 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
7811 rtx l1, labelno, pic_tmp, pic_rtx;
7813 if (crtl->uses_pic_offset_table == 0
7814 || TARGET_SINGLE_PIC_BASE
7818 gcc_assert (flag_pic);
7820 if (pic_reg == NULL_RTX)
7821 pic_reg = cfun->machine->pic_reg;
7822 if (TARGET_VXWORKS_RTP)
7824 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7825 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7826 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7828 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7830 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7831 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7835 /* We use an UNSPEC rather than a LABEL_REF because this label
7836 never appears in the code stream. */
7838 labelno = GEN_INT (pic_labelno++);
7839 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7840 l1 = gen_rtx_CONST (VOIDmode, l1);
7842 /* On the ARM the PC register contains 'dot + 8' at the time of the
7843 addition, on the Thumb it is 'dot + 4'. */
7844 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7845 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7847 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7851 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
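      /* Schematically (ARM mode; an editor's sketch, the actual labels
	 are generated): the net effect of the code emitted here is

	     ldr     rPIC, .LPICn
	 .LPICm:
	     add     rPIC, pc, rPIC

	 where .LPICn holds _GLOBAL_OFFSET_TABLE_ - (.LPICm + 8).  */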
7853 else /* TARGET_THUMB1 */
7855 if (arm_pic_register != INVALID_REGNUM
7856 && REGNO (pic_reg) > LAST_LO_REGNUM)
7858 /* We will have pushed the pic register, so we should always be
7859 able to find a work register. */
7860 pic_tmp = gen_rtx_REG (SImode,
7861 thumb_find_work_register (saved_regs));
7862 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7863 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7864 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7866 else if (arm_pic_register != INVALID_REGNUM
7867 && arm_pic_register > LAST_LO_REGNUM
7868 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7870 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7871 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7872 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7875 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7879 /* Need to emit this whether or not we obey regdecls,
7880 since setjmp/longjmp can cause life info to screw up. */
7884 /* Try to determine whether an object, referenced via ORIG, will be
7885 placed in the text or data segment. This is used in FDPIC mode, to
7886 decide which relocations to use when accessing ORIG. *IS_READONLY
7887 is set to true if ORIG is a read-only location, false otherwise.
7888 Return true if we could determine the location of ORIG, false
7889 otherwise. *IS_READONLY is valid only when we return true. */
7891 arm_is_segment_info_known (rtx orig, bool *is_readonly)
7893 *is_readonly = false;
7895 if (GET_CODE (orig) == LABEL_REF)
7897 *is_readonly = true;
7901 if (SYMBOL_REF_P (orig))
7903 if (CONSTANT_POOL_ADDRESS_P (orig))
7905 *is_readonly = true;
7908 if (SYMBOL_REF_LOCAL_P (orig)
7909 && !SYMBOL_REF_EXTERNAL_P (orig)
7910 && SYMBOL_REF_DECL (orig)
7911 && (!DECL_P (SYMBOL_REF_DECL (orig))
7912 || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
7914 tree decl = SYMBOL_REF_DECL (orig);
7915 tree init = (TREE_CODE (decl) == VAR_DECL)
7916 ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
7919 bool named_section, readonly;
7921 if (init && init != error_mark_node)
7922 reloc = compute_reloc_for_constant (init);
7924 named_section = TREE_CODE (decl) == VAR_DECL
7925 && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
7926 readonly = decl_readonly_section (decl, reloc);
7928 /* We don't know where the link script will put a named
7929 section, so return false in such a case. */
7933 *is_readonly = readonly;
7937 /* We don't know. */
7944 /* Generate code to load the address of a static var when flag_pic is set. */
7946 arm_pic_static_addr (rtx orig, rtx reg)
7948 rtx l1, labelno, offset_rtx;
7951 gcc_assert (flag_pic);
7953 bool is_readonly = false;
7954 bool info_known = false;
7957 && SYMBOL_REF_P (orig)
7958 && !SYMBOL_REF_FUNCTION_P (orig))
7959 info_known = arm_is_segment_info_known (orig, &is_readonly);
7962 && SYMBOL_REF_P (orig)
7963 && !SYMBOL_REF_FUNCTION_P (orig)
7966 /* We don't know where orig is stored, so we have to be
7967 pessimistic and use a GOT relocation. */
7968 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
7970 insn = calculate_pic_address_constant (reg, pic_reg, orig);
7972 else if (TARGET_FDPIC
7973 && SYMBOL_REF_P (orig)
7974 && (SYMBOL_REF_FUNCTION_P (orig)
7977 /* We use the GOTOFF relocation. */
7978 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
7980 rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
7981 emit_insn (gen_movsi (reg, l1));
7982 insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
7986 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
7987 PC-relative access. */
7988 /* We use an UNSPEC rather than a LABEL_REF because this label
7989 never appears in the code stream. */
7990 labelno = GEN_INT (pic_labelno++);
7991 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7992 l1 = gen_rtx_CONST (VOIDmode, l1);
7994 /* On ARM, the PC register reads as 'dot + 8' at the time of the
7995 addition; on Thumb it is 'dot + 4'.  */
7996 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7997 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7998 UNSPEC_SYMBOL_OFFSET);
7999 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
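/* Worked example (schematic): if the load/add pair is placed at address
   A in ARM state, reading the PC during the add yields A + 8, so the
   constant loaded above must be orig - (.LPICn + 8); UNSPEC_SYMBOL_OFFSET
   encodes exactly that subtraction, with a bias of 4 in Thumb state.  */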
8001 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8008 /* Return nonzero if X is valid as an ARM state addressing register. */
8010 arm_address_register_rtx_p (rtx x, int strict_p)
8020 return ARM_REGNO_OK_FOR_BASE_P (regno);
8022 return (regno <= LAST_ARM_REGNUM
8023 || regno >= FIRST_PSEUDO_REGISTER
8024 || regno == FRAME_POINTER_REGNUM
8025 || regno == ARG_POINTER_REGNUM);
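/* Informally: the non-strict form accepts any pseudo and the eliminable
   frame/arg pointers, on the optimistic assumption that reload will
   rewrite them into a valid hard base register; the strict form accepts
   only registers that already satisfy ARM_REGNO_OK_FOR_BASE_P.  */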
8028 /* Return TRUE if this rtx is the difference of a symbol and a label,
8029 and will reduce to a PC-relative relocation in the object file.
8030 Expressions like this can be left alone when generating PIC, rather
8031 than forced through the GOT. */
8033 pcrel_constant_p (rtx x)
8035 if (GET_CODE (x) == MINUS)
8036 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8041 /* Return true if X will surely end up in an index register after the
8042 next splitting pass.  */
8044 will_be_in_index_register (const_rtx x)
8046 /* arm.md: calculate_pic_address will split this into a register. */
8047 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8050 /* Return nonzero if X is a valid ARM state address operand. */
8052 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8056 enum rtx_code code = GET_CODE (x);
8058 if (arm_address_register_rtx_p (x, strict_p))
8061 use_ldrd = (TARGET_LDRD
8062 && (mode == DImode || mode == DFmode));
8064 if (code == POST_INC || code == PRE_DEC
8065 || ((code == PRE_INC || code == POST_DEC)
8066 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8067 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8069 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8070 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8071 && GET_CODE (XEXP (x, 1)) == PLUS
8072 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8074 rtx addend = XEXP (XEXP (x, 1), 1);
8076 /* Don't allow ldrd post-increment by register, because it's hard
8077 to fix up invalid register choices.  */
8079 && GET_CODE (x) == POST_MODIFY
8083 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8084 && arm_legitimate_index_p (mode, addend, outer, strict_p));
8087 /* After reload constants split into minipools will have addresses
8088 from a LABEL_REF. */
8089 else if (reload_completed
8090 && (code == LABEL_REF
8092 && GET_CODE (XEXP (x, 0)) == PLUS
8093 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8094 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8097 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8100 else if (code == PLUS)
8102 rtx xop0 = XEXP (x, 0);
8103 rtx xop1 = XEXP (x, 1);
8105 return ((arm_address_register_rtx_p (xop0, strict_p)
8106 && ((CONST_INT_P (xop1)
8107 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8108 || (!strict_p && will_be_in_index_register (xop1))))
8109 || (arm_address_register_rtx_p (xop1, strict_p)
8110 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8114 /* Reload currently can't handle MINUS, so disable this for now */
8115 else if (GET_CODE (x) == MINUS)
8117 rtx xop0 = XEXP (x, 0);
8118 rtx xop1 = XEXP (x, 1);
8120 return (arm_address_register_rtx_p (xop0, strict_p)
8121 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8125 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8126 && code == SYMBOL_REF
8127 && CONSTANT_POOL_ADDRESS_P (x)
8129 && symbol_mentioned_p (get_pool_constant (x))
8130 && ! pcrel_constant_p (get_pool_constant (x))))
8136 /* Return true if we can avoid creating a constant pool entry for x. */
8138 can_avoid_literal_pool_for_label_p (rtx x)
8140 /* Normally we can assign constant values to target registers without
8141 the help of the constant pool.  But there are cases where we have to
8142 use the constant pool, for example:
8143 1) assigning a label to a register;
8144 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
8146 A constant pool access of the form:
8147 (set (reg r0) (mem (symbol_ref (".LC0"))))
8148 will cause the use of the literal pool (later, in the arm_reorg pass).
8149 So here we mark such a format as invalid; the compiler will then
8150 adjust it into:
8151 (set (reg r0) (symbol_ref (".LC0")))
8152 (set (reg r0) (mem (reg r0))).
8153 No extra register is required, and (mem (reg r0)) won't cause the use
8154 of literal pools.  */
8155 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
8156 && CONSTANT_POOL_ADDRESS_P (x))
8162 /* Return nonzero if X is a valid Thumb-2 address operand. */
8164 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8167 enum rtx_code code = GET_CODE (x);
8169 if (arm_address_register_rtx_p (x, strict_p))
8172 use_ldrd = (TARGET_LDRD
8173 && (mode == DImode || mode == DFmode));
8175 if (code == POST_INC || code == PRE_DEC
8176 || ((code == PRE_INC || code == POST_DEC)
8177 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8178 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8180 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8181 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8182 && GET_CODE (XEXP (x, 1)) == PLUS
8183 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8185 /* Thumb-2 only has autoincrement by constant. */
8186 rtx addend = XEXP (XEXP (x, 1), 1);
8187 HOST_WIDE_INT offset;
8189 if (!CONST_INT_P (addend))
8192 offset = INTVAL (addend);
8193 if (GET_MODE_SIZE (mode) <= 4)
8194 return (offset > -256 && offset < 256);
8196 return (use_ldrd && offset > -1024 && offset < 1024
8197 && (offset & 3) == 0);
8200 /* After reload constants split into minipools will have addresses
8201 from a LABEL_REF. */
8202 else if (reload_completed
8203 && (code == LABEL_REF
8205 && GET_CODE (XEXP (x, 0)) == PLUS
8206 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8207 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8210 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8213 else if (code == PLUS)
8215 rtx xop0 = XEXP (x, 0);
8216 rtx xop1 = XEXP (x, 1);
8218 return ((arm_address_register_rtx_p (xop0, strict_p)
8219 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8220 || (!strict_p && will_be_in_index_register (xop1))))
8221 || (arm_address_register_rtx_p (xop1, strict_p)
8222 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8225 else if (can_avoid_literal_pool_for_label_p (x))
8228 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8229 && code == SYMBOL_REF
8230 && CONSTANT_POOL_ADDRESS_P (x)
8232 && symbol_mentioned_p (get_pool_constant (x))
8233 && ! pcrel_constant_p (get_pool_constant (x))))
8239 /* Return nonzero if INDEX is valid for an address index operand in
8242 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8245 HOST_WIDE_INT range;
8246 enum rtx_code code = GET_CODE (index);
8248 /* Standard coprocessor addressing modes. */
8249 if (TARGET_HARD_FLOAT
8250 && (mode == SFmode || mode == DFmode))
8251 return (code == CONST_INT && INTVAL (index) < 1024
8252 && INTVAL (index) > -1024
8253 && (INTVAL (index) & 3) == 0);
8255 /* For quad modes, we restrict the constant offset to be slightly less
8256 than what the instruction format permits. We do this because for
8257 quad mode moves, we will actually decompose them into two separate
8258 double-mode reads or writes. INDEX must therefore be a valid
8259 (double-mode) offset and so should INDEX+8. */
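/* E.g. an offset of 1016 is rejected: the second double-mode access
   would need offset 1016 + 8 = 1024, which no longer fits; 1012 is the
   largest word-aligned offset accepted.  */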
8260 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8261 return (code == CONST_INT
8262 && INTVAL (index) < 1016
8263 && INTVAL (index) > -1024
8264 && (INTVAL (index) & 3) == 0);
8266 /* We have no such constraint on double mode offsets, so we permit the
8267 full range of the instruction format. */
8268 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8269 return (code == CONST_INT
8270 && INTVAL (index) < 1024
8271 && INTVAL (index) > -1024
8272 && (INTVAL (index) & 3) == 0);
8274 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8275 return (code == CONST_INT
8276 && INTVAL (index) < 1024
8277 && INTVAL (index) > -1024
8278 && (INTVAL (index) & 3) == 0);
8280 if (arm_address_register_rtx_p (index, strict_p)
8281 && (GET_MODE_SIZE (mode) <= 4))
8284 if (mode == DImode || mode == DFmode)
8286 if (code == CONST_INT)
8288 HOST_WIDE_INT val = INTVAL (index);
8290 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8291 If vldr is selected it uses arm_coproc_mem_operand. */
8293 return val > -256 && val < 256;
8295 return val > -4096 && val < 4092;
8298 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8301 if (GET_MODE_SIZE (mode) <= 4
8305 || (mode == QImode && outer == SIGN_EXTEND))))
8309 rtx xiop0 = XEXP (index, 0);
8310 rtx xiop1 = XEXP (index, 1);
8312 return ((arm_address_register_rtx_p (xiop0, strict_p)
8313 && power_of_two_operand (xiop1, SImode))
8314 || (arm_address_register_rtx_p (xiop1, strict_p)
8315 && power_of_two_operand (xiop0, SImode)));
8317 else if (code == LSHIFTRT || code == ASHIFTRT
8318 || code == ASHIFT || code == ROTATERT)
8320 rtx op = XEXP (index, 1);
8322 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8325 && INTVAL (op) <= 31);
8329 /* For ARM v4 we may be doing a sign-extend operation during the
8330 load.  */
8335 || (outer == SIGN_EXTEND && mode == QImode))
8341 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8343 return (code == CONST_INT
8344 && INTVAL (index) < range
8345 && INTVAL (index) > -range);
8348 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8349 index operand, i.e. 1, 2, 4 or 8.  */
8351 thumb2_index_mul_operand (rtx op)
8355 if (!CONST_INT_P (op))
8359 return (val == 1 || val == 2 || val == 4 || val == 8);
8362 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8364 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8366 enum rtx_code code = GET_CODE (index);
8368 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8369 /* Standard coprocessor addressing modes. */
8370 if (TARGET_HARD_FLOAT
8371 && (mode == SFmode || mode == DFmode))
8372 return (code == CONST_INT && INTVAL (index) < 1024
8373 /* Thumb-2 allows only a > -256 index range for its core register
8374 loads/stores.  Since we allow SF/DF in core registers, we have
8375 to use the intersection of -256~4096 (core) and -1024~1024
8376 (coprocessor).  */
8377 && INTVAL (index) > -256
8378 && (INTVAL (index) & 3) == 0);
8380 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8382 /* For DImode assume values will usually live in core regs
8383 and only allow LDRD addressing modes. */
8384 if (!TARGET_LDRD || mode != DImode)
8385 return (code == CONST_INT
8386 && INTVAL (index) < 1024
8387 && INTVAL (index) > -1024
8388 && (INTVAL (index) & 3) == 0);
8391 /* For quad modes, we restrict the constant offset to be slightly less
8392 than what the instruction format permits. We do this because for
8393 quad mode moves, we will actually decompose them into two separate
8394 double-mode reads or writes. INDEX must therefore be a valid
8395 (double-mode) offset and so should INDEX+8. */
8396 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8397 return (code == CONST_INT
8398 && INTVAL (index) < 1016
8399 && INTVAL (index) > -1024
8400 && (INTVAL (index) & 3) == 0);
8402 /* We have no such constraint on double mode offsets, so we permit the
8403 full range of the instruction format. */
8404 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8405 return (code == CONST_INT
8406 && INTVAL (index) < 1024
8407 && INTVAL (index) > -1024
8408 && (INTVAL (index) & 3) == 0);
8410 if (arm_address_register_rtx_p (index, strict_p)
8411 && (GET_MODE_SIZE (mode) <= 4))
8414 if (mode == DImode || mode == DFmode)
8416 if (code == CONST_INT)
8418 HOST_WIDE_INT val = INTVAL (index);
8419 /* Thumb-2 ldrd only has reg+const addressing modes.
8420 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8421 If vldr is selected it uses arm_coproc_mem_operand. */
8423 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8425 return IN_RANGE (val, -255, 4095 - 4);
8433 rtx xiop0 = XEXP (index, 0);
8434 rtx xiop1 = XEXP (index, 1);
8436 return ((arm_address_register_rtx_p (xiop0, strict_p)
8437 && thumb2_index_mul_operand (xiop1))
8438 || (arm_address_register_rtx_p (xiop1, strict_p)
8439 && thumb2_index_mul_operand (xiop0)));
8441 else if (code == ASHIFT)
8443 rtx op = XEXP (index, 1);
8445 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8448 && INTVAL (op) <= 3);
8451 return (code == CONST_INT
8452 && INTVAL (index) < 4096
8453 && INTVAL (index) > -256);
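/* These bounds mirror the Thumb-2 encodings: a 12-bit unsigned immediate
   (offsets 0..4095, as in LDR.W) for positive offsets, and an 8-bit
   immediate (down to -255) for negative ones.  */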
8456 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8458 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8468 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8470 return (regno <= LAST_LO_REGNUM
8471 || regno > LAST_VIRTUAL_REGISTER
8472 || regno == FRAME_POINTER_REGNUM
8473 || (GET_MODE_SIZE (mode) >= 4
8474 && (regno == STACK_POINTER_REGNUM
8475 || regno >= FIRST_PSEUDO_REGISTER
8476 || x == hard_frame_pointer_rtx
8477 || x == arg_pointer_rtx)));
8480 /* Return nonzero if x is a legitimate index register. This is the case
8481 for any base register that can access a QImode object. */
8483 thumb1_index_register_rtx_p (rtx x, int strict_p)
8485 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8488 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8490 The AP may be eliminated to either the SP or the FP, so we use the
8491 least common denominator, e.g. SImode, and offsets from 0 to 64.
8493 ??? Verify whether the above is the right approach.
8495 ??? Also, the FP may be eliminated to the SP, so perhaps that
8496 needs special handling also.
8498 ??? Look at how the mips16 port solves this problem. It probably uses
8499 better ways to solve some of these problems.
8501 Although it is not incorrect, we don't accept QImode and HImode
8502 addresses based on the frame pointer or arg pointer until the
8503 reload pass starts. This is so that eliminating such addresses
8504 into stack based ones won't produce impossible code. */
8506 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8508 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8511 /* ??? Not clear if this is right. Experiment. */
8512 if (GET_MODE_SIZE (mode) < 4
8513 && !(reload_in_progress || reload_completed)
8514 && (reg_mentioned_p (frame_pointer_rtx, x)
8515 || reg_mentioned_p (arg_pointer_rtx, x)
8516 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8517 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8518 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8519 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8522 /* Accept any base register. SP only in SImode or larger. */
8523 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8526 /* This is PC relative data before arm_reorg runs. */
8527 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8528 && GET_CODE (x) == SYMBOL_REF
8529 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8532 /* This is PC relative data after arm_reorg runs. */
8533 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8535 && (GET_CODE (x) == LABEL_REF
8536 || (GET_CODE (x) == CONST
8537 && GET_CODE (XEXP (x, 0)) == PLUS
8538 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8539 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8542 /* Post-inc indexing only supported for SImode and larger. */
8543 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8544 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8547 else if (GET_CODE (x) == PLUS)
8549 /* REG+REG address can be any two index registers. */
8550 /* We disallow FRAME+REG addressing since we know that FRAME
8551 will be replaced with STACK, and SP relative addressing only
8552 permits SP+OFFSET. */
8553 if (GET_MODE_SIZE (mode) <= 4
8554 && XEXP (x, 0) != frame_pointer_rtx
8555 && XEXP (x, 1) != frame_pointer_rtx
8556 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8557 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8558 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8561 /* REG+const has 5-7 bit offset for non-SP registers. */
8562 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8563 || XEXP (x, 0) == arg_pointer_rtx)
8564 && CONST_INT_P (XEXP (x, 1))
8565 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8568 /* REG+const has 10-bit offset for SP, but only SImode and
8569 larger is supported. */
8570 /* ??? Should probably check for DI/DFmode overflow here
8571 just like GO_IF_LEGITIMATE_OFFSET does. */
8572 else if (REG_P (XEXP (x, 0))
8573 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8574 && GET_MODE_SIZE (mode) >= 4
8575 && CONST_INT_P (XEXP (x, 1))
8576 && INTVAL (XEXP (x, 1)) >= 0
8577 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8578 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8581 else if (REG_P (XEXP (x, 0))
8582 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8583 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8584 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8585 && REGNO (XEXP (x, 0))
8586 <= LAST_VIRTUAL_POINTER_REGISTER))
8587 && GET_MODE_SIZE (mode) >= 4
8588 && CONST_INT_P (XEXP (x, 1))
8589 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8593 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8594 && GET_MODE_SIZE (mode) == 4
8595 && GET_CODE (x) == SYMBOL_REF
8596 && CONSTANT_POOL_ADDRESS_P (x)
8598 && symbol_mentioned_p (get_pool_constant (x))
8599 && ! pcrel_constant_p (get_pool_constant (x))))
8605 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8606 instruction of mode MODE. */
8608 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8610 switch (GET_MODE_SIZE (mode))
8613 return val >= 0 && val < 32;
8616 return val >= 0 && val < 64 && (val & 1) == 0;
8620 && (val + GET_MODE_SIZE (mode)) <= 128
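/* The checks above follow the Thumb-1 5-bit immediate offset, scaled by
   the access size: 0..31 for byte accesses, 0..62 (even) for halfword
   accesses, and word-aligned offsets up to 128 - size for words and
   larger.  */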
8626 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8629 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8630 else if (TARGET_THUMB2)
8631 return thumb2_legitimate_address_p (mode, x, strict_p);
8632 else /* if (TARGET_THUMB1) */
8633 return thumb1_legitimate_address_p (mode, x, strict_p);
8636 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8638 Given an rtx X being reloaded into a reg required to be
8639 in class CLASS, return the class of reg to actually use.
8640 In general this is just CLASS, but for the Thumb core registers and
8641 immediate constants we prefer a LO_REGS class or a subset. */
8644 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8650 if (rclass == GENERAL_REGS)
8657 /* Build the SYMBOL_REF for __tls_get_addr. */
8659 static GTY(()) rtx tls_get_addr_libfunc;
8662 get_tls_get_addr (void)
8664 if (!tls_get_addr_libfunc)
8665 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8666 return tls_get_addr_libfunc;
8670 arm_load_tp (rtx target)
8673 target = gen_reg_rtx (SImode);
8677 /* Can return in any reg. */
8678 emit_insn (gen_load_tp_hard (target));
8682 /* Always returned in r0. Immediately copy the result into a pseudo,
8683 otherwise other uses of r0 (e.g. setting up function arguments) may
8684 clobber the value. */
8690 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8691 rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
8693 emit_insn (gen_load_tp_soft_fdpic ());
8696 emit_insn (gen_restore_pic_register_after_call(fdpic_reg, initial_fdpic_reg));
8699 emit_insn (gen_load_tp_soft ());
8701 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8702 emit_move_insn (target, tmp);
8708 load_tls_operand (rtx x, rtx reg)
8712 if (reg == NULL_RTX)
8713 reg = gen_reg_rtx (SImode);
8715 tmp = gen_rtx_CONST (SImode, x);
8717 emit_move_insn (reg, tmp);
8723 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8725 rtx label, labelno = NULL_RTX, sum;
8727 gcc_assert (reloc != TLS_DESCSEQ);
8732 sum = gen_rtx_UNSPEC (Pmode,
8733 gen_rtvec (2, x, GEN_INT (reloc)),
8738 labelno = GEN_INT (pic_labelno++);
8739 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8740 label = gen_rtx_CONST (VOIDmode, label);
8742 sum = gen_rtx_UNSPEC (Pmode,
8743 gen_rtvec (4, x, GEN_INT (reloc), label,
8744 GEN_INT (TARGET_ARM ? 8 : 4)),
8747 reg = load_tls_operand (sum, reg);
8750 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
8751 else if (TARGET_ARM)
8752 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8754 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8756 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8757 LCT_PURE, /* LCT_CONST? */
8760 rtx_insn *insns = get_insns ();
8767 arm_tls_descseq_addr (rtx x, rtx reg)
8769 rtx labelno = GEN_INT (pic_labelno++);
8770 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8771 rtx sum = gen_rtx_UNSPEC (Pmode,
8772 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8773 gen_rtx_CONST (VOIDmode, label),
8774 GEN_INT (!TARGET_ARM)),
8776 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8778 emit_insn (gen_tlscall (x, labelno));
8780 reg = gen_reg_rtx (SImode);
8782 gcc_assert (REGNO (reg) != R0_REGNUM);
8784 emit_move_insn (reg, reg0);
8791 legitimize_tls_address (rtx x, rtx reg)
8793 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8795 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8799 case TLS_MODEL_GLOBAL_DYNAMIC:
8800 if (TARGET_GNU2_TLS)
8802 gcc_assert (!TARGET_FDPIC);
8804 reg = arm_tls_descseq_addr (x, reg);
8806 tp = arm_load_tp (NULL_RTX);
8808 dest = gen_rtx_PLUS (Pmode, tp, reg);
8812 /* Original scheme */
8814 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
8816 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8817 dest = gen_reg_rtx (Pmode);
8818 emit_libcall_block (insns, dest, ret, x);
8822 case TLS_MODEL_LOCAL_DYNAMIC:
8823 if (TARGET_GNU2_TLS)
8825 gcc_assert (!TARGET_FDPIC);
8827 reg = arm_tls_descseq_addr (x, reg);
8829 tp = arm_load_tp (NULL_RTX);
8831 dest = gen_rtx_PLUS (Pmode, tp, reg);
8836 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
8838 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8840 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8841 share the LDM result with other LD model accesses. */
8842 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8844 dest = gen_reg_rtx (Pmode);
8845 emit_libcall_block (insns, dest, ret, eqv);
8847 /* Load the addend. */
8848 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8849 GEN_INT (TLS_LDO32)),
8851 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8852 dest = gen_rtx_PLUS (Pmode, dest, addend);
8856 case TLS_MODEL_INITIAL_EXEC:
8859 sum = gen_rtx_UNSPEC (Pmode,
8860 gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
8862 reg = load_tls_operand (sum, reg);
8863 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
8864 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
8868 labelno = GEN_INT (pic_labelno++);
8869 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8870 label = gen_rtx_CONST (VOIDmode, label);
8871 sum = gen_rtx_UNSPEC (Pmode,
8872 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8873 GEN_INT (TARGET_ARM ? 8 : 4)),
8875 reg = load_tls_operand (sum, reg);
8878 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8879 else if (TARGET_THUMB2)
8880 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8883 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8884 emit_move_insn (reg, gen_const_mem (SImode, reg));
8888 tp = arm_load_tp (NULL_RTX);
8890 return gen_rtx_PLUS (Pmode, tp, reg);
8892 case TLS_MODEL_LOCAL_EXEC:
8893 tp = arm_load_tp (NULL_RTX);
8895 reg = gen_rtx_UNSPEC (Pmode,
8896 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8898 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8900 return gen_rtx_PLUS (Pmode, tp, reg);
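/* Summary of the models handled above: both dynamic models obtain the
   address at run time, either by calling __tls_get_addr or via a GNU2
   descriptor sequence; initial-exec loads a TPOFF value from the GOT
   and adds the thread pointer; local-exec needs no load at all, adding
   the TLS_LE32 offset directly to the thread pointer.  */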
8907 /* Try machine-dependent ways of modifying an illegitimate address
8908 to be legitimate. If we find one, return the new, valid address. */
8910 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8912 if (arm_tls_referenced_p (x))
8916 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8918 addend = XEXP (XEXP (x, 0), 1);
8919 x = XEXP (XEXP (x, 0), 0);
8922 if (GET_CODE (x) != SYMBOL_REF)
8925 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8927 x = legitimize_tls_address (x, NULL_RTX);
8931 x = gen_rtx_PLUS (SImode, x, addend);
8940 /* TODO: legitimize_address for Thumb2. */
8943 return thumb_legitimize_address (x, orig_x, mode);
8946 if (GET_CODE (x) == PLUS)
8948 rtx xop0 = XEXP (x, 0);
8949 rtx xop1 = XEXP (x, 1);
8951 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8952 xop0 = force_reg (SImode, xop0);
8954 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8955 && !symbol_mentioned_p (xop1))
8956 xop1 = force_reg (SImode, xop1);
8958 if (ARM_BASE_REGISTER_RTX_P (xop0)
8959 && CONST_INT_P (xop1))
8961 HOST_WIDE_INT n, low_n;
8965 /* VFP addressing modes actually allow greater offsets, but for
8966 now we just stick with the lowest common denominator. */
8967 if (mode == DImode || mode == DFmode)
8979 low_n = ((mode) == TImode ? 0
8980 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8984 base_reg = gen_reg_rtx (SImode);
8985 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8986 emit_move_insn (base_reg, val);
8987 x = plus_constant (Pmode, base_reg, low_n);
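/* E.g. for xop0 + 0x1234 in SImode (illustrative values): 0x1234 does
   not fit the 12-bit offset field, so we materialize
   base_reg = xop0 + 0x1000 and rewrite the address as base_reg + 0x234,
   which does fit.  */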
8989 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8990 x = gen_rtx_PLUS (SImode, xop0, xop1);
8993 /* XXX We don't allow MINUS any more -- see comment in
8994 arm_legitimate_address_outer_p (). */
8995 else if (GET_CODE (x) == MINUS)
8997 rtx xop0 = XEXP (x, 0);
8998 rtx xop1 = XEXP (x, 1);
9000 if (CONSTANT_P (xop0))
9001 xop0 = force_reg (SImode, xop0);
9003 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9004 xop1 = force_reg (SImode, xop1);
9006 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9007 x = gen_rtx_MINUS (SImode, xop0, xop1);
9010 /* Make sure to take full advantage of the pre-indexed addressing mode
9011 with absolute addresses which often allows for the base register to
9012 be factorized for multiple adjacent memory references, and it might
9013 even allow for the minipool to be avoided entirely.  */
9014 else if (CONST_INT_P (x) && optimize > 0)
9017 HOST_WIDE_INT mask, base, index;
9020 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
9021 use an 8-bit index.  So let's use a 12-bit index for SImode only and
9022 hope that arm_gen_constant will enable ldrb to use more bits. */
9023 bits = (mode == SImode) ? 12 : 8;
9024 mask = (1 << bits) - 1;
9025 base = INTVAL (x) & ~mask;
9026 index = INTVAL (x) & mask;
9027 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
9029 /* It'll most probably be more efficient to generate the base
9030 with more bits set and use a negative index instead. */
9034 base_reg = force_reg (SImode, GEN_INT (base));
9035 x = plus_constant (Pmode, base_reg, index);
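/* E.g. for the absolute SImode address 0x12345678: mask is 0xfff, so
   base = 0x12345000 is forced into a register and the access uses the
   12-bit index 0x678, letting nearby references share the same base.  */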
9040 /* We need to find and carefully transform any SYMBOL and LABEL
9041 references; so go back to the original address expression. */
9042 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9043 false /*compute_now*/);
9045 if (new_x != orig_x)
9053 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9054 to be legitimate. If we find one, return the new, valid address. */
9056 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9058 if (GET_CODE (x) == PLUS
9059 && CONST_INT_P (XEXP (x, 1))
9060 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9061 || INTVAL (XEXP (x, 1)) < 0))
9063 rtx xop0 = XEXP (x, 0);
9064 rtx xop1 = XEXP (x, 1);
9065 HOST_WIDE_INT offset = INTVAL (xop1);
9067 /* Try to fold the offset into a biasing of the base register and
9068 then offsetting that. Don't do this when optimizing for space
9069 since it can cause too many CSEs. */
9070 if (optimize_size && offset >= 0
9071 && offset < 256 + 31 * GET_MODE_SIZE (mode))
9073 HOST_WIDE_INT delta;
9076 delta = offset - (256 - GET_MODE_SIZE (mode));
9077 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9078 delta = 31 * GET_MODE_SIZE (mode);
9080 delta = offset & (~31 * GET_MODE_SIZE (mode));
9082 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9084 x = plus_constant (Pmode, xop0, delta);
9086 else if (offset < 0 && offset > -256)
9087 /* Small negative offsets are best done with a subtract before the
9088 dereference; forcing these into a register normally takes two
9089 insns.  */
9090 x = force_operand (x, NULL_RTX);
9093 /* For the remaining cases, force the constant into a register. */
9094 xop1 = force_reg (SImode, xop1);
9095 x = gen_rtx_PLUS (SImode, xop0, xop1);
9098 else if (GET_CODE (x) == PLUS
9099 && s_register_operand (XEXP (x, 1), SImode)
9100 && !s_register_operand (XEXP (x, 0), SImode))
9102 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9104 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9109 /* We need to find and carefully transform any SYMBOL and LABEL
9110 references; so go back to the original address expression. */
9111 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9112 false /*compute_now*/);
9114 if (new_x != orig_x)
9121 /* Return TRUE if X contains any TLS symbol references. */
9124 arm_tls_referenced_p (rtx x)
9126 if (! TARGET_HAVE_TLS)
9129 subrtx_iterator::array_type array;
9130 FOR_EACH_SUBRTX (iter, array, x, ALL)
9132 const_rtx x = *iter;
9133 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
9135 /* ARM currently does not provide relocations to encode TLS variables
9136 into AArch32 instructions (only into data), so there is currently
9137 no way to implement these if the literal pool is disabled.  */
9138 if (arm_disable_literal_pool)
9139 sorry ("accessing thread-local storage is not currently supported "
9140 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9145 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9146 TLS offsets, not real symbol references. */
9147 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9148 iter.skip_subrtxes ();
9153 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9155 On the ARM, allow any integer (invalid ones are removed later by insn
9156 patterns), nice doubles and symbol_refs which refer to the function's
9157 constant pool.
9159 When generating PIC, allow anything.  */
9162 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9164 return flag_pic || !label_mentioned_p (x);
9168 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9170 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
9171 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
9172 for ARMv8-M Baseline or later the result is valid. */
9173 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9176 return (CONST_INT_P (x)
9177 || CONST_DOUBLE_P (x)
9178 || CONSTANT_ADDRESS_P (x)
9179 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
9184 arm_legitimate_constant_p (machine_mode mode, rtx x)
9186 return (!arm_cannot_force_const_mem (mode, x)
9188 ? arm_legitimate_constant_p_1 (mode, x)
9189 : thumb_legitimate_constant_p (mode, x)));
9192 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9195 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9198 split_const (x, &base, &offset);
9200 if (SYMBOL_REF_P (base))
9202 /* Function symbols cannot have an offset due to the Thumb bit. */
9203 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9204 && INTVAL (offset) != 0)
9207 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9208 && !offset_within_block_p (base, INTVAL (offset)))
9211 return arm_tls_referenced_p (x);
9214 #define REG_OR_SUBREG_REG(X) \
9216 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
9218 #define REG_OR_SUBREG_RTX(X) \
9219 (REG_P (X) ? (X) : SUBREG_REG (X))
9222 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9224 machine_mode mode = GET_MODE (x);
9233 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9240 return COSTS_N_INSNS (1);
9243 if (arm_arch6m && arm_m_profile_small_mul)
9244 return COSTS_N_INSNS (32);
9246 if (CONST_INT_P (XEXP (x, 1)))
9249 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9256 return COSTS_N_INSNS (2) + cycles;
9258 return COSTS_N_INSNS (1) + 16;
9261 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9262 the mode.  */
9263 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9264 return (COSTS_N_INSNS (words)
9265 + 4 * ((MEM_P (SET_SRC (x)))
9266 + MEM_P (SET_DEST (x))));
9271 if (UINTVAL (x) < 256
9272 /* 16-bit constant. */
9273 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9275 if (thumb_shiftable_const (INTVAL (x)))
9276 return COSTS_N_INSNS (2);
9277 return COSTS_N_INSNS (3);
9279 else if ((outer == PLUS || outer == COMPARE)
9280 && INTVAL (x) < 256 && INTVAL (x) > -256)
9282 else if ((outer == IOR || outer == XOR || outer == AND)
9283 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9284 return COSTS_N_INSNS (1);
9285 else if (outer == AND)
9288 /* This duplicates the tests in the andsi3 expander. */
9289 for (i = 9; i <= 31; i++)
9290 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9291 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9292 return COSTS_N_INSNS (2);
9294 else if (outer == ASHIFT || outer == ASHIFTRT
9295 || outer == LSHIFTRT)
9297 return COSTS_N_INSNS (2);
9303 return COSTS_N_INSNS (3);
9321 /* XXX another guess. */
9322 /* Memory costs quite a lot for the first word, but subsequent words
9323 load at the equivalent of a single insn each. */
9324 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9325 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9330 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9336 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9337 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9343 return total + COSTS_N_INSNS (1);
9345 /* Assume a two-shift sequence. Increase the cost slightly so
9346 we prefer actual shifts over an extend operation. */
9347 return total + 1 + COSTS_N_INSNS (2);
9354 /* Estimates the size cost of thumb1 instructions.
9355 For now most of the code is copied from thumb1_rtx_costs. We need more
9356 fine-grained tuning when we have more related test cases.  */
9358 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9360 machine_mode mode = GET_MODE (x);
9369 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9373 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
9374 defined by RTL expansion, especially for the expansion of
9375 multiplication.  */
9376 if ((GET_CODE (XEXP (x, 0)) == MULT
9377 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9378 || (GET_CODE (XEXP (x, 1)) == MULT
9379 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9380 return COSTS_N_INSNS (2);
9385 return COSTS_N_INSNS (1);
9388 if (CONST_INT_P (XEXP (x, 1)))
9390 /* The Thumb-1 mul instruction can't operate on a constant.  We must load
9391 it into a register first.  */
9392 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9393 /* For targets which have a very small and high-latency multiply
9394 unit, we prefer to synthesize the mult with up to 5 instructions,
9395 giving a good balance between size and performance. */
9396 if (arm_arch6m && arm_m_profile_small_mul)
9397 return COSTS_N_INSNS (5);
9399 return COSTS_N_INSNS (1) + const_size;
9401 return COSTS_N_INSNS (1);
9404 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9405 the mode.  */
9406 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9407 cost = COSTS_N_INSNS (words);
9408 if (satisfies_constraint_J (SET_SRC (x))
9409 || satisfies_constraint_K (SET_SRC (x))
9410 /* Too big an immediate for a 2-byte mov, using MOVT. */
9411 || (CONST_INT_P (SET_SRC (x))
9412 && UINTVAL (SET_SRC (x)) >= 256
9414 && satisfies_constraint_j (SET_SRC (x)))
9415 /* thumb1_movdi_insn. */
9416 || ((words > 1) && MEM_P (SET_SRC (x))))
9417 cost += COSTS_N_INSNS (1);
9423 if (UINTVAL (x) < 256)
9424 return COSTS_N_INSNS (1);
9425 /* movw is 4 bytes long.  */
9426 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9427 return COSTS_N_INSNS (2);
9428 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9429 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9430 return COSTS_N_INSNS (2);
9431 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9432 if (thumb_shiftable_const (INTVAL (x)))
9433 return COSTS_N_INSNS (2);
9434 return COSTS_N_INSNS (3);
9436 else if ((outer == PLUS || outer == COMPARE)
9437 && INTVAL (x) < 256 && INTVAL (x) > -256)
9439 else if ((outer == IOR || outer == XOR || outer == AND)
9440 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9441 return COSTS_N_INSNS (1);
9442 else if (outer == AND)
9445 /* This duplicates the tests in the andsi3 expander. */
9446 for (i = 9; i <= 31; i++)
9447 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9448 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9449 return COSTS_N_INSNS (2);
9451 else if (outer == ASHIFT || outer == ASHIFTRT
9452 || outer == LSHIFTRT)
9454 return COSTS_N_INSNS (2);
9460 return COSTS_N_INSNS (3);
9474 return COSTS_N_INSNS (1);
9477 return (COSTS_N_INSNS (1)
9479 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9480 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9481 ? COSTS_N_INSNS (1) : 0));
9485 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9490 /* XXX still guessing. */
9491 switch (GET_MODE (XEXP (x, 0)))
9494 return (1 + (mode == DImode ? 4 : 0)
9495 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9498 return (4 + (mode == DImode ? 4 : 0)
9499 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9502 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9513 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9514 operand, then return the operand that is being shifted. If the shift
9515 is not by a constant, then set SHIFT_REG to point to the operand.
9516 Return NULL if OP is not a shifter operand. */
9518 shifter_op_p (rtx op, rtx *shift_reg)
9520 enum rtx_code code = GET_CODE (op);
9522 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9523 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9524 return XEXP (op, 0);
9525 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9526 return XEXP (op, 0);
9527 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9528 || code == ASHIFTRT)
9530 if (!CONST_INT_P (XEXP (op, 1)))
9531 *shift_reg = XEXP (op, 1);
9532 return XEXP (op, 0);
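/* E.g. for (plus (mult (reg X) (const_int 4)) (reg Y)), calling
   shifter_op_p on the MULT returns (reg X): multiplication by 4 is the
   canonical RTL form of a left shift by 2, which the ARM barrel shifter
   applies for free.  */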
9539 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9541 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9542 rtx_code code = GET_CODE (x);
9543 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9545 switch (XINT (x, 1))
9547 case UNSPEC_UNALIGNED_LOAD:
9548 /* We can only do unaligned loads into the integer unit, and we can't
9549 use LDM or LDRD.  */
9550 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9552 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9553 + extra_cost->ldst.load_unaligned);
9556 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9557 ADDR_SPACE_GENERIC, speed_p);
9561 case UNSPEC_UNALIGNED_STORE:
9562 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9564 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9565 + extra_cost->ldst.store_unaligned);
9567 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9569 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9570 ADDR_SPACE_GENERIC, speed_p);
9581 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9585 *cost = COSTS_N_INSNS (2);
9591 /* Cost of a libcall. We assume one insn per argument, an amount for the
9592 call (one insn for -Os) and then one for processing the result. */
9593 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
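/* So e.g. LIBCALL_COST (2) models a two-argument call: two insns to set
   up the arguments plus an assumed overhead of 18 insns when optimizing
   for speed, or only 2 when optimizing for size.  */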
9595 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9598 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9599 if (shift_op != NULL \
9600 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9605 *cost += extra_cost->alu.arith_shift_reg; \
9606 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9607 ASHIFT, 1, speed_p); \
9610 *cost += extra_cost->alu.arith_shift; \
9612 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9613 ASHIFT, 0, speed_p) \
9614 + rtx_cost (XEXP (x, 1 - IDX), \
9615 GET_MODE (shift_op), \
9622 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
9623 considering the costs of the addressing mode and memory access
9624 separately.  */
9626 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9627 int *cost, bool speed_p)
9629 machine_mode mode = GET_MODE (x);
9631 *cost = COSTS_N_INSNS (1);
9634 && GET_CODE (XEXP (x, 0)) == PLUS
9635 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9636 /* This will be split into two instructions. Add the cost of the
9637 additional instruction here. The cost of the memory access is computed
9638 below. See arm.md:calculate_pic_address. */
9639 *cost += COSTS_N_INSNS (1);
9641 /* Calculate cost of the addressing mode. */
9644 arm_addr_mode_op op_type;
9645 switch (GET_CODE (XEXP (x, 0)))
9649 op_type = AMO_DEFAULT;
9652 /* MINUS does not appear in RTL, but the architecture supports it,
9653 so handle this case defensively. */
9656 op_type = AMO_NO_WB;
9668 if (VECTOR_MODE_P (mode))
9669 *cost += current_tune->addr_mode_costs->vector[op_type];
9670 else if (FLOAT_MODE_P (mode))
9671 *cost += current_tune->addr_mode_costs->fp[op_type];
9673 *cost += current_tune->addr_mode_costs->integer[op_type];
9676 /* Calculate cost of memory access. */
9679 if (FLOAT_MODE_P (mode))
9681 if (GET_MODE_SIZE (mode) == 8)
9682 *cost += extra_cost->ldst.loadd;
9684 *cost += extra_cost->ldst.loadf;
9686 else if (VECTOR_MODE_P (mode))
9687 *cost += extra_cost->ldst.loadv;
9691 if (GET_MODE_SIZE (mode) == 8)
9692 *cost += extra_cost->ldst.ldrd;
9694 *cost += extra_cost->ldst.load;
9701 /* RTX costs. Make an estimate of the cost of executing the operation
9702 X, which is contained within an operation with code OUTER_CODE.
9703 SPEED_P indicates whether the cost desired is the performance cost,
9704 or the size cost. The estimate is stored in COST and the return
9705 value is TRUE if the cost calculation is final, or FALSE if the
9706 caller should recurse through the operands of X to add additional
9709 We currently make no attempt to model the size savings of Thumb-2
9710 16-bit instructions. At the normal points in compilation where
9711 this code is called we have no measure of whether the condition
9712 flags are live or not, and thus no realistic way to determine what
9713 the size will eventually be. */
9715 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9716 const struct cpu_cost_table *extra_cost,
9717 int *cost, bool speed_p)
9719 machine_mode mode = GET_MODE (x);
9721 *cost = COSTS_N_INSNS (1);
9726 *cost = thumb1_rtx_costs (x, code, outer_code);
9728 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9736 /* SET RTXs don't have a mode so we get it from the destination. */
9737 mode = GET_MODE (SET_DEST (x));
9739 if (REG_P (SET_SRC (x))
9740 && REG_P (SET_DEST (x)))
9742 /* Assume that most copies can be done with a single insn,
9743 unless we don't have HW FP, in which case everything
9744 larger than word mode will require two insns. */
9745 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9746 && GET_MODE_SIZE (mode) > 4)
9749 /* Conditional register moves can be encoded
9750 in 16 bits in Thumb mode. */
9751 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9757 if (CONST_INT_P (SET_SRC (x)))
9759 /* Handle CONST_INT here, since the value doesn't have a mode
9760 and we would otherwise be unable to work out the true cost. */
9761 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9764 /* Slightly lower the cost of setting a core reg to a constant.
9765 This helps break up chains and allows for better scheduling. */
9766 if (REG_P (SET_DEST (x))
9767 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9770 /* Immediate moves with an immediate in the range [0, 255] can be
9771 encoded in 16 bits in Thumb mode. */
9772 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9773 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9775 goto const_int_cost;
9781 return arm_mem_costs (x, extra_cost, cost, speed_p);
9785 /* Calculations of LDM costs are complex. We assume an initial cost
9786 (ldm_1st) which will load the number of registers mentioned in
9787 ldm_regs_per_insn_1st registers; then each additional
9788 ldm_regs_per_insn_subsequent registers cost one more insn. The
9789 formula for N regs is thus:
9791 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9792 + ldm_regs_per_insn_subsequent - 1)
9793 / ldm_regs_per_insn_subsequent).
9795 Additional costs may also be added for addressing. A similar
9796 formula is used for STM. */
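/* Worked example with illustrative numbers: if regs_per_insn_1st is 3
   and regs_per_insn_subsequent is 2, an 8-register LDM costs the
   first-insn amount plus COSTS_N_INSNS ((8 - 3 + 2 - 1) / 2), i.e.
   COSTS_N_INSNS (3) on top.  */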
9798 bool is_ldm = load_multiple_operation (x, SImode);
9799 bool is_stm = store_multiple_operation (x, SImode);
9801 if (is_ldm || is_stm)
9805 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9806 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9807 ? extra_cost->ldst.ldm_regs_per_insn_1st
9808 : extra_cost->ldst.stm_regs_per_insn_1st;
9809 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9810 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9811 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9813 *cost += regs_per_insn_1st
9814 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9815 + regs_per_insn_sub - 1)
9816 / regs_per_insn_sub);
9825 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9826 && (mode == SFmode || !TARGET_VFP_SINGLE))
9827 *cost += COSTS_N_INSNS (speed_p
9828 ? extra_cost->fp[mode != SFmode].div : 0);
9829 else if (mode == SImode && TARGET_IDIV)
9830 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9832 *cost = LIBCALL_COST (2);
9834 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9835 are possible, udiv is preferred.  */
9836 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9837 return false; /* All arguments must be in registers. */
9840 /* MOD by a power of 2 can be expanded as:
9841 rsbs r1, r0, #0
9842 and r0, r0, #(n - 1)
9843 and r1, r1, #(n - 1)
9844 rsbpl r0, r1, #0. */
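/* Checking the sequence on an example: for r0 = -7 and n = 4, rsbs
   yields r1 = 7 with the flags pl, the ANDs leave r0 = 1 and r1 = 3,
   and rsbpl rewrites r0 to -3, matching C's -7 % 4; for r0 = 7 the
   flags are mi, so rsbpl is skipped and r0 = 7 & 3 = 3.  */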
9845 if (CONST_INT_P (XEXP (x, 1))
9846 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9849 *cost += COSTS_N_INSNS (3);
9852 *cost += 2 * extra_cost->alu.logical
9853 + extra_cost->alu.arith;
9859 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9860 are possible, udiv is preferred.  */
9861 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9862 return false; /* All arguments must be in registers. */
9865 if (mode == SImode && REG_P (XEXP (x, 1)))
9867 *cost += (COSTS_N_INSNS (1)
9868 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9870 *cost += extra_cost->alu.shift_reg;
9878 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9880 *cost += (COSTS_N_INSNS (2)
9881 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9883 *cost += 2 * extra_cost->alu.shift;
9884 /* Slightly disparage left shift by 1 so that we prefer adddi3.  */
9885 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
9889 else if (mode == SImode)
9891 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9892 /* Slightly disparage register shifts at -Os, but not by much. */
9893 if (!CONST_INT_P (XEXP (x, 1)))
9894 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9895 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9898 else if (GET_MODE_CLASS (mode) == MODE_INT
9899 && GET_MODE_SIZE (mode) < 4)
9903 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9904 /* Slightly disparage register shifts at -Os, but not by
9905 much.  */
9906 if (!CONST_INT_P (XEXP (x, 1)))
9907 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9908 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9910 else if (code == LSHIFTRT || code == ASHIFTRT)
9912 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9914 /* Can use SBFX/UBFX. */
9916 *cost += extra_cost->alu.bfx;
9917 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9921 *cost += COSTS_N_INSNS (1);
9922 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9925 if (CONST_INT_P (XEXP (x, 1)))
9926 *cost += 2 * extra_cost->alu.shift;
9928 *cost += (extra_cost->alu.shift
9929 + extra_cost->alu.shift_reg);
9932 /* Slightly disparage register shifts. */
9933 *cost += !CONST_INT_P (XEXP (x, 1));
9938 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9939 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9942 if (CONST_INT_P (XEXP (x, 1)))
9943 *cost += (2 * extra_cost->alu.shift
9944 + extra_cost->alu.log_shift);
9946 *cost += (extra_cost->alu.shift
9947 + extra_cost->alu.shift_reg
9948 + extra_cost->alu.log_shift_reg);
9954 *cost = LIBCALL_COST (2);
9963 *cost += extra_cost->alu.rev;
9970 /* No rev instruction available. Look at arm_legacy_rev
9971 and thumb_legacy_rev for the form of RTL used then. */
9974 *cost += COSTS_N_INSNS (9);
9978 *cost += 6 * extra_cost->alu.shift;
9979 *cost += 3 * extra_cost->alu.logical;
9984 *cost += COSTS_N_INSNS (4);
9988 *cost += 2 * extra_cost->alu.shift;
9989 *cost += extra_cost->alu.arith_shift;
9990 *cost += 2 * extra_cost->alu.logical;
9998 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9999 && (mode == SFmode || !TARGET_VFP_SINGLE))
10001 if (GET_CODE (XEXP (x, 0)) == MULT
10002 || GET_CODE (XEXP (x, 1)) == MULT)
10004 rtx mul_op0, mul_op1, sub_op;
10007 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10009 if (GET_CODE (XEXP (x, 0)) == MULT)
10011 mul_op0 = XEXP (XEXP (x, 0), 0);
10012 mul_op1 = XEXP (XEXP (x, 0), 1);
10013 sub_op = XEXP (x, 1);
10017 mul_op0 = XEXP (XEXP (x, 1), 0);
10018 mul_op1 = XEXP (XEXP (x, 1), 1);
10019 sub_op = XEXP (x, 0);
10022 /* The first operand of the multiply may be optionally
10023 inverted.  */
10024 if (GET_CODE (mul_op0) == NEG)
10025 mul_op0 = XEXP (mul_op0, 0);
10027 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10028 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10029 + rtx_cost (sub_op, mode, code, 0, speed_p));
10035 *cost += extra_cost->fp[mode != SFmode].addsub;
10039 if (mode == SImode)
10041 rtx shift_by_reg = NULL;
10045 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
10046 if (shift_op == NULL)
10048 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
10049 non_shift_op = XEXP (x, 0);
10052 non_shift_op = XEXP (x, 1);
10054 if (shift_op != NULL)
10056 if (shift_by_reg != NULL)
10059 *cost += extra_cost->alu.arith_shift_reg;
10060 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10063 *cost += extra_cost->alu.arith_shift;
10065 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10066 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10070 if (arm_arch_thumb2
10071 && GET_CODE (XEXP (x, 1)) == MULT)
10075 *cost += extra_cost->mult[0].add;
10076 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10077 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10078 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10082 if (CONST_INT_P (XEXP (x, 0)))
10084 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10085 INTVAL (XEXP (x, 0)), NULL_RTX,
10087 *cost = COSTS_N_INSNS (insns);
10089 *cost += insns * extra_cost->alu.arith;
10090 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10094 *cost += extra_cost->alu.arith;
10099 if (GET_MODE_CLASS (mode) == MODE_INT
10100 && GET_MODE_SIZE (mode) < 4)
10102 rtx shift_op, shift_reg;
10105 /* We check both sides of the MINUS for shifter operands since,
10106 unlike PLUS, it's not commutative. */
10108 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10109 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10111 /* Slightly disparage, as we might need to widen the result. */
10114 *cost += extra_cost->alu.arith;
10116 if (CONST_INT_P (XEXP (x, 0)))
10118 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10125 if (mode == DImode)
10127 *cost += COSTS_N_INSNS (1);
10129 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10131 rtx op1 = XEXP (x, 1);
10134 *cost += 2 * extra_cost->alu.arith;
10136 if (GET_CODE (op1) == ZERO_EXTEND)
10137 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10140 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10141 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10145 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10148 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10149 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10151 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10154 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10155 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10158 *cost += (extra_cost->alu.arith
10159 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10160 ? extra_cost->alu.arith
10161 : extra_cost->alu.arith_shift));
10162 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10163 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10164 GET_CODE (XEXP (x, 1)), 0, speed_p));
10169 *cost += 2 * extra_cost->alu.arith;
10175 *cost = LIBCALL_COST (2);
10179 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10180 && (mode == SFmode || !TARGET_VFP_SINGLE))
10182 if (GET_CODE (XEXP (x, 0)) == MULT)
10184 rtx mul_op0, mul_op1, add_op;
10187 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10189 mul_op0 = XEXP (XEXP (x, 0), 0);
10190 mul_op1 = XEXP (XEXP (x, 0), 1);
10191 add_op = XEXP (x, 1);
10193 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10194 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10195 + rtx_cost (add_op, mode, code, 0, speed_p));
10201 *cost += extra_cost->fp[mode != SFmode].addsub;
10204 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10206 *cost = LIBCALL_COST (2);
10210 /* Narrow modes can be synthesized in SImode, but the range
10211 of useful sub-operations is limited. Check for shift operations
10212 on one of the operands.  Only left shifts can be used in the
10213 narrow modes.  */
10214 if (GET_MODE_CLASS (mode) == MODE_INT
10215 && GET_MODE_SIZE (mode) < 4)
10217 rtx shift_op, shift_reg;
10220 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10222 if (CONST_INT_P (XEXP (x, 1)))
10224 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10225 INTVAL (XEXP (x, 1)), NULL_RTX,
10227 *cost = COSTS_N_INSNS (insns);
10229 *cost += insns * extra_cost->alu.arith;
10230 /* Slightly penalize a narrow operation as the result may
10231 need widening.  */
10232 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10236 /* Slightly penalize a narrow operation as the result may
10237 need widening.  */
10240 *cost += extra_cost->alu.arith;
10245 if (mode == SImode)
10247 rtx shift_op, shift_reg;
10249 if (TARGET_INT_SIMD
10250 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10251 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10253 /* UXTA[BH] or SXTA[BH]. */
10255 *cost += extra_cost->alu.extend_arith;
10256 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10258 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10263 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10264 if (shift_op != NULL)
10269 *cost += extra_cost->alu.arith_shift_reg;
10270 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10273 *cost += extra_cost->alu.arith_shift;
10275 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10276 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10279 if (GET_CODE (XEXP (x, 0)) == MULT)
10281 rtx mul_op = XEXP (x, 0);
10283 if (TARGET_DSP_MULTIPLY
10284 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10285 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10286 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10287 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10288 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10289 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10290 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10291 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10292 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10293 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10294 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10295 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10298 /* SMLA[BT][BT]. */
10300 *cost += extra_cost->mult[0].extend_add;
10301 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10302 SIGN_EXTEND, 0, speed_p)
10303 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10304 SIGN_EXTEND, 0, speed_p)
10305 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10310 *cost += extra_cost->mult[0].add;
10311 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10312 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10313 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10316 if (CONST_INT_P (XEXP (x, 1)))
10318 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10319 INTVAL (XEXP (x, 1)), NULL_RTX,
10321 *cost = COSTS_N_INSNS (insns);
10323 *cost += insns * extra_cost->alu.arith;
10324 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10328 *cost += extra_cost->alu.arith;
10333 if (mode == DImode)
10335 if (GET_CODE (XEXP (x, 0)) == MULT
10336 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10337 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10338 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10339 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10342 *cost += extra_cost->mult[1].extend_add;
10343 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10344 ZERO_EXTEND, 0, speed_p)
10345 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10346 ZERO_EXTEND, 0, speed_p)
10347 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10351 *cost += COSTS_N_INSNS (1);
10353 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10354 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10357 *cost += (extra_cost->alu.arith
10358 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10359 ? extra_cost->alu.arith
10360 : extra_cost->alu.arith_shift));
10362 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10364 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10369 *cost += 2 * extra_cost->alu.arith;
10374 *cost = LIBCALL_COST (2);
10377 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10380 *cost += extra_cost->alu.rev;
10384 /* Fall through. */
10385 case AND: case XOR:
10386 if (mode == SImode)
10388 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10389 rtx op0 = XEXP (x, 0);
10390 rtx shift_op, shift_reg;
10394 || (code == IOR && TARGET_THUMB2)))
10395 op0 = XEXP (op0, 0);
10398 shift_op = shifter_op_p (op0, &shift_reg);
10399 if (shift_op != NULL)
10404 *cost += extra_cost->alu.log_shift_reg;
10405 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10408 *cost += extra_cost->alu.log_shift;
10410 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10411 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10415 if (CONST_INT_P (XEXP (x, 1)))
10417 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10418 INTVAL (XEXP (x, 1)), NULL_RTX,
10421 *cost = COSTS_N_INSNS (insns);
10423 *cost += insns * extra_cost->alu.logical;
10424 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10429 *cost += extra_cost->alu.logical;
10430 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10431 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10435 if (mode == DImode)
10437 rtx op0 = XEXP (x, 0);
10438 enum rtx_code subcode = GET_CODE (op0);
10440 *cost += COSTS_N_INSNS (1);
10444 || (code == IOR && TARGET_THUMB2)))
10445 op0 = XEXP (op0, 0);
10447 if (GET_CODE (op0) == ZERO_EXTEND)
10450 *cost += 2 * extra_cost->alu.logical;
10452 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10454 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10457 else if (GET_CODE (op0) == SIGN_EXTEND)
10460 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10462 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10464 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10469 *cost += 2 * extra_cost->alu.logical;
10475 *cost = LIBCALL_COST (2);
10479 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10480 && (mode == SFmode || !TARGET_VFP_SINGLE))
10482 rtx op0 = XEXP (x, 0);
10484 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10485 op0 = XEXP (op0, 0);
10488 *cost += extra_cost->fp[mode != SFmode].mult;
10490 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10491 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10494 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10496 *cost = LIBCALL_COST (2);
10500 if (mode == SImode)
10502 if (TARGET_DSP_MULTIPLY
10503 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10504 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10505 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10506 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10507 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10508 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10509 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10510 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10511 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10512 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10513 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10514 && (INTVAL (XEXP (XEXP (x, 1), 1))
10517 /* SMUL[TB][TB]. */
10519 *cost += extra_cost->mult[0].extend;
10520 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10521 SIGN_EXTEND, 0, speed_p);
10522 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10523 SIGN_EXTEND, 1, speed_p);
10527 *cost += extra_cost->mult[0].simple;
10531 if (mode == DImode)
10533 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10534 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10535 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10536 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
10539 *cost += extra_cost->mult[1].extend;
10540 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10541 ZERO_EXTEND, 0, speed_p)
10542 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10543 ZERO_EXTEND, 0, speed_p));
10547 *cost = LIBCALL_COST (2);
10552 *cost = LIBCALL_COST (2);
10556 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10557 && (mode == SFmode || !TARGET_VFP_SINGLE))
10559 if (GET_CODE (XEXP (x, 0)) == MULT)
10562 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10567 *cost += extra_cost->fp[mode != SFmode].neg;
10571 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10573 *cost = LIBCALL_COST (1);
10577 if (mode == SImode)
10579 if (GET_CODE (XEXP (x, 0)) == ABS)
10581 *cost += COSTS_N_INSNS (1);
10582 /* Assume the non-flag-changing variant. */
10584 *cost += (extra_cost->alu.log_shift
10585 + extra_cost->alu.arith_shift);
10586 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10590 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10591 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10593 *cost += COSTS_N_INSNS (1);
10594 /* No extra cost for MOV imm and MVN imm. */
10595 /* If the comparison op is using the flags, there's no further
10596 cost; otherwise we need to add the cost of the comparison. */
10597 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10598 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10599 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10601 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10602 *cost += (COSTS_N_INSNS (1)
10603 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10605 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10608 *cost += extra_cost->alu.arith;
10614 *cost += extra_cost->alu.arith;
10618 if (GET_MODE_CLASS (mode) == MODE_INT
10619 && GET_MODE_SIZE (mode) < 4)
10621 /* Slightly disparage, as we might need an extend operation. */
10624 *cost += extra_cost->alu.arith;
10628 if (mode == DImode)
10630 *cost += COSTS_N_INSNS (1);
10632 *cost += 2 * extra_cost->alu.arith;
10637 *cost = LIBCALL_COST (1);
10641 if (mode == SImode)
10644 rtx shift_reg = NULL;
10646 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10650 if (shift_reg != NULL)
10653 *cost += extra_cost->alu.log_shift_reg;
10654 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10657 *cost += extra_cost->alu.log_shift;
10658 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10663 *cost += extra_cost->alu.logical;
10666 if (mode == DImode)
10668 *cost += COSTS_N_INSNS (1);
10674 *cost += LIBCALL_COST (1);
10679 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10681 *cost += COSTS_N_INSNS (3);
10684 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10685 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10687 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10688 /* Assume that if one arm of the if_then_else is a register,
10689 that it will be tied with the result and eliminate the
10690 conditional insn. */
10691 if (REG_P (XEXP (x, 1)))
10693 else if (REG_P (XEXP (x, 2)))
10699 if (extra_cost->alu.non_exec_costs_exec)
10700 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10702 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10705 *cost += op1cost + op2cost;
10711 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10715 machine_mode op0mode;
10716 /* We'll mostly assume that the cost of a compare is the cost of the
10717 LHS. However, there are some notable exceptions. */
10719 /* Floating point compares are never done as side-effects. */
10720 op0mode = GET_MODE (XEXP (x, 0));
10721 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10722 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10725 *cost += extra_cost->fp[op0mode != SFmode].compare;
10727 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10729 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10735 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10737 *cost = LIBCALL_COST (2);
10741 /* DImode compares normally take two insns. */
10742 if (op0mode == DImode)
10744 *cost += COSTS_N_INSNS (1);
10746 *cost += 2 * extra_cost->alu.arith;
10750 if (op0mode == SImode)
10755 if (XEXP (x, 1) == const0_rtx
10756 && !(REG_P (XEXP (x, 0))
10757 || (GET_CODE (XEXP (x, 0)) == SUBREG
10758 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10760 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10762 /* Multiply operations that set the flags are often
10763 significantly more expensive. */
10765 && GET_CODE (XEXP (x, 0)) == MULT
10766 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10767 *cost += extra_cost->mult[0].flag_setting;
10770 && GET_CODE (XEXP (x, 0)) == PLUS
10771 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10772 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10774 *cost += extra_cost->mult[0].flag_setting;
10779 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10780 if (shift_op != NULL)
10782 if (shift_reg != NULL)
10784 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10787 *cost += extra_cost->alu.arith_shift_reg;
10790 *cost += extra_cost->alu.arith_shift;
10791 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10792 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10797 *cost += extra_cost->alu.arith;
10798 if (CONST_INT_P (XEXP (x, 1))
10799 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10801 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10809 *cost = LIBCALL_COST (2);
10832 if (outer_code == SET)
10834 /* Is it a store-flag operation? */
10835 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10836 && XEXP (x, 1) == const0_rtx)
10838 /* Thumb also needs an IT insn. */
10839 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10842 if (XEXP (x, 1) == const0_rtx)
10847 /* LSR Rd, Rn, #31. */
10849 *cost += extra_cost->alu.shift;
10859 *cost += COSTS_N_INSNS (1);
10863 /* RSBS T1, Rn, Rn, LSR #31
10865 *cost += COSTS_N_INSNS (1);
10867 *cost += extra_cost->alu.arith_shift;
10871 /* RSB Rd, Rn, Rn, ASR #1
10872 LSR Rd, Rd, #31. */
10873 *cost += COSTS_N_INSNS (1);
10875 *cost += (extra_cost->alu.arith_shift
10876 + extra_cost->alu.shift);
10882 *cost += COSTS_N_INSNS (1);
10884 *cost += extra_cost->alu.shift;
10888 /* Remaining cases are either meaningless or would take
10889 three insns anyway. */
10890 *cost = COSTS_N_INSNS (3);
10893 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10898 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10899 if (CONST_INT_P (XEXP (x, 1))
10900 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10902 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10909 /* Not directly inside a set. If it involves the condition code
10910 register it must be the condition for a branch, cond_exec or
10911 I_T_E operation. Since the comparison is performed elsewhere
10912 this is just the control part which has no additional cost. */
10914 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10915 && XEXP (x, 1) == const0_rtx)
10923 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10924 && (mode == SFmode || !TARGET_VFP_SINGLE))
10927 *cost += extra_cost->fp[mode != SFmode].neg;
10931 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10933 *cost = LIBCALL_COST (1);
10937 if (mode == SImode)
10940 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10944 *cost = LIBCALL_COST (1);
10948 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10949 && MEM_P (XEXP (x, 0)))
10951 if (mode == DImode)
10952 *cost += COSTS_N_INSNS (1);
10957 if (GET_MODE (XEXP (x, 0)) == SImode)
10958 *cost += extra_cost->ldst.load;
10960 *cost += extra_cost->ldst.load_sign_extend;
10962 if (mode == DImode)
10963 *cost += extra_cost->alu.shift;
10968 /* Widening from less than 32 bits requires an extend operation. */
10969 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10971 /* We have SXTB/SXTH. */
10972 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10974 *cost += extra_cost->alu.extend;
10976 else if (GET_MODE (XEXP (x, 0)) != SImode)
10978 /* Needs two shifts. */
10979 *cost += COSTS_N_INSNS (1);
10980 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10982 *cost += 2 * extra_cost->alu.shift;
10985 /* Widening beyond 32 bits requires one more insn. */
10986 if (mode == DImode)
10988 *cost += COSTS_N_INSNS (1);
10990 *cost += extra_cost->alu.shift;
10997 || GET_MODE (XEXP (x, 0)) == SImode
10998 || GET_MODE (XEXP (x, 0)) == QImode)
10999 && MEM_P (XEXP (x, 0)))
11001 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11003 if (mode == DImode)
11004 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11009 /* Widening from less than 32 bits requires an extend operation. */
11010 if (GET_MODE (XEXP (x, 0)) == QImode)
11012 /* UXTB can be a shorter instruction in Thumb2, but it might
11013 be slower than the AND Rd, Rn, #255 alternative. When
11014 optimizing for speed it should never be slower to use
11015 AND, and we don't really model 16-bit vs 32-bit insns here. */
11018 *cost += extra_cost->alu.logical;
11020 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11022 /* We have UXTB/UXTH. */
11023 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11025 *cost += extra_cost->alu.extend;
11027 else if (GET_MODE (XEXP (x, 0)) != SImode)
11029 /* Needs two shifts. It's marginally preferable to use
11030 shifts rather than two BIC instructions as the second
11031 shift may merge with a subsequent insn as a shifter op. */
11033 *cost = COSTS_N_INSNS (2);
11034 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11036 *cost += 2 * extra_cost->alu.shift;
11039 /* Widening beyond 32 bits requires one more insn. */
11040 if (mode == DImode)
11042 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11049 /* CONST_INT has no mode, so we cannot tell for sure how many
11050 insns are really going to be needed. The best we can do is
11051 look at the value passed. If it fits in SImode, then assume
11052 that's the mode it will be used for. Otherwise assume it
11053 will be used in DImode. */
11054 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11059 /* Avoid blowing up in arm_gen_constant (). */
11060 if (!(outer_code == PLUS
11061 || outer_code == AND
11062 || outer_code == IOR
11063 || outer_code == XOR
11064 || outer_code == MINUS))
11068 if (mode == SImode)
11070 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11071 INTVAL (x), NULL, NULL,
11077 *cost += COSTS_N_INSNS (arm_gen_constant
11078 (outer_code, SImode, NULL,
11079 trunc_int_for_mode (INTVAL (x), SImode),
11081 + arm_gen_constant (outer_code, SImode, NULL,
11082 INTVAL (x) >> 32, NULL,
11094 if (arm_arch_thumb2 && !flag_pic)
11095 *cost += COSTS_N_INSNS (1);
11097 *cost += extra_cost->ldst.load;
11100 *cost += COSTS_N_INSNS (1);
11104 *cost += COSTS_N_INSNS (1);
11106 *cost += extra_cost->alu.arith;
11112 *cost = COSTS_N_INSNS (4);
11117 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11118 && (mode == SFmode || !TARGET_VFP_SINGLE))
11120 if (vfp3_const_double_rtx (x))
11123 *cost += extra_cost->fp[mode == DFmode].fpconst;
11129 if (mode == DFmode)
11130 *cost += extra_cost->ldst.loadd;
11132 *cost += extra_cost->ldst.loadf;
11135 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11139 *cost = COSTS_N_INSNS (4);
11145 && TARGET_HARD_FLOAT
11146 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11147 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
11148 *cost = COSTS_N_INSNS (1);
11150 *cost = COSTS_N_INSNS (4);
11155 /* When optimizing for size, we prefer constant pool entries to
11156 MOVW/MOVT pairs, so bump the cost of these slightly. */
11163 *cost += extra_cost->alu.clz;
11167 if (XEXP (x, 1) == const0_rtx)
11170 *cost += extra_cost->alu.log_shift;
11171 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11174 /* Fall through. */
11178 *cost += COSTS_N_INSNS (1);
11182 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11183 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11184 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11185 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11186 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11187 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11188 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11189 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11193 *cost += extra_cost->mult[1].extend;
11194 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11195 ZERO_EXTEND, 0, speed_p)
11196 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11197 ZERO_EXTEND, 0, speed_p));
11200 *cost = LIBCALL_COST (1);
11203 case UNSPEC_VOLATILE:
11205 return arm_unspec_cost (x, outer_code, speed_p, cost);
11208 /* Reading the PC is like reading any other register. Writing it
11209 is more expensive, but we take that into account elsewhere. */
11214 /* TODO: Simple zero_extract of bottom bits using AND. */
11215 /* Fall through. */
11219 && CONST_INT_P (XEXP (x, 1))
11220 && CONST_INT_P (XEXP (x, 2)))
11223 *cost += extra_cost->alu.bfx;
11224 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11227 /* Without UBFX/SBFX, need to resort to shift operations. */
11228 *cost += COSTS_N_INSNS (1);
11230 *cost += 2 * extra_cost->alu.shift;
11231 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11235 if (TARGET_HARD_FLOAT)
11238 *cost += extra_cost->fp[mode == DFmode].widen;
11240 && GET_MODE (XEXP (x, 0)) == HFmode)
11242 /* Pre v8, widening HF->DF is a two-step process, first
11243 widening to SFmode. */
11244 *cost += COSTS_N_INSNS (1);
11246 *cost += extra_cost->fp[0].widen;
11248 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11252 *cost = LIBCALL_COST (1);
11255 case FLOAT_TRUNCATE:
11256 if (TARGET_HARD_FLOAT)
11259 *cost += extra_cost->fp[mode == DFmode].narrow;
11260 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11262 /* Vector modes? */
11264 *cost = LIBCALL_COST (1);
11268 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11270 rtx op0 = XEXP (x, 0);
11271 rtx op1 = XEXP (x, 1);
11272 rtx op2 = XEXP (x, 2);
11275 /* vfms or vfnma. */
11276 if (GET_CODE (op0) == NEG)
11277 op0 = XEXP (op0, 0);
11279 /* vfnms or vfnma. */
11280 if (GET_CODE (op2) == NEG)
11281 op2 = XEXP (op2, 0);
11283 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11284 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11285 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11288 *cost += extra_cost->fp[mode == DFmode].fma;
11293 *cost = LIBCALL_COST (3);
11298 if (TARGET_HARD_FLOAT)
11300 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11301 a vcvt fixed-point conversion. */
11302 if (code == FIX && mode == SImode
11303 && GET_CODE (XEXP (x, 0)) == FIX
11304 && GET_MODE (XEXP (x, 0)) == SFmode
11305 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11306 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11310 *cost += extra_cost->fp[0].toint;
11312 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11317 if (GET_MODE_CLASS (mode) == MODE_INT)
11319 mode = GET_MODE (XEXP (x, 0));
11321 *cost += extra_cost->fp[mode == DFmode].toint;
11322 /* Strip off the 'cost' of rounding towards zero. */
11323 if (GET_CODE (XEXP (x, 0)) == FIX)
11324 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11327 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11328 /* ??? Increase the cost to deal with transferring from
11329 FP -> CORE registers? */
11332 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11336 *cost += extra_cost->fp[mode == DFmode].roundint;
11339 /* Vector costs? */
11341 *cost = LIBCALL_COST (1);
11345 case UNSIGNED_FLOAT:
11346 if (TARGET_HARD_FLOAT)
11348 /* ??? Increase the cost to deal with transferring from CORE
11349 -> FP registers? */
11351 *cost += extra_cost->fp[mode == DFmode].fromint;
11354 *cost = LIBCALL_COST (1);
11362 /* Just a guess. Guess number of instructions in the asm
11363 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11364 though (see PR60663). */
11365 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11366 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11368 *cost = COSTS_N_INSNS (asm_length + num_operands);
11372 if (mode != VOIDmode)
11373 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11375 *cost = COSTS_N_INSNS (4); /* Who knows? */
11380 #undef HANDLE_NARROW_SHIFT_ARITH
11382 /* RTX costs entry point. */
11385 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11386 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11389 int code = GET_CODE (x);
11390 gcc_assert (current_tune->insn_extra_cost);
11392 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
11393 (enum rtx_code) outer_code,
11394 current_tune->insn_extra_cost,
11397 if (dump_file && arm_verbose_cost)
11399 print_rtl_single (dump_file, x);
11400 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11401 *total, result ? "final" : "partial");
11406 /* All address computations that can be done are free, but rtx cost returns
11407 the same for practically all of them. So we weight the different types
11408 of address here in the order (most preferred first):
11409 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
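/* For example (illustrative addition): an auto-increment address such as
   [r0], #4 receives the lowest weight, a reg+reg or shifted-index sum the
   next lowest, then a reg+constant sum, then a plain register, with a bare
   label or memory reference weighted highest.  */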
11411 arm_arm_address_cost (rtx x)
11413 enum rtx_code c = GET_CODE (x);
11415 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11417 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11422 if (CONST_INT_P (XEXP (x, 1)))
11425 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11435 arm_thumb_address_cost (rtx x)
11437 enum rtx_code c = GET_CODE (x);
11442 && REG_P (XEXP (x, 0))
11443 && CONST_INT_P (XEXP (x, 1)))
11450 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11451 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11453 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11456 /* Adjust cost hook for XScale. */
11458 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11461 /* Some true dependencies can have a higher cost depending
11462 on precisely how certain input operands are used. */
11464 && recog_memoized (insn) >= 0
11465 && recog_memoized (dep) >= 0)
11467 int shift_opnum = get_attr_shift (insn);
11468 enum attr_type attr_type = get_attr_type (dep);
11470 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11471 operand for INSN. If we have a shifted input operand and the
11472 instruction we depend on is another ALU instruction, then we may
11473 have to account for an additional stall. */
11474 if (shift_opnum != 0
11475 && (attr_type == TYPE_ALU_SHIFT_IMM
11476 || attr_type == TYPE_ALUS_SHIFT_IMM
11477 || attr_type == TYPE_LOGIC_SHIFT_IMM
11478 || attr_type == TYPE_LOGICS_SHIFT_IMM
11479 || attr_type == TYPE_ALU_SHIFT_REG
11480 || attr_type == TYPE_ALUS_SHIFT_REG
11481 || attr_type == TYPE_LOGIC_SHIFT_REG
11482 || attr_type == TYPE_LOGICS_SHIFT_REG
11483 || attr_type == TYPE_MOV_SHIFT
11484 || attr_type == TYPE_MVN_SHIFT
11485 || attr_type == TYPE_MOV_SHIFT_REG
11486 || attr_type == TYPE_MVN_SHIFT_REG))
11488 rtx shifted_operand;
11491 /* Get the shifted operand. */
11492 extract_insn (insn);
11493 shifted_operand = recog_data.operand[shift_opnum];
11495 /* Iterate over all the operands in DEP. If we write an operand
11496 that overlaps with SHIFTED_OPERAND, then we have to increase the
11497 cost of this dependency. */
11498 extract_insn (dep);
11499 preprocess_constraints (dep);
11500 for (opno = 0; opno < recog_data.n_operands; opno++)
11502 /* We can ignore strict inputs. */
11503 if (recog_data.operand_type[opno] == OP_IN)
11506 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11518 /* Adjust cost hook for Cortex A9. */
11520 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11530 case REG_DEP_OUTPUT:
11531 if (recog_memoized (insn) >= 0
11532 && recog_memoized (dep) >= 0)
11534 if (GET_CODE (PATTERN (insn)) == SET)
11537 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11539 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11541 enum attr_type attr_type_insn = get_attr_type (insn);
11542 enum attr_type attr_type_dep = get_attr_type (dep);
11544 /* By default all dependencies of the form
11547 have an extra latency of 1 cycle because
11548 of the input and output dependency in this
11549 case. However this gets modeled as a true
11550 dependency and hence all these checks. */
11551 if (REG_P (SET_DEST (PATTERN (insn)))
11552 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11554 /* FMACS is a special case where the dependent
11555 instruction can be issued 3 cycles before
11556 the normal latency in case of an output dependency. */
11558 if ((attr_type_insn == TYPE_FMACS
11559 || attr_type_insn == TYPE_FMACD)
11560 && (attr_type_dep == TYPE_FMACS
11561 || attr_type_dep == TYPE_FMACD))
11563 if (dep_type == REG_DEP_OUTPUT)
11564 *cost = insn_default_latency (dep) - 3;
11566 *cost = insn_default_latency (dep);
11571 if (dep_type == REG_DEP_OUTPUT)
11572 *cost = insn_default_latency (dep) + 1;
11574 *cost = insn_default_latency (dep);
11584 gcc_unreachable ();
11590 /* Adjust cost hook for FA726TE. */
11592 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11595 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11596 has a penalty of 3. */
11597 if (dep_type == REG_DEP_TRUE
11598 && recog_memoized (insn) >= 0
11599 && recog_memoized (dep) >= 0
11600 && get_attr_conds (dep) == CONDS_SET)
11602 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11603 if (get_attr_conds (insn) == CONDS_USE
11604 && get_attr_type (insn) != TYPE_BRANCH)
11610 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11611 || get_attr_conds (insn) == CONDS_USE)
11621 /* Implement TARGET_REGISTER_MOVE_COST.
11623 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11624 one that is typically more expensive than a single memory access. We set
11625 the cost to less than two memory accesses so that floating
11626 point to integer conversion does not go through memory. */
11629 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11630 reg_class_t from, reg_class_t to)
11634 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11635 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11637 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11638 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11640 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11647 if (from == HI_REGS || to == HI_REGS)
11654 /* Implement TARGET_MEMORY_MOVE_COST. */
11657 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11658 bool in ATTRIBUTE_UNUSED)
11664 if (GET_MODE_SIZE (mode) < 4)
11667 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
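/* Worked example of the formula above (illustrative addition): for an
   SImode (4-byte) move this yields 2 * 4 * 1 = 8 for LO_REGS and
   2 * 4 * 2 = 16 for any other class, presumably biasing reload towards
   the low registers that load/store insns can address directly.  */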
11671 /* Vectorizer cost model implementation. */
11673 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11675 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11677 int misalign ATTRIBUTE_UNUSED)
11681 switch (type_of_cost)
11684 return current_tune->vec_costs->scalar_stmt_cost;
11687 return current_tune->vec_costs->scalar_load_cost;
11690 return current_tune->vec_costs->scalar_store_cost;
11693 return current_tune->vec_costs->vec_stmt_cost;
11696 return current_tune->vec_costs->vec_align_load_cost;
11699 return current_tune->vec_costs->vec_store_cost;
11701 case vec_to_scalar:
11702 return current_tune->vec_costs->vec_to_scalar_cost;
11704 case scalar_to_vec:
11705 return current_tune->vec_costs->scalar_to_vec_cost;
11707 case unaligned_load:
11708 case vector_gather_load:
11709 return current_tune->vec_costs->vec_unalign_load_cost;
11711 case unaligned_store:
11712 case vector_scatter_store:
11713 return current_tune->vec_costs->vec_unalign_store_cost;
11715 case cond_branch_taken:
11716 return current_tune->vec_costs->cond_taken_branch_cost;
11718 case cond_branch_not_taken:
11719 return current_tune->vec_costs->cond_not_taken_branch_cost;
11722 case vec_promote_demote:
11723 return current_tune->vec_costs->vec_stmt_cost;
11725 case vec_construct:
11726 elements = TYPE_VECTOR_SUBPARTS (vectype);
11727 return elements / 2 + 1;
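/* For instance (illustrative addition), constructing a four-element vector
   is costed at 4 / 2 + 1 = 3 units under this heuristic.  */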
11730 gcc_unreachable ();
11734 /* Implement targetm.vectorize.add_stmt_cost. */
11737 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11738 struct _stmt_vec_info *stmt_info, int misalign,
11739 enum vect_cost_model_location where)
11741 unsigned *cost = (unsigned *) data;
11742 unsigned retval = 0;
11744 if (flag_vect_cost_model)
11746 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11747 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11749 /* Statements in an inner loop relative to the loop being
11750 vectorized are weighted more heavily. The value here is
11751 arbitrary and could potentially be improved with analysis. */
11752 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11753 count *= 50; /* FIXME. */
11755 retval = (unsigned) (count * stmt_cost);
11756 cost[where] += retval;
11762 /* Return true if and only if this insn can dual-issue only as older. */
11764 cortexa7_older_only (rtx_insn *insn)
11766 if (recog_memoized (insn) < 0)
11769 switch (get_attr_type (insn))
11771 case TYPE_ALU_DSP_REG:
11772 case TYPE_ALU_SREG:
11773 case TYPE_ALUS_SREG:
11774 case TYPE_LOGIC_REG:
11775 case TYPE_LOGICS_REG:
11777 case TYPE_ADCS_REG:
11782 case TYPE_SHIFT_IMM:
11783 case TYPE_SHIFT_REG:
11784 case TYPE_LOAD_BYTE:
11787 case TYPE_FFARITHS:
11789 case TYPE_FFARITHD:
11807 case TYPE_F_STORES:
11814 /* Return true if and only if this insn can dual-issue as younger. */
11816 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11818 if (recog_memoized (insn) < 0)
11821 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11825 switch (get_attr_type (insn))
11828 case TYPE_ALUS_IMM:
11829 case TYPE_LOGIC_IMM:
11830 case TYPE_LOGICS_IMM:
11835 case TYPE_MOV_SHIFT:
11836 case TYPE_MOV_SHIFT_REG:
11846 /* Look for an instruction that can dual issue only as an older
11847 instruction, and move it in front of any instructions that can
11848 dual-issue as younger, while preserving the relative order of all
11849 other instructions in the ready list. This is a heuristic to help
11850 dual-issue in later cycles, by postponing issue of more flexible
11851 instructions. This heuristic may affect dual issue opportunities
11852 in the current cycle. */
11854 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11855 int *n_readyp, int clock)
11858 int first_older_only = -1, first_younger = -1;
11862 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11866 /* Traverse the ready list from the head (the instruction to issue
11867 first), looking for the first instruction that can issue as
11868 younger and the first instruction that can dual-issue only as older. */
11870 for (i = *n_readyp - 1; i >= 0; i--)
11872 rtx_insn *insn = ready[i];
11873 if (cortexa7_older_only (insn))
11875 first_older_only = i;
11877 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11880 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11884 /* Nothing to reorder because either no younger insn was found, or an
11885 insn that can dual-issue only as older appears before any insn that
11886 can dual-issue as younger. */
11887 if (first_younger == -1)
11890 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11894 /* Nothing to reorder because no older-only insn in the ready list. */
11895 if (first_older_only == -1)
11898 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11902 /* Move first_older_only insn before first_younger. */
11904 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11905 INSN_UID (ready[first_older_only]),
11906 INSN_UID (ready[first_younger]));
11907 rtx_insn *first_older_only_insn = ready[first_older_only];
11908 for (i = first_older_only; i < first_younger; i++)
11910 ready[i] = ready[i+1];
11913 ready[i] = first_older_only_insn;
11917 /* Implement TARGET_SCHED_REORDER. */
11919 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11924 case TARGET_CPU_cortexa7:
11925 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11928 /* Do nothing for other cores. */
11932 return arm_issue_rate ();
11935 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11936 It corrects the value of COST based on the relationship between
11937 INSN and DEP through the dependence of type DEP_TYPE. It returns the new
11938 value. There is a per-core adjust_cost hook to adjust scheduler costs
11939 and the per-core hook can choose to completely override the generic
11940 adjust_cost function. Only put bits of code into arm_adjust_cost that
11941 are common across all cores. */
11943 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11948 /* When generating Thumb-1 code, we want to place flag-setting operations
11949 close to a conditional branch which depends on them, so that we can
11950 omit the comparison. */
11953 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11954 && recog_memoized (dep) >= 0
11955 && get_attr_conds (dep) == CONDS_SET)
11958 if (current_tune->sched_adjust_cost != NULL)
11960 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11964 /* XXX Is this strictly true? */
11965 if (dep_type == REG_DEP_ANTI
11966 || dep_type == REG_DEP_OUTPUT)
11969 /* Call insns don't incur a stall, even if they follow a load. */
11974 if ((i_pat = single_set (insn)) != NULL
11975 && MEM_P (SET_SRC (i_pat))
11976 && (d_pat = single_set (dep)) != NULL
11977 && MEM_P (SET_DEST (d_pat)))
11979 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11980 /* This is a load after a store; there is no conflict if the load reads
11981 from a cached area. Assume that loads from the stack, and from the
11982 constant pool are cached, and that others will miss. This is a hack. */
11985 if ((GET_CODE (src_mem) == SYMBOL_REF
11986 && CONSTANT_POOL_ADDRESS_P (src_mem))
11987 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11988 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11989 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11997 arm_max_conditional_execute (void)
11999 return max_insns_skipped;
12003 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12006 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12008 return (optimize > 0) ? 2 : 0;
12012 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12014 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12017 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12018 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12019 sequences of non-executed instructions in IT blocks probably take the same
12020 amount of time as executed instructions (and the IT instruction itself takes
12021 space in icache). This function was experimentally determined to give good
12022 results on a popular embedded benchmark. */
12025 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12027 return (TARGET_32BIT && speed_p) ? 1
12028 : arm_default_branch_cost (speed_p, predictable_p);
12032 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12034 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12037 static bool fp_consts_inited = false;
12039 static REAL_VALUE_TYPE value_fp0;
12042 init_fp_table (void)
12046 r = REAL_VALUE_ATOF ("0", DFmode);
12048 fp_consts_inited = true;
12051 /* Return TRUE if rtx X is a valid immediate FP constant. */
12053 arm_const_double_rtx (rtx x)
12055 const REAL_VALUE_TYPE *r;
12057 if (!fp_consts_inited)
12060 r = CONST_DOUBLE_REAL_VALUE (x);
12061 if (REAL_VALUE_MINUS_ZERO (*r))
12064 if (real_equal (r, &value_fp0))
12070 /* VFPv3 has a fairly wide range of representable immediates, formed from
12071 "quarter-precision" floating-point values. These can be evaluated using this
12072 formula (with ^ for exponentiation):
12074 valid = (-1)^s * n * 2^-r
12076 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12077 16 <= n <= 31 and 0 <= r <= 7.
12079 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12081 - A (most-significant) is the sign bit.
12082 - BCD are the exponent (encoded as r XOR 3).
12083 - EFGH are the mantissa (encoded as n - 16).
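/* A worked example of the encoding above (illustrative addition, not part
   of the original sources): 1.0 = (-1)^0 * 16 * 2^-4, so s = 0, n = 16
   and r = 4, giving ABCDEFGH = (0 << 7) | ((4 ^ 3) << 4) | (16 - 16)
   = 0x70.  The hypothetical helper below sketches the reverse mapping,
   decoding an 8-bit index back to its quarter-precision value under the
   field layout just described.  */

static double
vfp3_decode_quarter_precision (unsigned char abcdefgh)
{
  int sign = (abcdefgh >> 7) & 1;	/* A: sign bit.  */
  int r = ((abcdefgh >> 4) & 7) ^ 3;	/* BCD: exponent, encoded as r XOR 3.  */
  int n = (abcdefgh & 15) + 16;		/* EFGH: mantissa, encoded as n - 16.  */

  return (sign ? -1.0 : 1.0) * (double) n / (double) (1 << r);
}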
12086 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12087 fconst[sd] instruction, or -1 if X isn't suitable. */
12089 vfp3_const_double_index (rtx x)
12091 REAL_VALUE_TYPE r, m;
12092 int sign, exponent;
12093 unsigned HOST_WIDE_INT mantissa, mant_hi;
12094 unsigned HOST_WIDE_INT mask;
12095 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12098 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12101 r = *CONST_DOUBLE_REAL_VALUE (x);
12103 /* We can't represent these things, so detect them first. */
12104 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12107 /* Extract sign, exponent and mantissa. */
12108 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12109 r = real_value_abs (&r);
12110 exponent = REAL_EXP (&r);
12111 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12112 highest (sign) bit, with a fixed binary point at bit point_pos.
12113 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12114 bits for the mantissa, this may fail (low bits would be lost). */
12115 real_ldexp (&m, &r, point_pos - exponent);
12116 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12117 mantissa = w.elt (0);
12118 mant_hi = w.elt (1);
12120 /* If there are bits set in the low part of the mantissa, we can't
12121 represent this value. */
12125 /* Now make it so that mantissa contains the most-significant bits, and move
12126 the point_pos to indicate that the least-significant bits have been discarded. */
12128 point_pos -= HOST_BITS_PER_WIDE_INT;
12129 mantissa = mant_hi;
12131 /* We can permit four significant bits of mantissa only, plus a high bit
12132 which is always 1. */
12133 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12134 if ((mantissa & mask) != 0)
12137 /* Now we know the mantissa is in range; chop off the unneeded bits. */
12138 mantissa >>= point_pos - 5;
12140 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12141 floating-point immediate zero with Neon using an integer-zero load, but
12142 that case is handled elsewhere.) */
12146 gcc_assert (mantissa >= 16 && mantissa <= 31);
12148 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12149 normalized significands are in the range [1, 2)). (Our mantissa is shifted
12150 left 4 places at this point relative to normalized IEEE754 values). GCC
12151 internally uses [0.5, 1) (see real.c), so the exponent returned from
12152 REAL_EXP must be altered. */
12153 exponent = 5 - exponent;
12155 if (exponent < 0 || exponent > 7)
12158 /* Sign, mantissa and exponent are now in the correct form to plug into the
12159 formula described in the comment above. */
12160 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12163 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12165 vfp3_const_double_rtx (rtx x)
12170 return vfp3_const_double_index (x) != -1;
12173 /* Recognize immediates which can be used in various Neon instructions. Legal
12174 immediates are described by the following table (for VMVN variants, the
12175 bitwise inverse of the constant shown is recognized. In either case, VMOV
12176 is output and the correct instruction to use for a given constant is chosen
12177 by the assembler). The constant shown is replicated across all elements of
12178 the destination vector.
12180 insn elems variant constant (binary)
12181 ---- ----- ------- -----------------
12182 vmov i32 0 00000000 00000000 00000000 abcdefgh
12183 vmov i32 1 00000000 00000000 abcdefgh 00000000
12184 vmov i32 2 00000000 abcdefgh 00000000 00000000
12185 vmov i32 3 abcdefgh 00000000 00000000 00000000
12186 vmov i16 4 00000000 abcdefgh
12187 vmov i16 5 abcdefgh 00000000
12188 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12189 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12190 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12191 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12192 vmvn i16 10 00000000 abcdefgh
12193 vmvn i16 11 abcdefgh 00000000
12194 vmov i32 12 00000000 00000000 abcdefgh 11111111
12195 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12196 vmov i32 14 00000000 abcdefgh 11111111 11111111
12197 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12198 vmov i8 16 abcdefgh
12199 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12200 eeeeeeee ffffffff gggggggg hhhhhhhh
12201 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12202 vmov f32 19 00000000 00000000 00000000 00000000
12204 For case 18, B = !b. Representable values are exactly those accepted by
12205 vfp3_const_double_index, but are output as floating-point numbers rather than indices.
12208 For case 19, we will change it to vmov.i32 when assembling.
12210 Variants 0-5 (inclusive) may also be used as immediates for the second
12211 operand of VORR/VBIC instructions.
12213 The INVERSE argument causes the bitwise inverse of the given operand to be
12214 recognized instead (used for recognizing legal immediates for the VAND/VORN
12215 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12216 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12217 output, rather than the real insns vbic/vorr).
12219 INVERSE makes no difference to the recognition of float vectors.
12221 The return value is the variant of immediate as shown in the above table, or
12222 -1 if the given value doesn't match any of the listed patterns.
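/* A worked example (illustrative addition, not original text): a V4SImode
   constant whose four elements all equal 0x0000ab00 matches variant 1 above
   and can be output as "vmov.i32 Dd, #0xab00"; one whose elements all equal
   0x0000abff matches variant 12, the "abcdefgh 11111111" form.  */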
12225 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12226 rtx *modconst, int *elementwidth)
12228 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12230 for (i = 0; i < idx; i += (STRIDE)) \
12235 immtype = (CLASS); \
12236 elsize = (ELSIZE); \
12240 unsigned int i, elsize = 0, idx = 0, n_elts;
12241 unsigned int innersize;
12242 unsigned char bytes[16];
12243 int immtype = -1, matches;
12244 unsigned int invmask = inverse ? 0xff : 0;
12245 bool vector = GET_CODE (op) == CONST_VECTOR;
12248 n_elts = CONST_VECTOR_NUNITS (op);
12252 gcc_assert (mode != VOIDmode);
12255 innersize = GET_MODE_UNIT_SIZE (mode);
12257 /* Vectors of float constants. */
12258 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12260 rtx el0 = CONST_VECTOR_ELT (op, 0);
12262 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12265 /* FP16 vectors cannot be represented. */
12266 if (GET_MODE_INNER (mode) == HFmode)
12269 /* All elements in the vector must be the same. Note that 0.0 and -0.0
12270 are distinct in this context. */
12271 if (!const_vec_duplicate_p (op))
12275 *modconst = CONST_VECTOR_ELT (op, 0);
12280 if (el0 == CONST0_RTX (GET_MODE (el0)))
12286 /* The tricks done in the code below apply for little-endian vector layout.
12287 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
12288 FIXME: Implement logic for big-endian vectors. */
12289 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
12292 /* Splat vector constant out into a byte vector. */
12293 for (i = 0; i < n_elts; i++)
12295 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12296 unsigned HOST_WIDE_INT elpart;
12298 gcc_assert (CONST_INT_P (el));
12299 elpart = INTVAL (el);
12301 for (unsigned int byte = 0; byte < innersize; byte++)
12303 bytes[idx++] = (elpart & 0xff) ^ invmask;
12304 elpart >>= BITS_PER_UNIT;
12308 /* Sanity check. */
12309 gcc_assert (idx == GET_MODE_SIZE (mode));
12313 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12314 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12316 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12317 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12319 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12320 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12322 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12323 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12325 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12327 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12329 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12330 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12332 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12333 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12335 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12336 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12338 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12339 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12341 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12343 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12345 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12346 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12348 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12349 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12351 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12352 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12354 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12355 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12357 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12359 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12360 && bytes[i] == bytes[(i + 8) % idx]);
12368 *elementwidth = elsize;
12372 unsigned HOST_WIDE_INT imm = 0;
12374 /* Un-invert bytes of recognized vector, if necessary. */
12376 for (i = 0; i < idx; i++)
12377 bytes[i] ^= invmask;
12381 /* FIXME: Broken on 32-bit H_W_I hosts. */
12382 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12384 for (i = 0; i < 8; i++)
12385 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12386 << (i * BITS_PER_UNIT);
12388 *modconst = GEN_INT (imm);
12392 unsigned HOST_WIDE_INT imm = 0;
12394 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12395 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12397 *modconst = GEN_INT (imm);
12405 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12406 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12407 float elements), and a modified constant (whatever should be output for a
12408 VMOV) in *MODCONST. */
12411 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12412 rtx *modconst, int *elementwidth)
12416 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12422 *modconst = tmpconst;
12425 *elementwidth = tmpwidth;
12430 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12431 the immediate is valid, write a constant suitable for using as an operand
12432 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12433 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12436 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12437 rtx *modconst, int *elementwidth)
12441 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12443 if (retval < 0 || retval > 5)
12447 *modconst = tmpconst;
12450 *elementwidth = tmpwidth;
12455 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12456 the immediate is valid, write a constant suitable for using as an operand
12457 to VSHR/VSHL to *MODCONST and the corresponding element width to
12458 *ELEMENTWIDTH. ISLEFTSHIFT selects between a left and a right shift,
12459 because the two have different limitations. */
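/* Illustrative note (an addition, inferred from the checks below): for a
   V8QImode operand the element size is 8 bits, so a valid VSHL immediate
   lies in the range 0-7 while a valid VSHR immediate lies in the range
   1-8, which is why the two directions are validated separately.  */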
12462 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12463 rtx *modconst, int *elementwidth,
12466 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12467 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12468 unsigned HOST_WIDE_INT last_elt = 0;
12469 unsigned HOST_WIDE_INT maxshift;
12471 /* Walk over the vector elements; every element must hold the same shift count. */
12472 for (i = 0; i < n_elts; i++)
12474 rtx el = CONST_VECTOR_ELT (op, i);
12475 unsigned HOST_WIDE_INT elpart;
12477 if (CONST_INT_P (el))
12478 elpart = INTVAL (el);
12479 else if (CONST_DOUBLE_P (el))
12482 gcc_unreachable ();
12484 if (i != 0 && elpart != last_elt)
12490 /* Shift less than element size. */
12491 maxshift = innersize * 8;
12495 /* Left shift immediate value can be from 0 to <size>-1. */
12496 if (last_elt >= maxshift)
12501 /* Right shift immediate value can be from 1 to <size>. */
12502 if (last_elt == 0 || last_elt > maxshift)
12507 *elementwidth = innersize * 8;
12510 *modconst = CONST_VECTOR_ELT (op, 0);
12515 /* Return a string suitable for output of Neon immediate logic operation MNEM. */
12519 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12520 int inverse, int quad)
12522 int width, is_valid;
12523 static char templ[40];
12525 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12527 gcc_assert (is_valid != 0);
12530 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12532 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12537 /* Return a string suitable for output of Neon immediate shift operation
12538 (VSHR or VSHL) MNEM. */
12541 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12542 machine_mode mode, int quad,
12545 int width, is_valid;
12546 static char templ[40];
12548 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12549 gcc_assert (is_valid != 0);
12552 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12554 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12559 /* Output a sequence of pairwise operations to implement a reduction.
12560 NOTE: We do "too much work" here, because pairwise operations work on two
12561 registers-worth of operands in one go. Unfortunately I don't think we can
12562 exploit those extra calculations to do the full operation in fewer steps.
12563 Although all vector elements of the result but the first are ignored, we
12564 actually calculate the same result in each of the elements. An alternative
12565 such as initially loading a vector with zero to use as each of the second
12566 operands would use up an additional register and take an extra instruction,
12567 for no particular gain. */
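/* As a sketch of the expansion (illustrative addition): reducing a V4HImode
   vector held in d0 takes two pairwise steps,

       vpadd.i16 d1, d0, d0   @ { a0+a1, a2+a3, a0+a1, a2+a3 }
       vpadd.i16 d1, d1, d1   @ element 0 now holds a0+a1+a2+a3

   matching the loop below, which halves the element count each time.  */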
12570 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12571 rtx (*reduc) (rtx, rtx, rtx))
12573 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12576 for (i = parts / 2; i >= 1; i /= 2)
12578 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12579 emit_insn (reduc (dest, tmpsum, tmpsum));
12584 /* If VALS is a vector constant that can be loaded into a register
12585 using VDUP, generate instructions to do so and return an RTX to
12586 assign to the register. Otherwise return NULL_RTX. */
12589 neon_vdup_constant (rtx vals)
12591 machine_mode mode = GET_MODE (vals);
12592 machine_mode inner_mode = GET_MODE_INNER (mode);
12595 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12598 if (!const_vec_duplicate_p (vals, &x))
12599 /* The elements are not all the same. We could handle repeating
12600 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12601 {0, C, 0, C, 0, C, 0, C} which can be loaded using vdup.i16). */
12605 /* We can load this constant by using VDUP and a constant in a
12606 single ARM register. This will be cheaper than a vector load. */
12609 x = copy_to_mode_reg (inner_mode, x);
12610 return gen_vec_duplicate (mode, x);
12613 /* Generate code to load VALS, which is a PARALLEL containing only
12614 constants (for vec_init) or CONST_VECTOR, efficiently into a
12615 register. Returns an RTX to copy into the register, or NULL_RTX
12616 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12619 neon_make_constant (rtx vals)
12621 machine_mode mode = GET_MODE (vals);
12623 rtx const_vec = NULL_RTX;
12624 int n_elts = GET_MODE_NUNITS (mode);
12628 if (GET_CODE (vals) == CONST_VECTOR)
12630 else if (GET_CODE (vals) == PARALLEL)
12632 /* A CONST_VECTOR must contain only CONST_INTs and
12633 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12634 Only store valid constants in a CONST_VECTOR. */
12635 for (i = 0; i < n_elts; ++i)
12637 rtx x = XVECEXP (vals, 0, i);
12638 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12641 if (n_const == n_elts)
12642 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12645 gcc_unreachable ();
12647 if (const_vec != NULL
12648 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12649 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12651 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12652 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12653 pipeline cycle; creating the constant takes one or two ARM
12654 pipeline cycles. */
12656 else if (const_vec != NULL_RTX)
12657 /* Load from constant pool. On Cortex-A8 this takes two cycles
12658 (for either double or quad vectors). We cannot take advantage
12659 of single-cycle VLD1 because we need a PC-relative addressing mode. */
12663 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12664 We cannot construct an initializer. */
12668 /* Initialize vector TARGET to VALS. */
12671 neon_expand_vector_init (rtx target, rtx vals)
12673 machine_mode mode = GET_MODE (target);
12674 machine_mode inner_mode = GET_MODE_INNER (mode);
12675 int n_elts = GET_MODE_NUNITS (mode);
12676 int n_var = 0, one_var = -1;
12677 bool all_same = true;
12681 for (i = 0; i < n_elts; ++i)
12683 x = XVECEXP (vals, 0, i);
12684 if (!CONSTANT_P (x))
12685 ++n_var, one_var = i;
12687 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12693 rtx constant = neon_make_constant (vals);
12694 if (constant != NULL_RTX)
12696 emit_move_insn (target, constant);
12701 /* Splat a single non-constant element if we can. */
12702 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12704 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12705 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
12709 /* One field is non-constant. Load constant then overwrite varying
12710 field. This is more efficient than using the stack. */
12713 rtx copy = copy_rtx (vals);
12714 rtx merge_mask = GEN_INT (1 << one_var);
12716 /* Load constant part of vector, substitute neighboring value for
12717 varying element. */
12718 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12719 neon_expand_vector_init (target, copy);
12721 /* Insert variable. */
12722 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12723 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
12727 /* Construct the vector in memory one field at a time
12728 and load the whole vector. */
12729 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12730 for (i = 0; i < n_elts; i++)
12731 emit_move_insn (adjust_address_nv (mem, inner_mode,
12732 i * GET_MODE_SIZE (inner_mode)),
12733 XVECEXP (vals, 0, i));
12734 emit_move_insn (target, mem);
12737 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12738 an error if it doesn't. EXP indicates the source location, which includes the
12739 inlining history for intrinsics. */
12742 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12743 const_tree exp, const char *desc)
12745 HOST_WIDE_INT lane;
12747 gcc_assert (CONST_INT_P (operand));
12749 lane = INTVAL (operand);
12751 if (lane < low || lane >= high)
12754 error ("%K%s %wd out of range %wd - %wd",
12755 exp, desc, lane, low, high - 1);
12757 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12761 /* Bounds-check lanes. */
12764 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12767 bounds_check (operand, low, high, exp, "lane");
12770 /* Bounds-check constants. */
12773 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12775 bounds_check (operand, low, high, NULL_TREE, "constant");
12779 neon_element_bits (machine_mode mode)
12781 return GET_MODE_UNIT_BITSIZE (mode);
12785 /* Predicates for `match_operand' and `match_operator'. */
12787 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12788 WB is true if full writeback address modes are allowed and is false
12789 if limited writeback address modes (POST_INC and PRE_DEC) are allowed. */
12793 arm_coproc_mem_operand (rtx op, bool wb)
12797 /* Reject eliminable registers. */
12798 if (! (reload_in_progress || reload_completed || lra_in_progress)
12799 && ( reg_mentioned_p (frame_pointer_rtx, op)
12800 || reg_mentioned_p (arg_pointer_rtx, op)
12801 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12802 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12803 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12804 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12807 /* Constants are converted into offsets from labels. */
12811 ind = XEXP (op, 0);
12813 if (reload_completed
12814 && (GET_CODE (ind) == LABEL_REF
12815 || (GET_CODE (ind) == CONST
12816 && GET_CODE (XEXP (ind, 0)) == PLUS
12817 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12818 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12821 /* Match: (mem (reg)). */
12823 return arm_address_register_rtx_p (ind, 0);
12825 /* Auto-increment addressing modes. POST_INC and PRE_DEC are
12826 acceptable in any case (subject to verification by
12827 arm_address_register_rtx_p). We need WB to be true to accept
12828 PRE_INC and POST_DEC. */
12829 if (GET_CODE (ind) == POST_INC
12830 || GET_CODE (ind) == PRE_DEC
12832 && (GET_CODE (ind) == PRE_INC
12833 || GET_CODE (ind) == POST_DEC)))
12834 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12837 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12838 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12839 && GET_CODE (XEXP (ind, 1)) == PLUS
12840 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12841 ind = XEXP (ind, 1);
12846 if (GET_CODE (ind) == PLUS
12847 && REG_P (XEXP (ind, 0))
12848 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12849 && CONST_INT_P (XEXP (ind, 1))
12850 && INTVAL (XEXP (ind, 1)) > -1024
12851 && INTVAL (XEXP (ind, 1)) < 1024
12852 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
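/* Note (added for clarity): the offset test above accepts word-aligned
   offsets in the range [-1020, +1020], matching the 8-bit immediate of
   VLDR/VSTR, which is scaled by 4.  */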
12858 /* Return TRUE if OP is a memory operand which we can load or store a vector
12859 to/from. TYPE is one of the following values:
12860 0 - Vector load/store (vldr)
12861 1 - Core registers (ldm)
12862 2 - Element/structure loads (vld1)
12865 neon_vector_mem_operand (rtx op, int type, bool strict)
12869 /* Reject eliminable registers. */
12870 if (strict && ! (reload_in_progress || reload_completed)
12871 && (reg_mentioned_p (frame_pointer_rtx, op)
12872 || reg_mentioned_p (arg_pointer_rtx, op)
12873 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12874 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12875 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12876 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12879 /* Constants are converted into offsets from labels. */
12883 ind = XEXP (op, 0);
12885 if (reload_completed
12886 && (GET_CODE (ind) == LABEL_REF
12887 || (GET_CODE (ind) == CONST
12888 && GET_CODE (XEXP (ind, 0)) == PLUS
12889 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12890 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12893 /* Match: (mem (reg)). */
12895 return arm_address_register_rtx_p (ind, 0);
12897 /* Allow post-increment with Neon registers. */
12898 if ((type != 1 && GET_CODE (ind) == POST_INC)
12899 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12900 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12902 /* Allow post-increment by register for VLDn. */
12903 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12904 && GET_CODE (XEXP (ind, 1)) == PLUS
12905 && REG_P (XEXP (XEXP (ind, 1), 1)))
12912 && GET_CODE (ind) == PLUS
12913 && REG_P (XEXP (ind, 0))
12914 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12915 && CONST_INT_P (XEXP (ind, 1))
12916 && INTVAL (XEXP (ind, 1)) > -1024
12917 /* For quad modes, we restrict the constant offset to be slightly less
12918 than what the instruction format permits. We have no such constraint
12919 on double mode offsets. (This must match arm_legitimate_index_p.) */
12920 && (INTVAL (XEXP (ind, 1))
12921 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12922 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12928 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct type. */
12931 neon_struct_mem_operand (rtx op)
12935 /* Reject eliminable registers. */
12936 if (! (reload_in_progress || reload_completed)
12937 && ( reg_mentioned_p (frame_pointer_rtx, op)
12938 || reg_mentioned_p (arg_pointer_rtx, op)
12939 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12940 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12941 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12942 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12945 /* Constants are converted into offsets from labels. */
12949 ind = XEXP (op, 0);
12951 if (reload_completed
12952 && (GET_CODE (ind) == LABEL_REF
12953 || (GET_CODE (ind) == CONST
12954 && GET_CODE (XEXP (ind, 0)) == PLUS
12955 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12956 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12959 /* Match: (mem (reg)). */
12961 return arm_address_register_rtx_p (ind, 0);
12963 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12964 if (GET_CODE (ind) == POST_INC
12965 || GET_CODE (ind) == PRE_DEC)
12966 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12971 /* Prepares the operands for the VCMLA by lane instruction such that the right
12972 register number is selected. This instruction is special in that it always
12973 requires a D register; however, there is a choice to be made between Dn[0],
12974 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
12976 The VCMLA by lane function always selects two values. For instance given D0
12977 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
12978 used by the instruction. However given V4SF then index 0 and 1 are valid as
12979 D0[0] or D1[0] are both valid.
12981 This function centralizes that information based on OPERANDS: OPERANDS[3]
12982 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
12983 updated to contain the right index. */
12986 neon_vcmla_lane_prepare_operands (rtx *operands)
12988 int lane = INTVAL (operands[4]);
12989 machine_mode constmode = SImode;
12990 machine_mode mode = GET_MODE (operands[3]);
12991 int regno = REGNO (operands[3]);
12992 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
12993 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
12995 operands[3] = gen_int_mode (regno + 1, constmode);
12997 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13001 operands[3] = gen_int_mode (regno, constmode);
13002 operands[4] = gen_int_mode (lane, constmode);
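/* Worked example (added for clarity): if operands[3] is a V4SF value
   starting at d0 and operands[4] is lane 1, then GET_MODE_NUNITS / 4
   is 1, so the code above moves to the next D register: operands[3]
   becomes the constant 1 (selecting d1) and operands[4] becomes 0.  */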
13008 /* Return true if X is a register that will be eliminated later on. */
13010 arm_eliminable_register (rtx x)
13012 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13013 || REGNO (x) == ARG_POINTER_REGNUM
13014 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13015 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13018 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13019 coprocessor registers. Otherwise return NO_REGS. */
13022 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13024 if (mode == HFmode)
13026 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13027 return GENERAL_REGS;
13028 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13030 return GENERAL_REGS;
13033 /* The neon move patterns handle all legitimate vector and struct modes. */
13036 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13037 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13038 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13039 || VALID_NEON_STRUCT_MODE (mode)))
13042 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13045 return GENERAL_REGS;
13048 /* Values which must be returned in the most-significant end of the return register. */
13052 arm_return_in_msb (const_tree valtype)
13054 return (TARGET_AAPCS_BASED
13055 && BYTES_BIG_ENDIAN
13056 && (AGGREGATE_TYPE_P (valtype)
13057 || TREE_CODE (valtype) == COMPLEX_TYPE
13058 || FIXED_POINT_TYPE_P (valtype)));
13061 /* Return TRUE if X references a SYMBOL_REF. */
13063 symbol_mentioned_p (rtx x)
13068 if (GET_CODE (x) == SYMBOL_REF)
13071 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13072 are constant offsets, not symbols. */
13073 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13076 fmt = GET_RTX_FORMAT (GET_CODE (x));
13078 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13084 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13085 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13088 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13095 /* Return TRUE if X references a LABEL_REF. */
13097 label_mentioned_p (rtx x)
13102 if (GET_CODE (x) == LABEL_REF)
13105 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13106 instruction, but they are constant offsets, not symbols. */
13107 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13110 fmt = GET_RTX_FORMAT (GET_CODE (x));
13111 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13117 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13118 if (label_mentioned_p (XVECEXP (x, i, j)))
13121 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13129 tls_mentioned_p (rtx x)
13131 switch (GET_CODE (x))
13134 return tls_mentioned_p (XEXP (x, 0));
13137 if (XINT (x, 1) == UNSPEC_TLS)
13140 /* Fall through. */
13146 /* Must not copy any rtx that uses a pc-relative address.
13147 Also, disallow copying of load-exclusive instructions that
13148 may appear after splitting of compare-and-swap-style operations
13149 so as to prevent those loops from being transformed away from their
13150 canonical forms (see PR 69904). */
13153 arm_cannot_copy_insn_p (rtx_insn *insn)
13155 /* The tls call insn cannot be copied, as it is paired with a data word. */
13157 if (recog_memoized (insn) == CODE_FOR_tlscall)
13160 subrtx_iterator::array_type array;
13161 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13163 const_rtx x = *iter;
13164 if (GET_CODE (x) == UNSPEC
13165 && (XINT (x, 1) == UNSPEC_PIC_BASE
13166 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13170 rtx set = single_set (insn);
13173 rtx src = SET_SRC (set);
13174 if (GET_CODE (src) == ZERO_EXTEND)
13175 src = XEXP (src, 0);
13177 /* Catch the load-exclusive and load-acquire operations. */
13178 if (GET_CODE (src) == UNSPEC_VOLATILE
13179 && (XINT (src, 1) == VUNSPEC_LL
13180 || XINT (src, 1) == VUNSPEC_LAX))
13187 minmax_code (rtx x)
13189 enum rtx_code code = GET_CODE (x);
13202 gcc_unreachable ();
13206 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13209 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13210 int *mask, bool *signed_sat)
13212 /* The high bound must be a power of two minus one. */
13213 int log = exact_log2 (INTVAL (hi_bound) + 1);
13217 /* The low bound is either zero (for usat) or one less than the
13218 negation of the high bound (for ssat). */
13219 if (INTVAL (lo_bound) == 0)
13224 *signed_sat = false;
13229 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13234 *signed_sat = true;
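/* Worked example (added for clarity): bounds [0, 255] give log == 8 and
   unsigned saturation (usat #8); bounds [-128, 127] give log == 7 and
   mask == log + 1 == 8, i.e. signed saturation (ssat #8).  */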
13242 /* Return 1 if memory locations are adjacent. */
13244 adjacent_mem_locations (rtx a, rtx b)
13246 /* We don't guarantee to preserve the order of these memory refs. */
13247 if (volatile_refs_p (a) || volatile_refs_p (b))
13250 if ((REG_P (XEXP (a, 0))
13251 || (GET_CODE (XEXP (a, 0)) == PLUS
13252 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13253 && (REG_P (XEXP (b, 0))
13254 || (GET_CODE (XEXP (b, 0)) == PLUS
13255 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13257 HOST_WIDE_INT val0 = 0, val1 = 0;
13261 if (GET_CODE (XEXP (a, 0)) == PLUS)
13263 reg0 = XEXP (XEXP (a, 0), 0);
13264 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13267 reg0 = XEXP (a, 0);
13269 if (GET_CODE (XEXP (b, 0)) == PLUS)
13271 reg1 = XEXP (XEXP (b, 0), 0);
13272 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13275 reg1 = XEXP (b, 0);
13277 /* Don't accept any offset that will require multiple
13278 instructions to handle, since this would cause the
13279 arith_adjacentmem pattern to output an overlong sequence. */
13280 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13283 /* Don't allow an eliminable register: register elimination can make
13284 the offset too large. */
13285 if (arm_eliminable_register (reg0))
13288 val_diff = val1 - val0;
13292 /* If the target has load delay slots, then there's no benefit
13293 to using an ldm instruction unless the offset is zero and
13294 we are optimizing for size. */
13295 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13296 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13297 && (val_diff == 4 || val_diff == -4));
13300 return ((REGNO (reg0) == REGNO (reg1))
13301 && (val_diff == 4 || val_diff == -4));
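/* For illustration (added): MEMs addressed as (plus r4 8) and
   (plus r4 12) are adjacent (same base register, val_diff == 4) and can
   feed the arith_adjacentmem pattern; (plus r4 8) and (plus r5 12) are
   not, because the base registers differ.  */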
13307 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13308 for load operations, false for store operations. CONSECUTIVE is true
13309 if the register numbers in the operation must be consecutive in the register
13310 bank. RETURN_PC is true if the value is to be loaded into the PC.
13311 The pattern we are trying to match for load is:
13312 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13313 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13316 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13319 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13320 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13321 3. If consecutive is TRUE, then for kth register being loaded,
13322 REGNO (R_dk) = REGNO (R_d0) + k.
13323 The pattern for store is similar. */
13325 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13326 bool consecutive, bool return_pc)
13328 HOST_WIDE_INT count = XVECLEN (op, 0);
13329 rtx reg, mem, addr;
13331 unsigned first_regno;
13332 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13334 bool addr_reg_in_reglist = false;
13335 bool update = false;
13340 /* If not in SImode, then registers must be consecutive
13341 (e.g., VLDM instructions for DFmode). */
13342 gcc_assert ((mode == SImode) || consecutive);
13343 /* Setting return_pc for stores is illegal. */
13344 gcc_assert (!return_pc || load);
13346 /* Set up the increments and the regs per val based on the mode. */
13347 reg_increment = GET_MODE_SIZE (mode);
13348 regs_per_val = reg_increment / 4;
13349 offset_adj = return_pc ? 1 : 0;
13352 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13353 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13356 /* Check if this is a write-back. */
13357 elt = XVECEXP (op, 0, offset_adj);
13358 if (GET_CODE (SET_SRC (elt)) == PLUS)
13364 /* The offset adjustment must be the number of registers being
13365 popped times the size of a single register. */
13366 if (!REG_P (SET_DEST (elt))
13367 || !REG_P (XEXP (SET_SRC (elt), 0))
13368 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13369 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13370 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13371 ((count - 1 - offset_adj) * reg_increment))
13375 i = i + offset_adj;
13376 base = base + offset_adj;
13377 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13378 success depends on the type: VLDM can do just one reg,
13379 LDM must do at least two. */
13380 if ((count <= i) && (mode == SImode))
13383 elt = XVECEXP (op, 0, i - 1);
13384 if (GET_CODE (elt) != SET)
13389 reg = SET_DEST (elt);
13390 mem = SET_SRC (elt);
13394 reg = SET_SRC (elt);
13395 mem = SET_DEST (elt);
13398 if (!REG_P (reg) || !MEM_P (mem))
13401 regno = REGNO (reg);
13402 first_regno = regno;
13403 addr = XEXP (mem, 0);
13404 if (GET_CODE (addr) == PLUS)
13406 if (!CONST_INT_P (XEXP (addr, 1)))
13409 offset = INTVAL (XEXP (addr, 1));
13410 addr = XEXP (addr, 0);
13416 /* Don't allow SP to be loaded unless it is also the base register. It
13417 guarantees that SP is reset correctly when an LDM instruction
13418 is interrupted. Otherwise, we might end up with a corrupt stack. */
13419 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13422 if (regno == REGNO (addr))
13423 addr_reg_in_reglist = true;
13425 for (; i < count; i++)
13427 elt = XVECEXP (op, 0, i);
13428 if (GET_CODE (elt) != SET)
13433 reg = SET_DEST (elt);
13434 mem = SET_SRC (elt);
13438 reg = SET_SRC (elt);
13439 mem = SET_DEST (elt);
13443 || GET_MODE (reg) != mode
13444 || REGNO (reg) <= regno
13447 (unsigned int) (first_regno + regs_per_val * (i - base))))
13448 /* Don't allow SP to be loaded unless it is also the base register. It
13449 guarantees that SP is reset correctly when an LDM instruction
13450 is interrupted. Otherwise, we might end up with a corrupt stack. */
13451 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13453 || GET_MODE (mem) != mode
13454 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13455 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13456 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13457 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13458 offset + (i - base) * reg_increment))
13459 && (!REG_P (XEXP (mem, 0))
13460 || offset + (i - base) * reg_increment != 0)))
13463 regno = REGNO (reg);
13464 if (regno == REGNO (addr))
13465 addr_reg_in_reglist = true;
13470 if (update && addr_reg_in_reglist)
13473 /* For Thumb-1, the address register is always modified - either by write-back
13474 or by explicit load. If the pattern does not describe an update,
13475 then the address register must be in the list of loaded registers. */
13477 return update || addr_reg_in_reglist;
13483 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13484 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13485 instruction. ADD_OFFSET is nonzero if the base address register needs
13486 to be modified with an add instruction before we can use it. */
13489 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13490 int nops, HOST_WIDE_INT add_offset)
13492 /* For ARM8, ARM9 and StrongARM, 2 ldr instructions are faster than an ldm
13493 if the offset isn't small enough. The reason 2 ldrs are faster
13494 is because these ARMs are able to do more than one cache access
13495 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13496 whilst the ARM8 has a double bandwidth cache. This means that
13497 these cores can do both an instruction fetch and a data fetch in
13498 a single cycle, so the trick of calculating the address into a
13499 scratch register (one of the result regs) and then doing a load
13500 multiple actually becomes slower (and no smaller in code size).
13501 That is the transformation
13503 ldr rd1, [rbase + offset]
13504 ldr rd2, [rbase + offset + 4]
13508 add rd1, rbase, offset
13509 ldmia rd1, {rd1, rd2}
13511 produces worse code -- '3 cycles + any stalls on rd2' instead of
13512 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13513 access per cycle, the first sequence could never complete in less
13514 than 6 cycles, whereas the ldm sequence would only take 5 and
13515 would make better use of sequential accesses if not hitting the
13518 We cheat here and test 'arm_ld_sched' which we currently know to
13519 only be true for the ARM8, ARM9 and StrongARM. If this ever
13520 changes, then the test below needs to be reworked. */
13521 if (nops == 2 && arm_ld_sched && add_offset != 0)
13524 /* XScale has load-store double instructions, but they have stricter
13525 alignment requirements than load-store multiple, so we cannot use them.
13528 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13529 the pipeline until completion.
13537 An ldr instruction takes 1-3 cycles, but does not block the pipeline.
13546 Best case ldr will always win. However, the more ldr instructions
13547 we issue, the less likely we are to be able to schedule them well.
13548 Using ldr instructions also increases code size.
13550 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13551 for counts of 3 or 4 regs. */
13552 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13557 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13558 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13559 an array ORDER which describes the sequence to use when accessing the
13560 offsets that produces an ascending order. In this sequence, each
13561 offset must be larger by exactly 4 than the previous one. ORDER[0]
13562 must have been filled in with the lowest offset by the caller.
13563 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13564 we use to verify that ORDER produces an ascending order of registers.
13565 Return true if it was possible to construct such an order, false if not. */
13569 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13570 int *unsorted_regs)
13573 for (i = 1; i < nops; i++)
13577 order[i] = order[i - 1];
13578 for (j = 0; j < nops; j++)
13579 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13581 /* We must find exactly one offset that is higher than the
13582 previous one by 4. */
13583 if (order[i] != order[i - 1])
13587 if (order[i] == order[i - 1])
13589 /* The register numbers must be ascending. */
13590 if (unsorted_regs != NULL
13591 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
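/* Worked example (added for clarity): with unsorted_offsets == {8, 0, 4}
   and order[0] == 1 (the index of the lowest offset), the loop finds
   offset 4 at index 2 and then offset 8 at index 0, producing
   order == {1, 2, 0}.  Offsets {0, 4, 4} would fail the uniqueness
   check, since two entries match unsorted_offsets[order[0]] + 4.  */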
13597 /* Used to determine in a peephole whether a sequence of load
13598 instructions can be changed into a load-multiple instruction.
13599 NOPS is the number of separate load instructions we are examining. The
13600 first NOPS entries in OPERANDS are the destination registers, the
13601 next NOPS entries are memory operands. If this function is
13602 successful, *BASE is set to the common base register of the memory
13603 accesses; *LOAD_OFFSET is set to the first memory location's offset
13604 from that base register.
13605 REGS is an array filled in with the destination register numbers.
13606 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13607 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13608 the sequence of registers in REGS matches the loads from ascending memory
13609 locations, and the function verifies that the register numbers are
13610 themselves ascending. If CHECK_REGS is false, the register numbers
13611 are stored in the order they are found in the operands. */
13613 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13614 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13616 int unsorted_regs[MAX_LDM_STM_OPS];
13617 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13618 int order[MAX_LDM_STM_OPS];
13622 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13623 easily extended if required. */
13624 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13626 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13628 /* Loop over the operands and check that the memory references are
13629 suitable (i.e. immediate offsets from the same base register). At
13630 the same time, extract the target register, and the memory
13632 for (i = 0; i < nops; i++)
13637 /* Convert a subreg of a mem into the mem itself. */
13638 if (GET_CODE (operands[nops + i]) == SUBREG)
13639 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13641 gcc_assert (MEM_P (operands[nops + i]));
13643 /* Don't reorder volatile memory references; it doesn't seem worth
13644 looking for the case where the order is ok anyway. */
13645 if (MEM_VOLATILE_P (operands[nops + i]))
13648 offset = const0_rtx;
13650 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13651 || (GET_CODE (reg) == SUBREG
13652 && REG_P (reg = SUBREG_REG (reg))))
13653 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13654 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13655 || (GET_CODE (reg) == SUBREG
13656 && REG_P (reg = SUBREG_REG (reg))))
13657 && (CONST_INT_P (offset
13658 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13662 base_reg = REGNO (reg);
13663 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13666 else if (base_reg != (int) REGNO (reg))
13667 /* Not addressed from the same base register. */
13670 unsorted_regs[i] = (REG_P (operands[i])
13671 ? REGNO (operands[i])
13672 : REGNO (SUBREG_REG (operands[i])));
13674 /* If it isn't an integer register, or if it overwrites the
13675 base register but isn't the last insn in the list, then
13676 we can't do this. */
13677 if (unsorted_regs[i] < 0
13678 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13679 || unsorted_regs[i] > 14
13680 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13683 /* Don't allow SP to be loaded unless it is also the base
13684 register. It guarantees that SP is reset correctly when
13685 an LDM instruction is interrupted. Otherwise, we might
13686 end up with a corrupt stack. */
13687 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13690 unsorted_offsets[i] = INTVAL (offset);
13691 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13695 /* Not a suitable memory address. */
13699 /* All the useful information has now been extracted from the
13700 operands into unsorted_regs and unsorted_offsets; additionally,
13701 order[0] has been set to the lowest offset in the list. Sort
13702 the offsets into order, verifying that they are adjacent, and
13703 check that the register numbers are ascending. */
13704 if (!compute_offset_order (nops, unsorted_offsets, order,
13705 check_regs ? unsorted_regs : NULL))
13709 memcpy (saved_order, order, sizeof order);
13715 for (i = 0; i < nops; i++)
13716 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13718 *load_offset = unsorted_offsets[order[0]];
13721 if (unsorted_offsets[order[0]] == 0)
13722 ldm_case = 1; /* ldmia */
13723 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13724 ldm_case = 2; /* ldmib */
13725 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13726 ldm_case = 3; /* ldmda */
13727 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13728 ldm_case = 4; /* ldmdb */
13729 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13730 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13735 if (!multiple_operation_profitable_p (false, nops,
13737 ? unsorted_offsets[order[0]] : 0))
13743 /* Used to determine in a peephole whether a sequence of store instructions can
13744 be changed into a store-multiple instruction.
13745 NOPS is the number of separate store instructions we are examining.
13746 NOPS_TOTAL is the total number of instructions recognized by the peephole pattern.
13748 The first NOPS entries in OPERANDS are the source registers, the next
13749 NOPS entries are memory operands. If this function is successful, *BASE is
13750 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13751 to the first memory location's offset from that base register. REGS is an
13752 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13753 likewise filled with the corresponding rtx's.
13754 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13755 numbers to an ascending order of stores.
13756 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13757 from ascending memory locations, and the function verifies that the register
13758 numbers are themselves ascending. If CHECK_REGS is false, the register
13759 numbers are stored in the order they are found in the operands. */
13761 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13762 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13763 HOST_WIDE_INT *load_offset, bool check_regs)
13765 int unsorted_regs[MAX_LDM_STM_OPS];
13766 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13767 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13768 int order[MAX_LDM_STM_OPS];
13770 rtx base_reg_rtx = NULL;
13773 /* Write-back of the base register is currently only supported for Thumb-1. */
13774 int base_writeback = TARGET_THUMB1;
13776 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13777 easily extended if required. */
13778 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13780 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13782 /* Loop over the operands and check that the memory references are
13783 suitable (i.e. immediate offsets from the same base register). At
13784 the same time, extract the target register, and the memory
13786 for (i = 0; i < nops; i++)
13791 /* Convert a subreg of a mem into the mem itself. */
13792 if (GET_CODE (operands[nops + i]) == SUBREG)
13793 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13795 gcc_assert (MEM_P (operands[nops + i]));
13797 /* Don't reorder volatile memory references; it doesn't seem worth
13798 looking for the case where the order is ok anyway. */
13799 if (MEM_VOLATILE_P (operands[nops + i]))
13802 offset = const0_rtx;
13804 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13805 || (GET_CODE (reg) == SUBREG
13806 && REG_P (reg = SUBREG_REG (reg))))
13807 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13808 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13809 || (GET_CODE (reg) == SUBREG
13810 && REG_P (reg = SUBREG_REG (reg))))
13811 && (CONST_INT_P (offset
13812 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13814 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13815 ? operands[i] : SUBREG_REG (operands[i]));
13816 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13820 base_reg = REGNO (reg);
13821 base_reg_rtx = reg;
13822 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13825 else if (base_reg != (int) REGNO (reg))
13826 /* Not addressed from the same base register. */
13829 /* If it isn't an integer register, then we can't do this. */
13830 if (unsorted_regs[i] < 0
13831 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13832 /* The effects are unpredictable if the base register is
13833 both updated and stored. */
13834 || (base_writeback && unsorted_regs[i] == base_reg)
13835 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13836 || unsorted_regs[i] > 14)
13839 unsorted_offsets[i] = INTVAL (offset);
13840 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13844 /* Not a suitable memory address. */
13848 /* All the useful information has now been extracted from the
13849 operands into unsorted_regs and unsorted_offsets; additionally,
13850 order[0] has been set to the lowest offset in the list. Sort
13851 the offsets into order, verifying that they are adjacent, and
13852 check that the register numbers are ascending. */
13853 if (!compute_offset_order (nops, unsorted_offsets, order,
13854 check_regs ? unsorted_regs : NULL))
13858 memcpy (saved_order, order, sizeof order);
13864 for (i = 0; i < nops; i++)
13866 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13868 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13871 *load_offset = unsorted_offsets[order[0]];
13875 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13878 if (unsorted_offsets[order[0]] == 0)
13879 stm_case = 1; /* stmia */
13880 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13881 stm_case = 2; /* stmib */
13882 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13883 stm_case = 3; /* stmda */
13884 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13885 stm_case = 4; /* stmdb */
13889 if (!multiple_operation_profitable_p (false, nops, 0))
13895 /* Routines for use in generating RTL. */
13897 /* Generate a load-multiple instruction. COUNT is the number of loads in
13898 the instruction; REGS and MEMS are arrays containing the operands.
13899 BASEREG is the base register to be used in addressing the memory operands.
13900 WBACK_OFFSET is nonzero if the instruction should update the base register. */
13904 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13905 HOST_WIDE_INT wback_offset)
13910 if (!multiple_operation_profitable_p (false, count, 0))
13916 for (i = 0; i < count; i++)
13917 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13919 if (wback_offset != 0)
13920 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13922 seq = get_insns ();
13928 result = gen_rtx_PARALLEL (VOIDmode,
13929 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13930 if (wback_offset != 0)
13932 XVECEXP (result, 0, 0)
13933 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13938 for (j = 0; i < count; i++, j++)
13939 XVECEXP (result, 0, i)
13940 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
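/* For illustration (added): with COUNT == 2, REGS == {4, 5}, a base
   register of, say, r3 and WBACK_OFFSET == 8, the PARALLEL built above is

     (parallel [(set (reg r3) (plus (reg r3) (const_int 8)))
                (set (reg r4) (mem (reg r3)))
                (set (reg r5) (mem (plus (reg r3) (const_int 4))))])

   which the load-multiple patterns match as "ldmia r3!, {r4, r5}".  */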
13945 /* Generate a store-multiple instruction. COUNT is the number of stores in
13946 the instruction; REGS and MEMS are arrays containing the operands.
13947 BASEREG is the base register to be used in addressing the memory operands.
13948 WBACK_OFFSET is nonzero if the instruction should update the base register. */
13952 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13953 HOST_WIDE_INT wback_offset)
13958 if (GET_CODE (basereg) == PLUS)
13959 basereg = XEXP (basereg, 0);
13961 if (!multiple_operation_profitable_p (false, count, 0))
13967 for (i = 0; i < count; i++)
13968 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13970 if (wback_offset != 0)
13971 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13973 seq = get_insns ();
13979 result = gen_rtx_PARALLEL (VOIDmode,
13980 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13981 if (wback_offset != 0)
13983 XVECEXP (result, 0, 0)
13984 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13989 for (j = 0; i < count; i++, j++)
13990 XVECEXP (result, 0, i)
13991 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13996 /* Generate either a load-multiple or a store-multiple instruction. This
13997 function can be used in situations where we can start with a single MEM
13998 rtx and adjust its address upwards.
13999 COUNT is the number of operations in the instruction, not counting a
14000 possible update of the base register. REGS is an array containing the register numbers to be used.
14002 BASEREG is the base register to be used in addressing the memory operands,
14003 which are constructed from BASEMEM.
14004 WRITE_BACK specifies whether the generated instruction should include an
14005 update of the base register.
14006 OFFSETP is used to pass an offset to and from this function; this offset
14007 is not used when constructing the address (instead BASEMEM should have an
14008 appropriate offset in its address), it is used only for setting
14009 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14012 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14013 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14015 rtx mems[MAX_LDM_STM_OPS];
14016 HOST_WIDE_INT offset = *offsetp;
14019 gcc_assert (count <= MAX_LDM_STM_OPS);
14021 if (GET_CODE (basereg) == PLUS)
14022 basereg = XEXP (basereg, 0);
14024 for (i = 0; i < count; i++)
14026 rtx addr = plus_constant (Pmode, basereg, i * 4);
14027 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14035 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14036 write_back ? 4 * count : 0);
14038 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14039 write_back ? 4 * count : 0);
14043 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14044 rtx basemem, HOST_WIDE_INT *offsetp)
14046 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14051 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14052 rtx basemem, HOST_WIDE_INT *offsetp)
14054 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14058 /* Called from a peephole2 expander to turn a sequence of loads into an
14059 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14060 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14061 is true if we can reorder the registers because they are used commutatively subsequently.
14063 Returns true iff we could generate a new instruction. */
14066 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14068 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14069 rtx mems[MAX_LDM_STM_OPS];
14070 int i, j, base_reg;
14072 HOST_WIDE_INT offset;
14073 int write_back = FALSE;
14077 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14078 &base_reg, &offset, !sort_regs);
14084 for (i = 0; i < nops - 1; i++)
14085 for (j = i + 1; j < nops; j++)
14086 if (regs[i] > regs[j])
14092 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14096 gcc_assert (ldm_case == 1 || ldm_case == 5);
14098 /* Thumb-1 ldm uses writeback except if the base is loaded. */
14100 for (i = 0; i < nops; i++)
14101 if (base_reg == regs[i])
14102 write_back = false;
14104 /* Ensure the base is dead if it is updated. */
14105 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
14111 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14112 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14114 base_reg_rtx = newbase;
14117 for (i = 0; i < nops; i++)
14119 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14120 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14123 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14124 write_back ? offset + i * 4 : 0));
14128 /* Called from a peephole2 expander to turn a sequence of stores into an
14129 STM instruction. OPERANDS are the operands found by the peephole matcher;
14130 NOPS indicates how many separate stores we are trying to combine.
14131 Returns true iff we could generate a new instruction. */
14134 gen_stm_seq (rtx *operands, int nops)
14137 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14138 rtx mems[MAX_LDM_STM_OPS];
14141 HOST_WIDE_INT offset;
14142 int write_back = FALSE;
14145 bool base_reg_dies;
14147 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14148 mem_order, &base_reg, &offset, true);
14153 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14155 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14158 gcc_assert (base_reg_dies);
14164 gcc_assert (base_reg_dies);
14165 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14169 addr = plus_constant (Pmode, base_reg_rtx, offset);
14171 for (i = 0; i < nops; i++)
14173 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14174 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14177 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14178 write_back ? offset + i * 4 : 0));
14182 /* Called from a peephole2 expander to turn a sequence of stores that are
14183 preceded by constant loads into an STM instruction. OPERANDS are the
14184 operands found by the peephole matcher; NOPS indicates how many
14185 separate stores we are trying to combine; there are 2 * NOPS
14186 instructions in the peephole.
14187 Returns true iff we could generate a new instruction. */
14190 gen_const_stm_seq (rtx *operands, int nops)
14192 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14193 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14194 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14195 rtx mems[MAX_LDM_STM_OPS];
14198 HOST_WIDE_INT offset;
14199 int write_back = FALSE;
14202 bool base_reg_dies;
14204 HARD_REG_SET allocated;
14206 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14207 mem_order, &base_reg, &offset, false);
14212 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14214 /* If the same register is used more than once, try to find a free register. */
14216 CLEAR_HARD_REG_SET (allocated);
14217 for (i = 0; i < nops; i++)
14219 for (j = i + 1; j < nops; j++)
14220 if (regs[i] == regs[j])
14222 rtx t = peep2_find_free_register (0, nops * 2,
14223 TARGET_THUMB1 ? "l" : "r",
14224 SImode, &allocated);
14228 regs[i] = REGNO (t);
14232 /* Compute an ordering that maps the register numbers to an ascending sequence. */
14235 for (i = 0; i < nops; i++)
14236 if (regs[i] < regs[reg_order[0]])
14239 for (i = 1; i < nops; i++)
14241 int this_order = reg_order[i - 1];
14242 for (j = 0; j < nops; j++)
14243 if (regs[j] > regs[reg_order[i - 1]]
14244 && (this_order == reg_order[i - 1]
14245 || regs[j] < regs[this_order]))
14247 reg_order[i] = this_order;
14250 /* Ensure that registers that must be live after the instruction end
14251 up with the correct value. */
14252 for (i = 0; i < nops; i++)
14254 int this_order = reg_order[i];
14255 if ((this_order != mem_order[i]
14256 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14257 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14261 /* Load the constants. */
14262 for (i = 0; i < nops; i++)
14264 rtx op = operands[2 * nops + mem_order[i]];
14265 sorted_regs[i] = regs[reg_order[i]];
14266 emit_move_insn (reg_rtxs[reg_order[i]], op);
14269 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14271 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14274 gcc_assert (base_reg_dies);
14280 gcc_assert (base_reg_dies);
14281 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14285 addr = plus_constant (Pmode, base_reg_rtx, offset);
14287 for (i = 0; i < nops; i++)
14289 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14290 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14293 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14294 write_back ? offset + i * 4 : 0));
14298 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14299 unaligned copies on processors which support unaligned semantics for those
14300 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14301 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14302 An interleave factor of 1 (the minimum) will perform no interleaving.
14303 Load/store multiple are used for aligned addresses where possible. */
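/* For example (a sketch, added for clarity): with INTERLEAVE_FACTOR == 2
   and an unaligned source, each iteration of the main copy loop emits

     ldr r0, [src]      @ unaligned SImode loads
     ldr r1, [src, #4]
     str r0, [dst]
     str r1, [dst, #4]

   so the second load can issue before the first result is needed.  */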
14306 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14307 HOST_WIDE_INT length,
14308 unsigned int interleave_factor)
14310 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14311 int *regnos = XALLOCAVEC (int, interleave_factor);
14312 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14313 HOST_WIDE_INT i, j;
14314 HOST_WIDE_INT remaining = length, words;
14315 rtx halfword_tmp = NULL, byte_tmp = NULL;
14317 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14318 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14319 HOST_WIDE_INT srcoffset, dstoffset;
14320 HOST_WIDE_INT src_autoinc, dst_autoinc;
14323 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
14325 /* Use hard registers if we have aligned source or destination so we can use
14326 load/store multiple with contiguous registers. */
14327 if (dst_aligned || src_aligned)
14328 for (i = 0; i < interleave_factor; i++)
14329 regs[i] = gen_rtx_REG (SImode, i);
14331 for (i = 0; i < interleave_factor; i++)
14332 regs[i] = gen_reg_rtx (SImode);
14334 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14335 src = copy_addr_to_reg (XEXP (srcbase, 0));
14337 srcoffset = dstoffset = 0;
14339 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14340 For copying the last bytes we want to subtract this offset again. */
14341 src_autoinc = dst_autoinc = 0;
14343 for (i = 0; i < interleave_factor; i++)
14346 /* Copy BLOCK_SIZE_BYTES chunks. */
14348 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14351 if (src_aligned && interleave_factor > 1)
14353 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14354 TRUE, srcbase, &srcoffset));
14355 src_autoinc += UNITS_PER_WORD * interleave_factor;
14359 for (j = 0; j < interleave_factor; j++)
14361 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14363 mem = adjust_automodify_address (srcbase, SImode, addr,
14364 srcoffset + j * UNITS_PER_WORD);
14365 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14367 srcoffset += block_size_bytes;
14371 if (dst_aligned && interleave_factor > 1)
14373 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14374 TRUE, dstbase, &dstoffset));
14375 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14379 for (j = 0; j < interleave_factor; j++)
14381 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14383 mem = adjust_automodify_address (dstbase, SImode, addr,
14384 dstoffset + j * UNITS_PER_WORD);
14385 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14387 dstoffset += block_size_bytes;
14390 remaining -= block_size_bytes;
14393 /* Copy any whole words left (note these aren't interleaved with any
14394 subsequent halfword/byte load/stores in the interests of simplicity). */
14396 words = remaining / UNITS_PER_WORD;
14398 gcc_assert (words < interleave_factor);
14400 if (src_aligned && words > 1)
14402 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14404 src_autoinc += UNITS_PER_WORD * words;
14408 for (j = 0; j < words; j++)
14410 addr = plus_constant (Pmode, src,
14411 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14412 mem = adjust_automodify_address (srcbase, SImode, addr,
14413 srcoffset + j * UNITS_PER_WORD);
14415 emit_move_insn (regs[j], mem);
14417 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14419 srcoffset += words * UNITS_PER_WORD;
14422 if (dst_aligned && words > 1)
14424 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14426 dst_autoinc += words * UNITS_PER_WORD;
14430 for (j = 0; j < words; j++)
14432 addr = plus_constant (Pmode, dst,
14433 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14434 mem = adjust_automodify_address (dstbase, SImode, addr,
14435 dstoffset + j * UNITS_PER_WORD);
14437 emit_move_insn (mem, regs[j]);
14439 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14441 dstoffset += words * UNITS_PER_WORD;
14444 remaining -= words * UNITS_PER_WORD;
14446 gcc_assert (remaining < 4);
14448 /* Copy a halfword if necessary. */
14450 if (remaining >= 2)
14452 halfword_tmp = gen_reg_rtx (SImode);
14454 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14455 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14456 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14458 /* Either write out immediately, or delay until we've loaded the last
14459 byte, depending on interleave factor. */
14460 if (interleave_factor == 1)
14462 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14463 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14464 emit_insn (gen_unaligned_storehi (mem,
14465 gen_lowpart (HImode, halfword_tmp)));
14466 halfword_tmp = NULL;
14474 gcc_assert (remaining < 2);
14476 /* Copy last byte. */
14478 if ((remaining & 1) != 0)
14480 byte_tmp = gen_reg_rtx (SImode);
14482 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14483 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14484 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14486 if (interleave_factor == 1)
14488 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14489 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14490 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14499 /* Store last halfword if we haven't done so already. */
14503 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14504 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14505 emit_insn (gen_unaligned_storehi (mem,
14506 gen_lowpart (HImode, halfword_tmp)));
14510 /* Likewise for last byte. */
14514 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14515 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14516 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14520 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14523 /* From mips_adjust_block_mem:
14525 Helper function for doing a loop-based block operation on memory
14526 reference MEM. Each iteration of the loop will operate on LENGTH
14529 Create a new base register for use within the loop and point it to
14530 the start of MEM. Create a new memory reference that uses this
14531 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14534 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14537 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14539 /* Although the new mem does not refer to a known location,
14540 it does keep up to LENGTH bytes of alignment. */
14541 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14542 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14545 /* From mips_block_move_loop:
14547 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14548 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14549 the memory regions do not overlap. */
14552 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14553 unsigned int interleave_factor,
14554 HOST_WIDE_INT bytes_per_iter)
14556 rtx src_reg, dest_reg, final_src, test;
14557 HOST_WIDE_INT leftover;
14559 leftover = length % bytes_per_iter;
14560 length -= leftover;
14562 /* Create registers and memory references for use within the loop. */
14563 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14564 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14566 /* Calculate the value that SRC_REG should have after the last iteration of the loop. */
14568 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14569 0, 0, OPTAB_WIDEN);
14571 /* Emit the start of the loop. */
14572 rtx_code_label *label = gen_label_rtx ();
14573 emit_label (label);
14575 /* Emit the loop body. */
14576 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14577 interleave_factor);
14579 /* Move on to the next block. */
14580 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14581 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14583 /* Emit the loop condition. */
14584 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14585 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14587 /* Mop up any left-over bytes. */
14589 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
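/* Worked example (added for clarity): LENGTH == 37 with
   BYTES_PER_ITER == 16 copies 32 bytes in two loop iterations and then
   hands the remaining LEFTOVER == 5 bytes to the straight-line copier,
   which emits one word access followed by one byte access.  */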
14592 /* Emit a block move when either the source or destination is unaligned (not
14593 aligned to a four-byte boundary). This may need further tuning depending on
14594 core type, optimize_size setting, etc. */
14597 arm_cpymemqi_unaligned (rtx *operands)
14599 HOST_WIDE_INT length = INTVAL (operands[2]);
14603 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14604 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14605 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14606 the size of the code if optimizing for size. We'll use ldm/stm if src_aligned
14607 or dst_aligned though: allow more interleaving in those cases since the
14608 resulting code can be smaller. */
14609 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14610 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14613 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14614 interleave_factor, bytes_per_iter);
14616 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14617 interleave_factor);
14621 /* Note that the loop created by arm_block_move_unaligned_loop may be
14622 subject to loop unrolling, which makes tuning this condition a little awkward. */
14625 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14627 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14634 arm_gen_cpymemqi (rtx *operands)
14636 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14637 HOST_WIDE_INT srcoffset, dstoffset;
14638 rtx src, dst, srcbase, dstbase;
14639 rtx part_bytes_reg = NULL;
14642 if (!CONST_INT_P (operands[2])
14643 || !CONST_INT_P (operands[3])
14644 || INTVAL (operands[2]) > 64)
14647 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14648 return arm_cpymemqi_unaligned (operands);
14650 if (INTVAL (operands[3]) & 3)
14653 dstbase = operands[0];
14654 srcbase = operands[1];
14656 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14657 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14659 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14660 out_words_to_go = INTVAL (operands[2]) / 4;
14661 last_bytes = INTVAL (operands[2]) & 3;
14662 dstoffset = srcoffset = 0;
14664 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14665 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14667 while (in_words_to_go >= 2)
14669 if (in_words_to_go > 4)
14670 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14671 TRUE, srcbase, &srcoffset));
14673 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14674 src, FALSE, srcbase,
14677 if (out_words_to_go)
14679 if (out_words_to_go > 4)
14680 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14681 TRUE, dstbase, &dstoffset));
14682 else if (out_words_to_go != 1)
14683 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14684 out_words_to_go, dst,
14687 dstbase, &dstoffset));
14690 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14691 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14692 if (last_bytes != 0)
14694 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14700 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14701 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14704 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14705 if (out_words_to_go)
14709 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14710 sreg = copy_to_reg (mem);
14712 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14713 emit_move_insn (mem, sreg);
14716 gcc_assert (!in_words_to_go); /* Sanity check */
14719 if (in_words_to_go)
14721 gcc_assert (in_words_to_go > 0);
14723 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14724 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14727 gcc_assert (!last_bytes || part_bytes_reg);
14729 if (BYTES_BIG_ENDIAN && last_bytes)
14731 rtx tmp = gen_reg_rtx (SImode);
14733 /* The bytes we want are in the top end of the word. */
14734 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14735 GEN_INT (8 * (4 - last_bytes))));
14736 part_bytes_reg = tmp;
14740 mem = adjust_automodify_address (dstbase, QImode,
14741 plus_constant (Pmode, dst,
14743 dstoffset + last_bytes - 1);
14744 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14748 tmp = gen_reg_rtx (SImode);
14749 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14750 part_bytes_reg = tmp;
14757 if (last_bytes > 1)
14759 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14760 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14764 rtx tmp = gen_reg_rtx (SImode);
14765 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14766 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14767 part_bytes_reg = tmp;
14774 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14775 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
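/* For illustration (added): a 9-byte little-endian copy loads three
   words into r0-r2 (ARM_NUM_INTS rounds the partial word up), stores
   the first two words, and then stores the final byte from r2, the
   part_bytes_reg selected above.  */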
14782 /* Helper for gen_cpymem_ldrd_strd. Increase the address of memory rtx by its mode size. */
14785 next_consecutive_mem (rtx mem)
14787 machine_mode mode = GET_MODE (mem);
14788 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14789 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14791 return adjust_automodify_address (mem, mode, addr, offset);
14794 /* Copy using LDRD/STRD instructions whenever possible.
14795 Returns true upon success. */
14797 gen_cpymem_ldrd_strd (rtx *operands)
14799 unsigned HOST_WIDE_INT len;
14800 HOST_WIDE_INT align;
14801 rtx src, dst, base;
14803 bool src_aligned, dst_aligned;
14804 bool src_volatile, dst_volatile;
14806 gcc_assert (CONST_INT_P (operands[2]));
14807 gcc_assert (CONST_INT_P (operands[3]));
14809 len = UINTVAL (operands[2]);
14813 /* Maximum alignment we can assume for both src and dst buffers. */
14814 align = INTVAL (operands[3]);
14816 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14819 /* Place src and dst addresses in registers
14820 and update the corresponding mem rtx. */
14822 dst_volatile = MEM_VOLATILE_P (dst);
14823 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14824 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14825 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14828 src_volatile = MEM_VOLATILE_P (src);
14829 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14830 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14831 src = adjust_automodify_address (src, VOIDmode, base, 0);
14833 if (!unaligned_access && !(src_aligned && dst_aligned))
14836 if (src_volatile || dst_volatile)
14839 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14840 if (!(dst_aligned || src_aligned))
14841 return arm_gen_cpymemqi (operands);
14843 /* If either src or dst is unaligned, we'll be accessing it as pairs
14844 of unaligned SImode accesses. Otherwise we can generate DImode
14845 ldrd/strd instructions. */
14846 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14847 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14852 reg0 = gen_reg_rtx (DImode);
14853 rtx low_reg = NULL_RTX;
14854 rtx hi_reg = NULL_RTX;
14856 if (!src_aligned || !dst_aligned)
14858 low_reg = gen_lowpart (SImode, reg0);
14859 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14861 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
14862 emit_move_insn (reg0, src);
14863 else if (src_aligned)
14864 emit_insn (gen_unaligned_loaddi (reg0, src));
14867 emit_insn (gen_unaligned_loadsi (low_reg, src));
14868 src = next_consecutive_mem (src);
14869 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14872 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
14873 emit_move_insn (dst, reg0);
14874 else if (dst_aligned)
14875 emit_insn (gen_unaligned_storedi (dst, reg0));
14878 emit_insn (gen_unaligned_storesi (dst, low_reg));
14879 dst = next_consecutive_mem (dst);
14880 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14883 src = next_consecutive_mem (src);
14884 dst = next_consecutive_mem (dst);
14887 gcc_assert (len < 8);
14890 /* More than a word but less than a double-word to copy. Copy a word. */
14891 reg0 = gen_reg_rtx (SImode);
14892 src = adjust_address (src, SImode, 0);
14893 dst = adjust_address (dst, SImode, 0);
14895 emit_move_insn (reg0, src);
14897 emit_insn (gen_unaligned_loadsi (reg0, src));
14900 emit_move_insn (dst, reg0);
14902 emit_insn (gen_unaligned_storesi (dst, reg0));
14904 src = next_consecutive_mem (src);
14905 dst = next_consecutive_mem (dst);
14912 /* Copy the remaining bytes. */
14915 dst = adjust_address (dst, HImode, 0);
14916 src = adjust_address (src, HImode, 0);
14917 reg0 = gen_reg_rtx (SImode);
14919 emit_insn (gen_zero_extendhisi2 (reg0, src));
14921 emit_insn (gen_unaligned_loadhiu (reg0, src));
14924 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14926 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14928 src = next_consecutive_mem (src);
14929 dst = next_consecutive_mem (dst);
14934 dst = adjust_address (dst, QImode, 0);
14935 src = adjust_address (src, QImode, 0);
14936 reg0 = gen_reg_rtx (QImode);
14937 emit_move_insn (reg0, src);
14938 emit_move_insn (dst, reg0);
14942 /* Select a dominance comparison mode if possible for a test of the general
14943 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14944 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14945 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14946 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14947 In all cases OP will be either EQ or NE, but we don't need to know which
14948 here. If we are unable to support a dominance comparison we return
14949 CC mode. This will then fail to match for the RTL expressions that
14950 generate this call. */
14952 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14954 enum rtx_code cond1, cond2;
14957 /* Currently we will probably get the wrong result if the individual
14958 comparisons are not simple. This also ensures that it is safe to
14959 reverse a comparison if necessary. */
if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
     != CCmode)
    || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
	!= CCmode))
  return CCmode;
14966 /* The if_then_else variant of this tests the second condition if the
14967 first passes, but is true if the first fails. Reverse the first
14968 condition to get a true "inclusive-or" expression. */
14969 if (cond_or == DOM_CC_NX_OR_Y)
14970 cond1 = reverse_condition (cond1);
14972 /* If the comparisons are not equal, and one doesn't dominate the other,
14973 then we can't do this. */
if (cond1 != cond2
    && !comparison_dominates_p (cond1, cond2)
    && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
  return CCmode;

if (swapped)
  std::swap (cond1, cond2);
14985 if (cond_or == DOM_CC_X_AND_Y)
14990 case EQ: return CC_DEQmode;
14991 case LE: return CC_DLEmode;
14992 case LEU: return CC_DLEUmode;
14993 case GE: return CC_DGEmode;
14994 case GEU: return CC_DGEUmode;
14995 default: gcc_unreachable ();
14999 if (cond_or == DOM_CC_X_AND_Y)
15011 gcc_unreachable ();
15015 if (cond_or == DOM_CC_X_AND_Y)
15027 gcc_unreachable ();
15031 if (cond_or == DOM_CC_X_AND_Y)
15032 return CC_DLTUmode;
15037 return CC_DLTUmode;
15039 return CC_DLEUmode;
15043 gcc_unreachable ();
15047 if (cond_or == DOM_CC_X_AND_Y)
15048 return CC_DGTUmode;
15053 return CC_DGTUmode;
15055 return CC_DGEUmode;
15059 gcc_unreachable ();
15062 /* The remaining cases only occur when both comparisons are the
15065 gcc_assert (cond1 == cond2);
15069 gcc_assert (cond1 == cond2);
15073 gcc_assert (cond1 == cond2);
15077 gcc_assert (cond1 == cond2);
15078 return CC_DLEUmode;
15081 gcc_assert (cond1 == cond2);
15082 return CC_DGEUmode;
15085 gcc_unreachable ();
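/* For illustration: a source test like (a == 0 && b == 0) arrives here
   with cond1 == cond2 == EQ and COND_OR == DOM_CC_X_AND_Y, and gets
   CC_DEQmode.  The resulting code can then use ARM's conditional
   compare idiom, roughly:
	cmp	a, #0
	cmpeq	b, #0
	beq	...
   folding both tests into one flag-setting sequence.  */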
15090 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15092 /* All floating point compares return CCFP if it is an equality
15093 comparison, and CCFPE otherwise. */
15094 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15117 gcc_unreachable ();
15121 /* A compare with a shifted operand. Because of canonicalization, the
15122 comparison will have to be swapped when we emit the assembler. */
15123 if (GET_MODE (y) == SImode
15124 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15125 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15126 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
    || GET_CODE (x) == ROTATERT))
  return CC_SWPmode;
15130 /* This operation is performed swapped, but since we only rely on the Z
15131 flag we don't need an additional mode. */
15132 if (GET_MODE (y) == SImode
15133 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15134 && GET_CODE (x) == NEG
    && (op == EQ || op == NE))
  return CC_Zmode;
15138 /* This is a special case that is used by combine to allow a
15139 comparison of a shifted byte load to be split into a zero-extend
15140 followed by a comparison of the shifted integer (only valid for
15141 equalities and unsigned inequalities). */
15142 if (GET_MODE (x) == SImode
15143 && GET_CODE (x) == ASHIFT
15144 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15145 && GET_CODE (XEXP (x, 0)) == SUBREG
15146 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15147 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15148 && (op == EQ || op == NE
15149 || op == GEU || op == GTU || op == LTU || op == LEU)
    && CONST_INT_P (y))
  return CC_Zmode;
15153 /* A construct for a conditional compare, if the false arm contains
15154 0, then both conditions must be true, otherwise either condition
15155 must be true. Not all conditions are possible, so CCmode is
15156 returned if it can't be done. */
15157 if (GET_CODE (x) == IF_THEN_ELSE
15158 && (XEXP (x, 2) == const0_rtx
15159 || XEXP (x, 2) == const1_rtx)
15160 && COMPARISON_P (XEXP (x, 0))
15161 && COMPARISON_P (XEXP (x, 1)))
15162 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15163 INTVAL (XEXP (x, 2)));
15165 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15166 if (GET_CODE (x) == AND
15167 && (op == EQ || op == NE)
15168 && COMPARISON_P (XEXP (x, 0))
15169 && COMPARISON_P (XEXP (x, 1)))
return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
				     DOM_CC_X_AND_Y);
15173 if (GET_CODE (x) == IOR
15174 && (op == EQ || op == NE)
15175 && COMPARISON_P (XEXP (x, 0))
15176 && COMPARISON_P (XEXP (x, 1)))
return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
				     DOM_CC_X_OR_Y);
15180 /* An operation (on Thumb) where we want to test for a single bit.
15181 This is done by shifting that bit up into the top bit of a
15182 scratch register; we can then branch on the sign bit. */
if (TARGET_THUMB1
    && GET_MODE (x) == SImode
15185 && (op == EQ || op == NE)
15186 && GET_CODE (x) == ZERO_EXTRACT
    && XEXP (x, 1) == const1_rtx)
  return CC_Nmode;
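/* E.g. on Thumb-1 a branch on bit 3 of r0 can be compiled roughly as
	lsls	r3, r0, #28	@ move bit 3 into bit 31 (the N flag)
	bmi	...		@ taken if the bit was set
   hence a mode that relies only on the N flag is sufficient here.  */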
15190 /* An operation that sets the condition codes as a side-effect, the
15191 V flag is not set correctly, so we can only use comparisons where
this doesn't matter.  (For LT and GE we can use "mi" and "pl"
signs.)  */
15194 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
if (GET_MODE (x) == SImode
    && y == const0_rtx
15197 && (op == EQ || op == NE || op == LT || op == GE)
15198 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15199 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15200 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15201 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15202 || GET_CODE (x) == LSHIFTRT
15203 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15204 || GET_CODE (x) == ROTATERT
15205 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15206 return CC_NOOVmode;
if (GET_MODE (x) == QImode && (op == EQ || op == NE))
  return CC_Zmode;
15211 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15212 && GET_CODE (x) == PLUS
    && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
  return CC_Cmode;
15216 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15222 /* A DImode comparison against zero can be implemented by
15223 or'ing the two halves together. */
15224 if (y == const0_rtx)
15227 /* We can do an equality test in three Thumb instructions. */
15237 /* DImode unsigned comparisons can be implemented by cmp +
cmpeq without a scratch register.  Not worth doing in
Thumb-2.  */
15249 /* DImode signed and unsigned comparisons can be implemented
15250 by cmp + sbcs with a scratch register, but that does not
15251 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15252 gcc_assert (op != EQ && op != NE);
15256 gcc_unreachable ();
15260 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15261 return GET_MODE (x);
15266 /* X and Y are two things to compare using CODE. Emit the compare insn and
15267 return the rtx for register 0 in the proper mode. FP means this is a
15268 floating point compare: I don't think that it is needed on the arm. */
15270 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15274 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15276 /* We might have X as a constant, Y as a register because of the predicates
15277 used for cmpdi. If so, force X to a register here. */
15278 if (dimode_comparison && !REG_P (x))
15279 x = force_reg (DImode, x);
15281 mode = SELECT_CC_MODE (code, x, y);
15282 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15284 if (dimode_comparison
15285 && mode != CC_CZmode)
15289 /* To compare two non-zero values for equality, XOR them and
15290 then compare against zero. Not used for ARM mode; there
15291 CC_CZmode is cheaper. */
15292 if (mode == CC_Zmode && y != const0_rtx)
15294 gcc_assert (!reload_completed);
15295 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15299 /* A scratch register is required. */
15300 if (reload_completed)
15301 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15303 scratch = gen_rtx_SCRATCH (SImode);
15305 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15306 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
15307 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15310 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
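/* The CC_Zmode path above relies on the identity
   (x == y) <=> ((x ^ y) == 0): XORing the two DImode values first
   means only a compare against zero ever has to be emitted, which is
   the cheap form on Thumb.  */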
15315 /* Generate a sequence of insns that will generate the correct return
address mask depending on the physical architecture that the
program is running on.  */
15319 arm_gen_return_addr_mask (void)
15321 rtx reg = gen_reg_rtx (Pmode);
15323 emit_insn (gen_return_addr_mask (reg));
15328 arm_reload_in_hi (rtx *operands)
15330 rtx ref = operands[1];
15332 HOST_WIDE_INT offset = 0;
15334 if (GET_CODE (ref) == SUBREG)
15336 offset = SUBREG_BYTE (ref);
15337 ref = SUBREG_REG (ref);
15342 /* We have a pseudo which has been spilt onto the stack; there
15343 are two cases here: the first where there is a simple
15344 stack-slot replacement and a second where the stack-slot is
15345 out of range, or is used as a subreg. */
15346 if (reg_equiv_mem (REGNO (ref)))
15348 ref = reg_equiv_mem (REGNO (ref));
15349 base = find_replacement (&XEXP (ref, 0));
15352 /* The slot is out of range, or was dressed up in a SUBREG. */
15353 base = reg_equiv_address (REGNO (ref));
15355 /* PR 62554: If there is no equivalent memory location then just move
15356 the value as an SImode register move. This happens when the target
15357 architecture variant does not have an HImode register move. */
15360 gcc_assert (REG_P (operands[0]));
15361 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
15362 gen_rtx_SUBREG (SImode, ref, 0)));
15367 base = find_replacement (&XEXP (ref, 0));
15369 /* Handle the case where the address is too complex to be offset by 1. */
15370 if (GET_CODE (base) == MINUS
15371 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15373 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15375 emit_set_insn (base_plus, base);
15378 else if (GET_CODE (base) == PLUS)
15380 /* The addend must be CONST_INT, or we would have dealt with it above. */
15381 HOST_WIDE_INT hi, lo;
15383 offset += INTVAL (XEXP (base, 1));
15384 base = XEXP (base, 0);
15386 /* Rework the address into a legal sequence of insns. */
15387 /* Valid range for lo is -4095 -> 4095 */
15390 : -((-offset) & 0xfff));
15392 /* Corner case, if lo is the max offset then we would be out of range
15393 once we have added the additional 1 below, so bump the msb into the
15394 pre-loading insn(s). */
15398 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15399 ^ (HOST_WIDE_INT) 0x80000000)
15400 - (HOST_WIDE_INT) 0x80000000);
15402 gcc_assert (hi + lo == offset);
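/* Worked example of the split: for offset 0x2345, lo = 0x345 (within
   the +/-4095 single-insn range) and hi = 0x2000, so hi + lo == offset
   as asserted; HI is added to the base up front and LO is folded into
   the individual byte loads below.  */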
15406 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15408 /* Get the base address; addsi3 knows how to handle constants
15409 that require more than one insn. */
15410 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15416 /* Operands[2] may overlap operands[0] (though it won't overlap
15417 operands[1]), that's why we asked for a DImode reg -- so we can
15418 use the bit that does not overlap. */
15419 if (REGNO (operands[2]) == REGNO (operands[0]))
15420 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15422 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15424 emit_insn (gen_zero_extendqisi2 (scratch,
15425 gen_rtx_MEM (QImode,
15426 plus_constant (Pmode, base,
15428 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15429 gen_rtx_MEM (QImode,
15430 plus_constant (Pmode, base,
15432 if (!BYTES_BIG_ENDIAN)
15433 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15434 gen_rtx_IOR (SImode,
15437 gen_rtx_SUBREG (SImode, operands[0], 0),
15441 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15442 gen_rtx_IOR (SImode,
15443 gen_rtx_ASHIFT (SImode, scratch,
15445 gen_rtx_SUBREG (SImode, operands[0], 0)));
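/* Either way the two byte loads are recombined with a shift and an
   IOR: the byte from the lower address ends up in the least
   significant byte on little-endian and in the more significant byte
   on big-endian, matching what a single halfword load would give.  */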
15448 /* Handle storing a half-word to memory during reload by synthesizing as two
15449 byte stores. Take care not to clobber the input values until after we
15450 have moved them somewhere safe. This code assumes that if the DImode
15451 scratch in operands[2] overlaps either the input value or output address
15452 in some way, then that value must die in this insn (we absolutely need
15453 two scratch registers for some corner cases). */
15455 arm_reload_out_hi (rtx *operands)
15457 rtx ref = operands[0];
15458 rtx outval = operands[1];
15460 HOST_WIDE_INT offset = 0;
15462 if (GET_CODE (ref) == SUBREG)
15464 offset = SUBREG_BYTE (ref);
15465 ref = SUBREG_REG (ref);
15470 /* We have a pseudo which has been spilt onto the stack; there
15471 are two cases here: the first where there is a simple
15472 stack-slot replacement and a second where the stack-slot is
15473 out of range, or is used as a subreg. */
15474 if (reg_equiv_mem (REGNO (ref)))
15476 ref = reg_equiv_mem (REGNO (ref));
15477 base = find_replacement (&XEXP (ref, 0));
15480 /* The slot is out of range, or was dressed up in a SUBREG. */
15481 base = reg_equiv_address (REGNO (ref));
15483 /* PR 62254: If there is no equivalent memory location then just move
15484 the value as an SImode register move. This happens when the target
15485 architecture variant does not have an HImode register move. */
15488 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15490 if (REG_P (outval))
15492 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15493 gen_rtx_SUBREG (SImode, outval, 0)));
15495 else /* SUBREG_P (outval) */
15497 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15498 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15499 SUBREG_REG (outval)));
15501 /* FIXME: Handle other cases ? */
15502 gcc_unreachable ();
15508 base = find_replacement (&XEXP (ref, 0));
15510 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15512 /* Handle the case where the address is too complex to be offset by 1. */
15513 if (GET_CODE (base) == MINUS
15514 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15516 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15518 /* Be careful not to destroy OUTVAL. */
15519 if (reg_overlap_mentioned_p (base_plus, outval))
15521 /* Updating base_plus might destroy outval, see if we can
15522 swap the scratch and base_plus. */
15523 if (!reg_overlap_mentioned_p (scratch, outval))
15524 std::swap (scratch, base_plus);
15527 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15529 /* Be conservative and copy OUTVAL into the scratch now,
15530 this should only be necessary if outval is a subreg
15531 of something larger than a word. */
15532 /* XXX Might this clobber base? I can't see how it can,
15533 since scratch is known to overlap with OUTVAL, and
15534 must be wider than a word. */
15535 emit_insn (gen_movhi (scratch_hi, outval));
15536 outval = scratch_hi;
15540 emit_set_insn (base_plus, base);
15543 else if (GET_CODE (base) == PLUS)
15545 /* The addend must be CONST_INT, or we would have dealt with it above. */
15546 HOST_WIDE_INT hi, lo;
15548 offset += INTVAL (XEXP (base, 1));
15549 base = XEXP (base, 0);
15551 /* Rework the address into a legal sequence of insns. */
15552 /* Valid range for lo is -4095 -> 4095 */
15555 : -((-offset) & 0xfff));
15557 /* Corner case, if lo is the max offset then we would be out of range
15558 once we have added the additional 1 below, so bump the msb into the
15559 pre-loading insn(s). */
15563 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15564 ^ (HOST_WIDE_INT) 0x80000000)
15565 - (HOST_WIDE_INT) 0x80000000);
15567 gcc_assert (hi + lo == offset);
15571 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15573 /* Be careful not to destroy OUTVAL. */
15574 if (reg_overlap_mentioned_p (base_plus, outval))
15576 /* Updating base_plus might destroy outval, see if we
15577 can swap the scratch and base_plus. */
15578 if (!reg_overlap_mentioned_p (scratch, outval))
15579 std::swap (scratch, base_plus);
15582 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15584 /* Be conservative and copy outval into scratch now,
15585 this should only be necessary if outval is a
15586 subreg of something larger than a word. */
15587 /* XXX Might this clobber base? I can't see how it
can, since scratch is known to overlap with
OUTVAL, and must be wider than a word.  */
15590 emit_insn (gen_movhi (scratch_hi, outval));
15591 outval = scratch_hi;
15595 /* Get the base address; addsi3 knows how to handle constants
15596 that require more than one insn. */
15597 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15603 if (BYTES_BIG_ENDIAN)
15605 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15606 plus_constant (Pmode, base,
15608 gen_lowpart (QImode, outval)));
15609 emit_insn (gen_lshrsi3 (scratch,
15610 gen_rtx_SUBREG (SImode, outval, 0),
15612 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15614 gen_lowpart (QImode, scratch)));
15618 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15620 gen_lowpart (QImode, outval)));
15621 emit_insn (gen_lshrsi3 (scratch,
15622 gen_rtx_SUBREG (SImode, outval, 0),
15624 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15625 plus_constant (Pmode, base,
15627 gen_lowpart (QImode, scratch)));
15631 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15632 (padded to the size of a word) should be passed in a register. */
15635 arm_must_pass_in_stack (const function_arg_info &arg)
15637 if (TARGET_AAPCS_BASED)
15638 return must_pass_in_stack_var_size (arg);
15640 return must_pass_in_stack_var_size_or_pad (arg);
15644 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15645 byte of a stack argument has useful data. For legacy APCS ABIs we use
15646 the default. For AAPCS based ABIs small aggregate types are placed
15647 in the lowest memory address. */
15649 static pad_direction
15650 arm_function_arg_padding (machine_mode mode, const_tree type)
15652 if (!TARGET_AAPCS_BASED)
15653 return default_function_arg_padding (mode, type);
15655 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15656 return PAD_DOWNWARD;
15662 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15663 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15664 register has useful data, and return the opposite if the most
15665 significant byte does. */
15668 arm_pad_reg_upward (machine_mode mode,
15669 tree type, int first ATTRIBUTE_UNUSED)
15671 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15673 /* For AAPCS, small aggregates, small fixed-point types,
15674 and small complex types are always padded upwards. */
15677 if ((AGGREGATE_TYPE_P (type)
15678 || TREE_CODE (type) == COMPLEX_TYPE
15679 || FIXED_POINT_TYPE_P (type))
15680 && int_size_in_bytes (type) <= 4)
15685 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15686 && GET_MODE_SIZE (mode) <= 4)
15691 /* Otherwise, use default padding. */
15692 return !BYTES_BIG_ENDIAN;
15695 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15696 assuming that the address in the base register is word aligned. */
15698 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15700 HOST_WIDE_INT max_offset;
15702 /* Offset must be a multiple of 4 in Thumb mode. */
15703 if (TARGET_THUMB2 && ((offset & 3) != 0))
15708 else if (TARGET_ARM)
15713 return ((offset <= max_offset) && (offset >= -max_offset));
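/* The limits mirror the instruction encodings: in ARM state LDRD/STRD
   take an 8-bit byte offset (magnitude up to 255), while in Thumb-2
   state the 8-bit offset is scaled by four (magnitude up to 1020),
   which is also why the offset must be a multiple of 4 there.  */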
15716 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15717 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15718 Assumes that the address in the base register RN is word aligned. Pattern
15719 guarantees that both memory accesses use the same base register,
15720 the offsets are constants within the range, and the gap between the offsets is 4.
If reload is complete then check that the registers are legal.  WBACK indicates whether
15722 address is updated. LOAD indicates whether memory access is load or store. */
15724 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15725 bool wback, bool load)
15727 unsigned int t, t2, n;
if (!reload_completed)
  return true;

if (!offset_ok_for_ldrd_strd (offset))
  return false;
15739 if ((TARGET_THUMB2)
15740 && ((wback && (n == t || n == t2))
15741 || (t == SP_REGNUM)
15742 || (t == PC_REGNUM)
15743 || (t2 == SP_REGNUM)
15744 || (t2 == PC_REGNUM)
15745 || (!load && (n == PC_REGNUM))
15746 || (load && (t == t2))
15747 /* Triggers Cortex-M3 LDRD errata. */
15748 || (!wback && load && fix_cm3_ldrd && (n == t))))
15752 && ((wback && (n == t || n == t2))
15753 || (t2 == PC_REGNUM)
15754 || (t % 2 != 0) /* First destination register is not even. */
/* PC can be used as base register (for offset addressing only),
   but it is deprecated.  */
15758 || (n == PC_REGNUM)))
15764 /* Return true if a 64-bit access with alignment ALIGN and with a
constant offset OFFSET from the base pointer is permitted on this
architecture.  */
15768 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
15770 return (unaligned_access
15771 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
15772 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
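/* For example, with unaligned access enabled a word-aligned buffer at
   a word-aligned offset qualifies (each half can be done as an
   unaligned 32-bit access), whereas without it the buffer must be
   doubleword aligned and the offset a multiple of 8.  */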
15775 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15776 operand MEM's address contains an immediate offset from the base
15777 register and has no side effects, in which case it sets BASE,
15778 OFFSET and ALIGN accordingly. */
15780 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
15784 gcc_assert (base != NULL && offset != NULL);
15786 /* TODO: Handle more general memory operand patterns, such as
15787 PRE_DEC and PRE_INC. */
15789 if (side_effects_p (mem))
15792 /* Can't deal with subregs. */
15793 if (GET_CODE (mem) == SUBREG)
15796 gcc_assert (MEM_P (mem));
15798 *offset = const0_rtx;
15799 *align = MEM_ALIGN (mem);
15801 addr = XEXP (mem, 0);
15803 /* If addr isn't valid for DImode, then we can't handle it. */
15804 if (!arm_legitimate_address_p (DImode, addr,
15805 reload_in_progress || reload_completed))
15813 else if (GET_CODE (addr) == PLUS)
15815 *base = XEXP (addr, 0);
15816 *offset = XEXP (addr, 1);
15817 return (REG_P (*base) && CONST_INT_P (*offset));
15823 /* Called from a peephole2 to replace two word-size accesses with a
15824 single LDRD/STRD instruction. Returns true iff we can generate a
15825 new instruction sequence. That is, both accesses use the same base
15826 register and the gap between constant offsets is 4. This function
15827 may reorder its operands to match ldrd/strd RTL templates.
15828 OPERANDS are the operands found by the peephole matcher;
15829 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
corresponding memory operands.  LOAD indicates whether the access
15831 is load or store. CONST_STORE indicates a store of constant
15832 integer values held in OPERANDS[4,5] and assumes that the pattern
is 4 insns long, for the purpose of checking dead registers.
15834 COMMUTE indicates that register operands may be reordered. */
15836 gen_operands_ldrd_strd (rtx *operands, bool load,
15837 bool const_store, bool commute)
15840 HOST_WIDE_INT offsets[2], offset, align[2];
15841 rtx base = NULL_RTX;
15842 rtx cur_base, cur_offset, tmp;
15844 HARD_REG_SET regset;
15846 gcc_assert (!const_store || !load);
15847 /* Check that the memory references are immediate offsets from the
15848 same base register. Extract the base register, the destination
15849 registers, and the corresponding memory offsets. */
15850 for (i = 0; i < nops; i++)
15852 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15858 else if (REGNO (base) != REGNO (cur_base))
15861 offsets[i] = INTVAL (cur_offset);
15862 if (GET_CODE (operands[i]) == SUBREG)
15864 tmp = SUBREG_REG (operands[i]);
15865 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15870 /* Make sure there is no dependency between the individual loads. */
15871 if (load && REGNO (operands[0]) == REGNO (base))
15872 return false; /* RAW */
15874 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15875 return false; /* WAW */
15877 /* If the same input register is used in both stores
15878 when storing different constants, try to find a free register.
For example, the code
	mov	r0, #0
	str	r0, [r2]
	mov	r0, #1
	str	r0, [r2, #4]
can be transformed into
	mov	r1, #0
	mov	r0, #1
	strd	r1, r0, [r2]
in Thumb mode assuming that r1 is free.
15889 For ARM mode do the same but only if the starting register
15890 can be made to be even. */
15892 && REGNO (operands[0]) == REGNO (operands[1])
15893 && INTVAL (operands[4]) != INTVAL (operands[5]))
15897 CLEAR_HARD_REG_SET (regset);
15898 tmp = peep2_find_free_register (0, 4, "r", SImode, ®set);
15899 if (tmp == NULL_RTX)
15902 /* Use the new register in the first load to ensure that
15903 if the original input register is not dead after peephole,
15904 then it will have the correct constant value. */
15907 else if (TARGET_ARM)
15909 int regno = REGNO (operands[0]);
15910 if (!peep2_reg_dead_p (4, operands[0]))
15912 /* When the input register is even and is not dead after the
15913 pattern, it has to hold the second constant but we cannot
form a legal STRD in ARM mode with this register as the second
register.  */
15916 if (regno % 2 == 0)
15919 /* Is regno-1 free? */
15920 SET_HARD_REG_SET (regset);
15921 CLEAR_HARD_REG_BIT(regset, regno - 1);
15922 tmp = peep2_find_free_register (0, 4, "r", SImode, ®set);
15923 if (tmp == NULL_RTX)
15930 /* Find a DImode register. */
15931 CLEAR_HARD_REG_SET (regset);
15932 tmp = peep2_find_free_register (0, 4, "r", DImode, ®set);
15933 if (tmp != NULL_RTX)
15935 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15936 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15940 /* Can we use the input register to form a DI register? */
15941 SET_HARD_REG_SET (regset);
15942 CLEAR_HARD_REG_BIT(regset,
15943 regno % 2 == 0 ? regno + 1 : regno - 1);
15944 tmp = peep2_find_free_register (0, 4, "r", SImode, ®set);
15945 if (tmp == NULL_RTX)
15947 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15951 gcc_assert (operands[0] != NULL_RTX);
15952 gcc_assert (operands[1] != NULL_RTX);
15953 gcc_assert (REGNO (operands[0]) % 2 == 0);
15954 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15958 /* Make sure the instructions are ordered with lower memory access first. */
15959 if (offsets[0] > offsets[1])
15961 gap = offsets[0] - offsets[1];
15962 offset = offsets[1];
15964 /* Swap the instructions such that lower memory is accessed first. */
15965 std::swap (operands[0], operands[1]);
15966 std::swap (operands[2], operands[3]);
15967 std::swap (align[0], align[1]);
15969 std::swap (operands[4], operands[5]);
15973 gap = offsets[1] - offsets[0];
15974 offset = offsets[0];
15977 /* Make sure accesses are to consecutive memory locations. */
15978 if (gap != GET_MODE_SIZE (SImode))
15981 if (!align_ok_ldrd_strd (align[0], offset))
15984 /* Make sure we generate legal instructions. */
15985 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15989 /* In Thumb state, where registers are almost unconstrained, there
15990 is little hope to fix it. */
15994 if (load && commute)
15996 /* Try reordering registers. */
15997 std::swap (operands[0], operands[1]);
15998 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16005 /* If input registers are dead after this pattern, they can be
16006 reordered or replaced by other registers that are free in the
16007 current pattern. */
16008 if (!peep2_reg_dead_p (4, operands[0])
16009 || !peep2_reg_dead_p (4, operands[1]))
16012 /* Try to reorder the input registers. */
/* For example, the code
	mov	r0, #0
	mov	r1, #1
	str	r1, [r2]
	str	r0, [r2, #4]
   can be transformed into
	mov	r1, #0
	mov	r0, #1
	strd	r0, r1, [r2]  */
16023 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
16026 std::swap (operands[0], operands[1]);
16030 /* Try to find a free DI register. */
16031 CLEAR_HARD_REG_SET (regset);
16032 add_to_hard_reg_set (®set, SImode, REGNO (operands[0]));
16033 add_to_hard_reg_set (®set, SImode, REGNO (operands[1]));
16036 tmp = peep2_find_free_register (0, 4, "r", DImode, ®set);
16037 if (tmp == NULL_RTX)
16040 /* DREG must be an even-numbered register in DImode.
16041 Split it into SI registers. */
16042 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16043 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16044 gcc_assert (operands[0] != NULL_RTX);
16045 gcc_assert (operands[1] != NULL_RTX);
16046 gcc_assert (REGNO (operands[0]) % 2 == 0);
16047 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16049 return (operands_ok_ldrd_strd (operands[0], operands[1],
16059 /* Return true if parallel execution of the two word-size accesses provided
16060 could be satisfied with a single LDRD/STRD instruction. Two word-size
16061 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
16062 register operands and OPERANDS[2,3] are the corresponding memory operands.
16065 valid_operands_ldrd_strd (rtx *operands, bool load)
16068 HOST_WIDE_INT offsets[2], offset, align[2];
16069 rtx base = NULL_RTX;
16070 rtx cur_base, cur_offset;
16073 /* Check that the memory references are immediate offsets from the
16074 same base register. Extract the base register, the destination
16075 registers, and the corresponding memory offsets. */
16076 for (i = 0; i < nops; i++)
16078 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
16084 else if (REGNO (base) != REGNO (cur_base))
16087 offsets[i] = INTVAL (cur_offset);
16088 if (GET_CODE (operands[i]) == SUBREG)
16092 if (offsets[0] > offsets[1])
16095 gap = offsets[1] - offsets[0];
16096 offset = offsets[0];
16098 /* Make sure accesses are to consecutive memory locations. */
16099 if (gap != GET_MODE_SIZE (SImode))
16102 if (!align_ok_ldrd_strd (align[0], offset))
16105 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16110 /* Print a symbolic form of X to the debug file, F. */
16112 arm_print_value (FILE *f, rtx x)
16114 switch (GET_CODE (x))
16117 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16121 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
16129 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16131 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16132 if (i < (CONST_VECTOR_NUNITS (x) - 1))
16140 fprintf (f, "\"%s\"", XSTR (x, 0));
16144 fprintf (f, "`%s'", XSTR (x, 0));
16148 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16152 arm_print_value (f, XEXP (x, 0));
16156 arm_print_value (f, XEXP (x, 0));
16158 arm_print_value (f, XEXP (x, 1));
16166 fprintf (f, "????");
16171 /* Routines for manipulation of the constant pool. */
16173 /* Arm instructions cannot load a large constant directly into a
16174 register; they have to come from a pc relative load. The constant
16175 must therefore be placed in the addressable range of the pc
16176 relative load. Depending on the precise pc relative load
16177 instruction the range is somewhere between 256 bytes and 4k. This
16178 means that we often have to dump a constant inside a function, and
16179 generate code to branch around it.
16181 It is important to minimize this, since the branches will slow
16182 things down and make the code larger.
16184 Normally we can hide the table after an existing unconditional
16185 branch so that there is no interruption of the flow, but in the
worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...
16204 We fix this by performing a scan after scheduling, which notices
16205 which instructions need to have their operands fetched from the
16206 constant table and builds the table.
16208 The algorithm starts by building a table of all the constants that
16209 need fixing up and all the natural barriers in the function (places
16210 where a constant table can be dropped without breaking the flow).
16211 For each fixup we note how far the pc-relative replacement will be
16212 able to reach and the offset of the instruction into the function.
16214 Having built the table we then group the fixes together to form
16215 tables that are as large as possible (subject to addressing
16216 constraints) and emit each table of constants after the last
16217 barrier that is within range of all the instructions in the group.
16218 If a group does not contain a barrier, then we forcibly create one
16219 by inserting a jump instruction into the flow. Once the table has
16220 been inserted, the insns are then modified to reference the
16221 relevant entry in the pool.
16223 Possible enhancements to the algorithm (not implemented) are:
16225 1) For some processors and object formats, there may be benefit in
16226 aligning the pools to the start of cache lines; this alignment
would need to be taken into account when calculating addressability
of a pool.  */
16230 /* These typedefs are located at the start of this file, so that
16231 they can be used in the prototypes there. This comment is to
16232 remind readers of that fact so that the following structures
16233 can be understood more easily.
16235 typedef struct minipool_node Mnode;
16236 typedef struct minipool_fixup Mfix; */
16238 struct minipool_node
16240 /* Doubly linked chain of entries. */
16243 /* The maximum offset into the code that this entry can be placed. While
16244 pushing fixes for forward references, all entries are sorted in order
16245 of increasing max_address. */
16246 HOST_WIDE_INT max_address;
16247 /* Similarly for an entry inserted for a backwards ref. */
16248 HOST_WIDE_INT min_address;
16249 /* The number of fixes referencing this entry. This can become zero
16250 if we "unpush" an entry. In this case we ignore the entry when we
16251 come to emit the code. */
16253 /* The offset from the start of the minipool. */
16254 HOST_WIDE_INT offset;
/* The value in the table.  */
rtx value;
/* The mode of the value.  */
machine_mode mode;
/* The size of the value.  With iWMMXt enabled,
   sizes > 4 also imply an alignment of 8 bytes.  */
int fix_size;
16264 struct minipool_fixup
16268 HOST_WIDE_INT address;
16274 HOST_WIDE_INT forwards;
16275 HOST_WIDE_INT backwards;
16278 /* Fixes less than a word need padding out to a word boundary. */
16279 #define MINIPOOL_FIX_SIZE(mode) \
16280 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
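/* E.g. an HImode entry occupies only 2 bytes of data but is counted
   as 4 so that the entry that follows it stays word aligned; SImode,
   DImode and wider entries are already word multiples and count at
   their natural size.  */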
16282 static Mnode * minipool_vector_head;
16283 static Mnode * minipool_vector_tail;
16284 static rtx_code_label *minipool_vector_label;
16285 static int minipool_pad;
16287 /* The linked list of all minipool fixes required for this function. */
16288 Mfix * minipool_fix_head;
16289 Mfix * minipool_fix_tail;
16290 /* The fix entry for the current minipool, once it has been placed. */
16291 Mfix * minipool_barrier;
16293 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16294 #define JUMP_TABLES_IN_TEXT_SECTION 0
16297 static HOST_WIDE_INT
16298 get_jump_table_size (rtx_jump_table_data *insn)
/* ADDR_VECs only take room if read-only data goes into the text
   section.  */
16302 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16304 rtx body = PATTERN (insn);
16305 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16306 HOST_WIDE_INT size;
16307 HOST_WIDE_INT modesize;
16309 modesize = GET_MODE_SIZE (GET_MODE (body));
16310 size = modesize * XVECLEN (body, elt);
16314 /* Round up size of TBB table to a halfword boundary. */
16315 size = (size + 1) & ~HOST_WIDE_INT_1;
16318 /* No padding necessary for TBH. */
16321 /* Add two bytes for alignment on Thumb. */
16326 gcc_unreachable ();
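/* As a worked example, a TBB dispatch table (QImode entries) with five
   cases occupies 5 bytes and is rounded up to 6 to keep the following
   code halfword aligned, while a TBH table (HImode) with five cases
   needs exactly 10 bytes and no extra padding.  */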
16334 /* Emit insns to load the function address from FUNCDESC (an FDPIC
16335 function descriptor) into a register and the GOT address into the
16336 FDPIC register, returning an rtx for the register holding the
16337 function address. */
16340 arm_load_function_descriptor (rtx funcdesc)
16342 rtx fnaddr_reg = gen_reg_rtx (Pmode);
16343 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
16344 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
16345 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
16347 emit_move_insn (fnaddr_reg, fnaddr);
16349 /* The ABI requires the entry point address to be loaded first, but
16350 since we cannot support lazy binding for lack of atomic load of
two 32-bit values, we do not need to bother to prevent the
16352 previous load from being moved after that of the GOT address. */
16353 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
/* Return the maximum amount of padding that will be inserted before
   LABEL.  */
16360 static HOST_WIDE_INT
16361 get_label_padding (rtx label)
16363 HOST_WIDE_INT align, min_insn_size;
16365 align = 1 << label_to_alignment (label).levels[0].log;
16366 min_insn_size = TARGET_THUMB ? 2 : 4;
16367 return align > min_insn_size ? align - min_insn_size : 0;
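/* E.g. a label aligned to 8 bytes in Thumb code (minimum insn size 2)
   can be preceded by up to 8 - 2 = 6 bytes of padding; since the final
   padding is only known at assembly time, we must assume this worst
   case when estimating insn addresses.  */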
16370 /* Move a minipool fix MP from its current location to before MAX_MP.
16371 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16372 constraints may need updating. */
16374 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16375 HOST_WIDE_INT max_address)
16377 /* The code below assumes these are different. */
16378 gcc_assert (mp != max_mp);
16380 if (max_mp == NULL)
16382 if (max_address < mp->max_address)
16383 mp->max_address = max_address;
16387 if (max_address > max_mp->max_address - mp->fix_size)
16388 mp->max_address = max_mp->max_address - mp->fix_size;
16390 mp->max_address = max_address;
16392 /* Unlink MP from its current position. Since max_mp is non-null,
16393 mp->prev must be non-null. */
16394 mp->prev->next = mp->next;
16395 if (mp->next != NULL)
16396 mp->next->prev = mp->prev;
16398 minipool_vector_tail = mp->prev;
16400 /* Re-insert it before MAX_MP. */
16402 mp->prev = max_mp->prev;
16405 if (mp->prev != NULL)
16406 mp->prev->next = mp;
16408 minipool_vector_head = mp;
16411 /* Save the new entry. */
/* Scan over the preceding entries and adjust their addresses as
   required.  */
16416 while (mp->prev != NULL
16417 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16419 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16426 /* Add a constant to the minipool for a forward reference. Returns the
16427 node added or NULL if the constant will not fit in this pool. */
16429 add_minipool_forward_ref (Mfix *fix)
16431 /* If set, max_mp is the first pool_entry that has a lower
16432 constraint than the one we are trying to add. */
16433 Mnode * max_mp = NULL;
16434 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16437 /* If the minipool starts before the end of FIX->INSN then this FIX
16438 cannot be placed into the current pool. Furthermore, adding the
new constant pool entry may cause the pool to start FIX_SIZE bytes
earlier.  */
16441 if (minipool_vector_head &&
16442 (fix->address + get_attr_length (fix->insn)
16443 >= minipool_vector_head->max_address - fix->fix_size))
16446 /* Scan the pool to see if a constant with the same value has
16447 already been added. While we are doing this, also note the
location where we must insert the constant if it doesn't already
exist.  */
16450 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16452 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16453 && fix->mode == mp->mode
16454 && (!LABEL_P (fix->value)
16455 || (CODE_LABEL_NUMBER (fix->value)
16456 == CODE_LABEL_NUMBER (mp->value)))
16457 && rtx_equal_p (fix->value, mp->value))
16459 /* More than one fix references this entry. */
16461 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16464 /* Note the insertion point if necessary. */
16466 && mp->max_address > max_address)
/* If we are inserting an 8-byte aligned quantity and
16470 we have not already found an insertion point, then
16471 make sure that all such 8-byte aligned quantities are
16472 placed at the start of the pool. */
16473 if (ARM_DOUBLEWORD_ALIGN
16475 && fix->fix_size >= 8
16476 && mp->fix_size < 8)
16479 max_address = mp->max_address;
16483 /* The value is not currently in the minipool, so we need to create
16484 a new entry for it. If MAX_MP is NULL, the entry will be put on
16485 the end of the list since the placement is less constrained than
16486 any existing entry. Otherwise, we insert the new fix before
MAX_MP and, if necessary, adjust the constraints on the other
entries.  */
16490 mp->fix_size = fix->fix_size;
16491 mp->mode = fix->mode;
16492 mp->value = fix->value;
16494 /* Not yet required for a backwards ref. */
16495 mp->min_address = -65536;
16497 if (max_mp == NULL)
16499 mp->max_address = max_address;
16501 mp->prev = minipool_vector_tail;
16503 if (mp->prev == NULL)
16505 minipool_vector_head = mp;
16506 minipool_vector_label = gen_label_rtx ();
16509 mp->prev->next = mp;
16511 minipool_vector_tail = mp;
16515 if (max_address > max_mp->max_address - mp->fix_size)
16516 mp->max_address = max_mp->max_address - mp->fix_size;
16518 mp->max_address = max_address;
16521 mp->prev = max_mp->prev;
16523 if (mp->prev != NULL)
16524 mp->prev->next = mp;
16526 minipool_vector_head = mp;
16529 /* Save the new entry. */
/* Scan over the preceding entries and adjust their addresses as
   required.  */
16534 while (mp->prev != NULL
16535 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16537 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16545 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16546 HOST_WIDE_INT min_address)
16548 HOST_WIDE_INT offset;
16550 /* The code below assumes these are different. */
16551 gcc_assert (mp != min_mp);
16553 if (min_mp == NULL)
16555 if (min_address > mp->min_address)
16556 mp->min_address = min_address;
16560 /* We will adjust this below if it is too loose. */
16561 mp->min_address = min_address;
16563 /* Unlink MP from its current position. Since min_mp is non-null,
16564 mp->next must be non-null. */
16565 mp->next->prev = mp->prev;
16566 if (mp->prev != NULL)
16567 mp->prev->next = mp->next;
16569 minipool_vector_head = mp->next;
16571 /* Reinsert it after MIN_MP. */
16573 mp->next = min_mp->next;
16575 if (mp->next != NULL)
16576 mp->next->prev = mp;
16578 minipool_vector_tail = mp;
16584 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16586 mp->offset = offset;
16587 if (mp->refcount > 0)
16588 offset += mp->fix_size;
16590 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16591 mp->next->min_address = mp->min_address + mp->fix_size;
16597 /* Add a constant to the minipool for a backward reference. Returns the
16598 node added or NULL if the constant will not fit in this pool.
16600 Note that the code for insertion for a backwards reference can be
16601 somewhat confusing because the calculated offsets for each fix do
not take into account the size of the pool (which is still under
construction).  */
16605 add_minipool_backward_ref (Mfix *fix)
16607 /* If set, min_mp is the last pool_entry that has a lower constraint
16608 than the one we are trying to add. */
16609 Mnode *min_mp = NULL;
16610 /* This can be negative, since it is only a constraint. */
16611 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16614 /* If we can't reach the current pool from this insn, or if we can't
16615 insert this entry at the end of the pool without pushing other
16616 fixes out of range, then we don't try. This ensures that we
16617 can't fail later on. */
16618 if (min_address >= minipool_barrier->address
16619 || (minipool_vector_tail->min_address + fix->fix_size
16620 >= minipool_barrier->address))
16623 /* Scan the pool to see if a constant with the same value has
16624 already been added. While we are doing this, also note the
location where we must insert the constant if it doesn't already
exist.  */
16627 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16629 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16630 && fix->mode == mp->mode
16631 && (!LABEL_P (fix->value)
16632 || (CODE_LABEL_NUMBER (fix->value)
16633 == CODE_LABEL_NUMBER (mp->value)))
16634 && rtx_equal_p (fix->value, mp->value)
16635 /* Check that there is enough slack to move this entry to the
16636 end of the table (this is conservative). */
16637 && (mp->max_address
16638 > (minipool_barrier->address
16639 + minipool_vector_tail->offset
16640 + minipool_vector_tail->fix_size)))
16643 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16646 if (min_mp != NULL)
16647 mp->min_address += fix->fix_size;
16650 /* Note the insertion point if necessary. */
16651 if (mp->min_address < min_address)
16653 /* For now, we do not allow the insertion of 8-byte alignment
16654 requiring nodes anywhere but at the start of the pool. */
16655 if (ARM_DOUBLEWORD_ALIGN
16656 && fix->fix_size >= 8 && mp->fix_size < 8)
16661 else if (mp->max_address
16662 < minipool_barrier->address + mp->offset + fix->fix_size)
16664 /* Inserting before this entry would push the fix beyond
16665 its maximum address (which can happen if we have
re-located a forwards fix); force the new fix to come
after it.  */
16668 if (ARM_DOUBLEWORD_ALIGN
16669 && fix->fix_size >= 8 && mp->fix_size < 8)
16674 min_address = mp->min_address + fix->fix_size;
16677 /* Do not insert a non-8-byte aligned quantity before 8-byte
16678 aligned quantities. */
16679 else if (ARM_DOUBLEWORD_ALIGN
16680 && fix->fix_size < 8
16681 && mp->fix_size >= 8)
16684 min_address = mp->min_address + fix->fix_size;
16689 /* We need to create a new entry. */
16691 mp->fix_size = fix->fix_size;
16692 mp->mode = fix->mode;
16693 mp->value = fix->value;
16695 mp->max_address = minipool_barrier->address + 65536;
16697 mp->min_address = min_address;
16699 if (min_mp == NULL)
16702 mp->next = minipool_vector_head;
16704 if (mp->next == NULL)
16706 minipool_vector_tail = mp;
16707 minipool_vector_label = gen_label_rtx ();
16710 mp->next->prev = mp;
16712 minipool_vector_head = mp;
16716 mp->next = min_mp->next;
16720 if (mp->next != NULL)
16721 mp->next->prev = mp;
16723 minipool_vector_tail = mp;
16726 /* Save the new entry. */
16734 /* Scan over the following entries and adjust their offsets. */
16735 while (mp->next != NULL)
16737 if (mp->next->min_address < mp->min_address + mp->fix_size)
16738 mp->next->min_address = mp->min_address + mp->fix_size;
16741 mp->next->offset = mp->offset + mp->fix_size;
16743 mp->next->offset = mp->offset;
16752 assign_minipool_offsets (Mfix *barrier)
16754 HOST_WIDE_INT offset = 0;
16757 minipool_barrier = barrier;
16759 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16761 mp->offset = offset;
16763 if (mp->refcount > 0)
16764 offset += mp->fix_size;
16768 /* Output the literal table */
16770 dump_minipool (rtx_insn *scan)
16776 if (ARM_DOUBLEWORD_ALIGN)
16777 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16778 if (mp->refcount > 0 && mp->fix_size >= 8)
16785 fprintf (dump_file,
16786 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16787 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16789 scan = emit_label_after (gen_label_rtx (), scan);
16790 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16791 scan = emit_label_after (minipool_vector_label, scan);
16793 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16795 if (mp->refcount > 0)
16799 fprintf (dump_file,
16800 ";; Offset %u, min %ld, max %ld ",
16801 (unsigned) mp->offset, (unsigned long) mp->min_address,
16802 (unsigned long) mp->max_address);
16803 arm_print_value (dump_file, mp->value);
16804 fputc ('\n', dump_file);
16807 rtx val = copy_rtx (mp->value);
16809 switch (GET_MODE_SIZE (mp->mode))
16811 #ifdef HAVE_consttable_1
16813 scan = emit_insn_after (gen_consttable_1 (val), scan);
16817 #ifdef HAVE_consttable_2
16819 scan = emit_insn_after (gen_consttable_2 (val), scan);
16823 #ifdef HAVE_consttable_4
16825 scan = emit_insn_after (gen_consttable_4 (val), scan);
16829 #ifdef HAVE_consttable_8
16831 scan = emit_insn_after (gen_consttable_8 (val), scan);
16835 #ifdef HAVE_consttable_16
16837 scan = emit_insn_after (gen_consttable_16 (val), scan);
16842 gcc_unreachable ();
16850 minipool_vector_head = minipool_vector_tail = NULL;
16851 scan = emit_insn_after (gen_consttable_end (), scan);
16852 scan = emit_barrier_after (scan);
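/* Schematically, the emitted pool looks like
	<new label>
	.align	2		@ or 3 when an 8-byte entry is present
   <minipool_vector_label>:
	.word	<value>		@ one consttable_* entry per live node
	...
   followed by a consttable_end marker and a barrier.  */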
16855 /* Return the cost of forcibly inserting a barrier after INSN. */
16857 arm_barrier_cost (rtx_insn *insn)
16859 /* Basing the location of the pool on the loop depth is preferable,
16860 but at the moment, the basic block information seems to be
16861 corrupt by this stage of the compilation. */
16862 int base_cost = 50;
16863 rtx_insn *next = next_nonnote_insn (insn);
16865 if (next != NULL && LABEL_P (next))
16868 switch (GET_CODE (insn))
/* It will always be better to place the table before the label, rather
   than after it.  */
16880 return base_cost - 10;
16883 return base_cost + 10;
16887 /* Find the best place in the insn stream in the range
16888 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16889 Create the barrier by inserting a jump and add a new fix entry for
16892 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16894 HOST_WIDE_INT count = 0;
16895 rtx_barrier *barrier;
16896 rtx_insn *from = fix->insn;
16897 /* The instruction after which we will insert the jump. */
16898 rtx_insn *selected = NULL;
16900 /* The address at which the jump instruction will be placed. */
16901 HOST_WIDE_INT selected_address;
16903 HOST_WIDE_INT max_count = max_address - fix->address;
16904 rtx_code_label *label = gen_label_rtx ();
16906 selected_cost = arm_barrier_cost (from);
16907 selected_address = fix->address;
16909 while (from && count < max_count)
16911 rtx_jump_table_data *tmp;
/* This code shouldn't have been called if there was a natural barrier
   within range.  */
16916 gcc_assert (!BARRIER_P (from));
16918 /* Count the length of this insn. This must stay in sync with the
16919 code that pushes minipool fixes. */
16920 if (LABEL_P (from))
16921 count += get_label_padding (from);
16923 count += get_attr_length (from);
16925 /* If there is a jump table, add its length. */
16926 if (tablejump_p (from, NULL, &tmp))
16928 count += get_jump_table_size (tmp);
16930 /* Jump tables aren't in a basic block, so base the cost on
16931 the dispatch insn. If we select this location, we will
16932 still put the pool after the table. */
16933 new_cost = arm_barrier_cost (from);
16935 if (count < max_count
16936 && (!selected || new_cost <= selected_cost))
16939 selected_cost = new_cost;
16940 selected_address = fix->address + count;
16943 /* Continue after the dispatch table. */
16944 from = NEXT_INSN (tmp);
16948 new_cost = arm_barrier_cost (from);
16950 if (count < max_count
16951 && (!selected || new_cost <= selected_cost))
16954 selected_cost = new_cost;
16955 selected_address = fix->address + count;
16958 from = NEXT_INSN (from);
16961 /* Make sure that we found a place to insert the jump. */
16962 gcc_assert (selected);
16964 /* Create a new JUMP_INSN that branches around a barrier. */
16965 from = emit_jump_insn_after (gen_jump (label), selected);
16966 JUMP_LABEL (from) = label;
16967 barrier = emit_barrier_after (from);
16968 emit_label_after (label, barrier);
16970 /* Create a minipool barrier entry for the new barrier. */
16971 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16972 new_fix->insn = barrier;
16973 new_fix->address = selected_address;
16974 new_fix->next = fix->next;
16975 fix->next = new_fix;
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
16983 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16985 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16988 fix->address = address;
16991 if (minipool_fix_head != NULL)
16992 minipool_fix_tail->next = fix;
16994 minipool_fix_head = fix;
16996 minipool_fix_tail = fix;
16999 /* Record INSN, which will need fixing up to load a value from the
17000 minipool. ADDRESS is the offset of the insn since the start of the
17001 function; LOC is a pointer to the part of the insn which requires
fixing; VALUE is the constant that must be loaded, which is of type
MODE.  */
17005 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
17006 machine_mode mode, rtx value)
17008 gcc_assert (!arm_disable_literal_pool);
17009 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17012 fix->address = address;
17015 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
17016 fix->value = value;
17017 fix->forwards = get_attr_pool_range (insn);
17018 fix->backwards = get_attr_neg_pool_range (insn);
17019 fix->minipool = NULL;
17021 /* If an insn doesn't have a range defined for it, then it isn't
17022 expecting to be reworked by this code. Better to stop now than
17023 to generate duff assembly code. */
17024 gcc_assert (fix->forwards || fix->backwards);
17026 /* If an entry requires 8-byte alignment then assume all constant pools
17027 require 4 bytes of padding. Trying to do this later on a per-pool
17028 basis is awkward because existing pool entries have to be modified. */
17029 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
17034 fprintf (dump_file,
17035 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
17036 GET_MODE_NAME (mode),
17037 INSN_UID (insn), (unsigned long) address,
17038 -1 * (long)fix->backwards, (long)fix->forwards);
17039 arm_print_value (dump_file, fix->value);
17040 fprintf (dump_file, "\n");
17043 /* Add it to the chain of fixes. */
17046 if (minipool_fix_head != NULL)
17047 minipool_fix_tail->next = fix;
17049 minipool_fix_head = fix;
17051 minipool_fix_tail = fix;
17054 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
Returns the number of insns needed, or 99 if we always want to synthesize
the value in memory.  */
17058 arm_max_const_double_inline_cost ()
17060 return ((optimize_size || arm_ld_sched) ? 3 : 4);
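/* That is, when optimizing for size or on cores with load delay slots
   (arm_ld_sched) a 64-bit constant is only synthesized inline when it
   costs at most 3 insns; otherwise up to 4 are acceptable before a
   literal-pool load is preferred.  */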
17063 /* Return the cost of synthesizing a 64-bit constant VAL inline.
Returns the number of insns needed, or 99 if we don't know how to
synthesize it.  */
17067 arm_const_double_inline_cost (rtx val)
17069 rtx lowpart, highpart;
17072 mode = GET_MODE (val);
17074 if (mode == VOIDmode)
17077 gcc_assert (GET_MODE_SIZE (mode) == 8);
17079 lowpart = gen_lowpart (SImode, val);
17080 highpart = gen_highpart_mode (SImode, mode, val);
17082 gcc_assert (CONST_INT_P (lowpart));
17083 gcc_assert (CONST_INT_P (highpart));
17085 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
17086 NULL_RTX, NULL_RTX, 0, 0)
17087 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
17088 NULL_RTX, NULL_RTX, 0, 0));
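/* E.g. the constant 0x0000000100000001 splits into two SImode halves
   of 1; each half is a valid immediate costing one insn, so the total
   inline cost reported is 2.  */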
17091 /* Cost of loading a SImode constant. */
17093 arm_const_inline_cost (enum rtx_code code, rtx val)
17095 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
17096 NULL_RTX, NULL_RTX, 1, 0);
17099 /* Return true if it is worthwhile to split a 64-bit constant into two
17100 32-bit operations. This is the case if optimizing for size, or
17101 if we have load delay slots, or if one 32-bit part can be done with
17102 a single data operation. */
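/* Illustrative example (not from the original source): for the 64-bit
   constant 0x12345678000000ff the low word, 0x000000ff, is a valid ARM
   immediate, so that half needs only a single data operation and
   splitting the constant is worthwhile. */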
17104 arm_const_double_by_parts (rtx val)
17106 machine_mode mode = GET_MODE (val);
17109 if (optimize_size || arm_ld_sched)
17112 if (mode == VOIDmode)
17115 part = gen_highpart_mode (SImode, mode, val);
17117 gcc_assert (CONST_INT_P (part));
17119 if (const_ok_for_arm (INTVAL (part))
17120 || const_ok_for_arm (~INTVAL (part)))
17123 part = gen_lowpart (SImode, val);
17125 gcc_assert (CONST_INT_P (part));
17127 if (const_ok_for_arm (INTVAL (part))
17128 || const_ok_for_arm (~INTVAL (part)))
17134 /* Return true if it is possible to inline both the high and low parts
17135 of a 64-bit constant into 32-bit data processing instructions. */
17137 arm_const_double_by_immediates (rtx val)
17139 machine_mode mode = GET_MODE (val);
17142 if (mode == VOIDmode)
17145 part = gen_highpart_mode (SImode, mode, val);
17147 gcc_assert (CONST_INT_P (part));
17149 if (!const_ok_for_arm (INTVAL (part)))
17152 part = gen_lowpart (SImode, val);
17154 gcc_assert (CONST_INT_P (part));
17156 if (!const_ok_for_arm (INTVAL (part)))
17162 /* Scan INSN and note any of its operands that need fixing.
17163 If DO_PUSHES is false we do not actually push any of the fixups
17166 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17170 extract_constrain_insn (insn);
17172 if (recog_data.n_alternatives == 0)
17175 /* Fill in recog_op_alt with information about the constraints of
17177 preprocess_constraints (insn);
17179 const operand_alternative *op_alt = which_op_alt ();
17180 for (opno = 0; opno < recog_data.n_operands; opno++)
17182 /* Things we need to fix can only occur in inputs. */
17183 if (recog_data.operand_type[opno] != OP_IN)
17186 /* If this alternative is a memory reference, then any mention
17187 of constants in this alternative is really to fool reload
17188 into allowing us to accept one there. We need to fix them up
17189 now so that we output the right code. */
17190 if (op_alt[opno].memory_ok)
17192 rtx op = recog_data.operand[opno];
17194 if (CONSTANT_P (op))
17197 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17198 recog_data.operand_mode[opno], op);
17200 else if (MEM_P (op)
17201 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17202 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17206 rtx cop = avoid_constant_pool_reference (op);
17208 /* Casting the address of something to a mode narrower
17209 than a word can cause avoid_constant_pool_reference()
17210 to return the pool reference itself. That's no good to
17211 us here. Let's just hope that we can use the
17212 constant pool value directly. */
17214 cop = get_pool_constant (XEXP (op, 0));
17216 push_minipool_fix (insn, address,
17217 recog_data.operand_loc[opno],
17218 recog_data.operand_mode[opno], cop);
17228 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
17229 and unions in the context of ARMv8-M Security Extensions. It is used as a
17230 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
17231 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
17232 or four masks, depending on whether it is being computed for a
17233 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
17234 respectively. The tree for the type of the argument or a field within an
17235 argument is passed in ARG_TYPE, the current register this argument or field
17236 starts in is kept in the pointer REGNO and updated accordingly, the bit this
17237 argument or field starts at is passed in STARTING_BIT and the last used bit
17238 is kept in LAST_USED_BIT which is also updated accordingly. */
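/* A hypothetical worked example: for an argument of type
   struct { unsigned char c; unsigned int i; } passed in r0-r1, the char
   occupies bits 0-7 of r0 and the int occupies all of r1.  Bits 8-31 of
   r0 are padding, so padding_bits_to_clear[0] accumulates 0xffffff00 and
   the returned mask has bits 0 and 1 set, marking r0 and r1 as holding
   parts of the argument. */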
17240 static unsigned HOST_WIDE_INT
17241 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
17242 uint32_t * padding_bits_to_clear,
17243 unsigned starting_bit, int * last_used_bit)
17246 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
17248 if (TREE_CODE (arg_type) == RECORD_TYPE)
17250 unsigned current_bit = starting_bit;
17252 long int offset, size;
17255 field = TYPE_FIELDS (arg_type);
17258 /* The offset within a structure is always an offset from
17259 the start of that structure. Make sure we take that into account
17260 in the calculation of the register-based offset that we use here. */
17261 offset = starting_bit;
17262 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
17265 /* This is the actual size of the field, for bitfields this is the
17266 bitfield width and not the container size. */
17267 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
17269 if (*last_used_bit != offset)
17271 if (offset < *last_used_bit)
17273 /* This field's offset is before the 'last_used_bit'; that
17274 means this field goes on the next register. So we need to
17275 pad the rest of the current register and increase the
17276 register number. */
17278 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
17281 padding_bits_to_clear[*regno] |= mask;
17282 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
17287 /* Otherwise we pad the bits between the last field's end and
17288 the start of the new field. */
17291 mask = ((uint32_t)-1) >> (32 - offset);
17292 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
17293 padding_bits_to_clear[*regno] |= mask;
17295 current_bit = offset;
17298 /* Calculate further padding bits for inner structs/unions too. */
17299 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
17301 *last_used_bit = current_bit;
17302 not_to_clear_reg_mask
17303 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
17304 padding_bits_to_clear, offset,
17309 /* Update 'current_bit' with this field's size. If the
17310 'current_bit' lies in a subsequent register, update 'regno' and
17311 reset 'current_bit' to point to the current bit in that new
17313 current_bit += size;
17314 while (current_bit >= 32)
17317 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
17320 *last_used_bit = current_bit;
17323 field = TREE_CHAIN (field);
17325 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
17327 else if (TREE_CODE (arg_type) == UNION_TYPE)
17329 tree field, field_t;
17330 int i, regno_t, field_size;
17334 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
17335 = {-1, -1, -1, -1};
17337 /* To compute the padding bits in a union we only consider bits as
17338 padding bits if, for every field in the union, they are either
17339 padding bits or fall outside that field's size. */
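/* Hypothetical illustration: in union { unsigned char c; unsigned short s; }
   bits 8-31 are padding for the char but only bits 16-31 are padding for
   the short, so after intersecting the per-field masks only bits 16-31
   remain padding bits for the union. */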
17340 field = TYPE_FIELDS (arg_type);
17343 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
17344 = {0U, 0U, 0U, 0U};
17345 int last_used_bit_t = *last_used_bit;
17347 field_t = TREE_TYPE (field);
17349 /* If the field's type is either a record or a union make sure to
17350 compute their padding bits too. */
17351 if (RECORD_OR_UNION_TYPE_P (field_t))
17352 not_to_clear_reg_mask
17353 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
17354 &padding_bits_to_clear_t[0],
17355 starting_bit, &last_used_bit_t);
17358 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
17359 regno_t = (field_size / 32) + *regno;
17360 last_used_bit_t = (starting_bit + field_size) % 32;
17363 for (i = *regno; i < regno_t; i++)
17365 /* For all but the last register used by this field only keep the
17366 padding bits that were padding bits in this field. */
17367 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
17370 /* For the last register, keep all padding bits that were padding
17371 bits in this field and any padding bits that are still valid
17372 as padding bits but fall outside of this field's size. */
17373 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
17374 padding_bits_to_clear_res[regno_t]
17375 &= padding_bits_to_clear_t[regno_t] | mask;
17377 /* Update the maximum size of the fields in terms of registers used
17378 ('max_reg') and the 'last_used_bit' in said register. */
17379 if (max_reg < regno_t)
17382 max_bit = last_used_bit_t;
17384 else if (max_reg == regno_t && max_bit < last_used_bit_t)
17385 max_bit = last_used_bit_t;
17387 field = TREE_CHAIN (field);
17390 /* Update the current padding_bits_to_clear using the intersection of the
17391 padding bits of all the fields. */
17392 for (i=*regno; i < max_reg; i++)
17393 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
17395 /* Do not keep trailing padding bits; we do not know yet whether this
17396 is the end of the argument. */
17397 mask = ((uint32_t) 1 << max_bit) - 1;
17398 padding_bits_to_clear[max_reg]
17399 |= padding_bits_to_clear_res[max_reg] & mask;
17402 *last_used_bit = max_bit;
17405 /* This function should only be used for structs and unions. */
17406 gcc_unreachable ();
17408 return not_to_clear_reg_mask;
17411 /* In the context of ARMv8-M Security Extensions, this function is used for both
17412 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
17413 registers are used when returning or passing arguments, which is then
17414 returned as a mask. It will also compute a mask to indicate padding/unused
17415 bits for each of these registers, and passes this through the
17416 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
17417 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
17418 the starting register used to pass this argument or return value is passed
17419 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
17420 for struct and union types. */
17422 static unsigned HOST_WIDE_INT
17423 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
17424 uint32_t * padding_bits_to_clear)
17427 int last_used_bit = 0;
17428 unsigned HOST_WIDE_INT not_to_clear_mask;
17430 if (RECORD_OR_UNION_TYPE_P (arg_type))
17433 = comp_not_to_clear_mask_str_un (arg_type, &regno,
17434 padding_bits_to_clear, 0,
17438 /* If the 'last_used_bit' is not zero, that means we are still using a
17439 part of the last 'regno'. In such cases we must clear the trailing
17440 bits. Otherwise we are not using regno and we should mark it as to
17442 if (last_used_bit != 0)
17443 padding_bits_to_clear[regno]
17444 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
17446 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
17450 not_to_clear_mask = 0;
17451 /* We are not dealing with structs or unions, so these arguments may be
17452 passed in floating point registers too. In some cases a BLKmode is
17453 used when returning or passing arguments in multiple VFP registers. */
17454 if (GET_MODE (arg_rtx) == BLKmode)
17459 /* This should really only occur when dealing with the hard-float
17461 gcc_assert (TARGET_HARD_FLOAT_ABI);
17463 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
17465 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
17466 gcc_assert (REG_P (reg));
17468 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
17470 /* If we are dealing with DF mode, make sure we don't
17471 clear either of the registers it addresses. */
17472 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
17475 unsigned HOST_WIDE_INT mask;
17476 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
17477 mask -= HOST_WIDE_INT_1U << REGNO (reg);
17478 not_to_clear_mask |= mask;
17484 /* Otherwise we can rely on the MODE to determine how many registers
17485 are being used by this argument. */
17486 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
17487 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17490 unsigned HOST_WIDE_INT
17491 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
17492 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17493 not_to_clear_mask |= mask;
17498 return not_to_clear_mask;
17501 /* Clear secret values from registers before doing a cmse_nonsecure_call or returning from
17502 a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which registers
17503 are to be fully cleared, using the value in register CLEARING_REG if more
17504 efficient. The PADDING_BITS_LEN entries array PADDING_BITS_TO_CLEAR gives
17505 the bits that need to be cleared in caller-saved core registers, with
17506 SCRATCH_REG used as a scratch register for that clearing.
17508 NOTE: one of three following assertions must hold:
17509 - SCRATCH_REG is a low register
17510 - CLEARING_REG is in the set of registers fully cleared (i.e. its bit is set
17511 in TO_CLEAR_BITMAP)
17512 - CLEARING_REG is a low register. */
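/* Sketch of a possible emitted sequence (hypothetical register choice):
   with padding_bits_to_clear[0] == 0xffffff00 and r4 as scratch, the
   padding-clearing loop below produces roughly

	mov	r4, #255	@ low half of ~0xffffff00
	movt	r4, #0		@ high half, via the ZERO_EXTRACT set
	ands	r0, r0, r4	@ keep only the non-padding bits of r0

   while fully-cleared registers are simply copied from CLEARING_REG,
   which holds zero. */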
17515 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
17516 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
17518 bool saved_clearing = false;
17519 rtx saved_clearing_reg = NULL_RTX;
17520 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
17522 gcc_assert (arm_arch_cmse);
17524 if (!bitmap_empty_p (to_clear_bitmap))
17526 minregno = bitmap_first_set_bit (to_clear_bitmap);
17527 maxregno = bitmap_last_set_bit (to_clear_bitmap);
17529 clearing_regno = REGNO (clearing_reg);
17531 /* Clear padding bits. */
17532 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
17533 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
17536 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
17538 if (padding_bits_to_clear[i] == 0)
17541 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
17542 CLEARING_REG as scratch. */
17544 && REGNO (scratch_reg) > LAST_LO_REGNUM)
17546 /* clearing_reg is not to be cleared; copy its value into scratch_reg
17547 such that we can use clearing_reg to clear the unused bits in the
17549 if ((clearing_regno > maxregno
17550 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
17551 && !saved_clearing)
17553 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
17554 emit_move_insn (scratch_reg, clearing_reg);
17555 saved_clearing = true;
17556 saved_clearing_reg = scratch_reg;
17558 scratch_reg = clearing_reg;
17561 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
17562 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
17563 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
17565 /* Fill the top half of the negated padding_bits_to_clear[i]. */
17566 mask = (~padding_bits_to_clear[i]) >> 16;
17567 rtx16 = gen_int_mode (16, SImode);
17568 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
17570 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
17572 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
17574 if (saved_clearing)
17575 emit_move_insn (clearing_reg, saved_clearing_reg);
17578 /* Clear full registers. */
17580 /* If not marked for clearing, clearing_reg already does not contain
17582 if (clearing_regno <= maxregno
17583 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
17585 emit_move_insn (clearing_reg, const0_rtx);
17586 emit_use (clearing_reg);
17587 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
17590 for (regno = minregno; regno <= maxregno; regno++)
17592 if (!bitmap_bit_p (to_clear_bitmap, regno))
17595 if (IS_VFP_REGNUM (regno))
17597 /* If regno is an even vfp register and its successor is also to
17598 be cleared, use vmov. */
17599 if (TARGET_VFP_DOUBLE
17600 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17601 && bitmap_bit_p (to_clear_bitmap, regno + 1))
17603 emit_move_insn (gen_rtx_REG (DFmode, regno),
17604 CONST1_RTX (DFmode));
17605 emit_use (gen_rtx_REG (DFmode, regno));
17610 emit_move_insn (gen_rtx_REG (SFmode, regno),
17611 CONST1_RTX (SFmode));
17612 emit_use (gen_rtx_REG (SFmode, regno));
17617 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
17618 emit_use (gen_rtx_REG (SImode, regno));
17623 /* Clear caller-saved registers not used to pass arguments before a
17624 cmse_nonsecure_call. Saving, clearing and restoring of callee-saved
17625 registers is done in the __gnu_cmse_nonsecure_call libcall.
17626 See libgcc/config/arm/cmse_nonsecure_call.S. */
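/* Usage sketch (hypothetical source, compiled with -mcmse): the
   attribute is placed on a function-pointer type, e.g.

	typedef void __attribute__ ((cmse_nonsecure_call)) ns_fn (void);
	void call_ns (ns_fn *fp) { fp (); }

   and the pass below then clears, at each such call site, the
   caller-saved registers that carry no arguments. */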
17629 cmse_nonsecure_call_clear_caller_saved (void)
17633 FOR_EACH_BB_FN (bb, cfun)
17637 FOR_BB_INSNS (bb, insn)
17639 unsigned address_regnum, regno, maxregno =
17640 TARGET_HARD_FLOAT_ABI ? D7_VFP_REGNUM : NUM_ARG_REGS - 1;
17641 auto_sbitmap to_clear_bitmap (maxregno + 1);
17643 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
17645 CUMULATIVE_ARGS args_so_far_v;
17646 cumulative_args_t args_so_far;
17647 tree arg_type, fntype;
17648 bool first_param = true;
17649 function_args_iterator args_iter;
17650 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
17652 if (!NONDEBUG_INSN_P (insn))
17655 if (!CALL_P (insn))
17658 pat = PATTERN (insn);
17659 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
17660 call = XVECEXP (pat, 0, 0);
17662 /* Get the real call RTX if the insn sets a value, i.e. returns. */
17663 if (GET_CODE (call) == SET)
17664 call = SET_SRC (call);
17666 /* Check if it is a cmse_nonsecure_call. */
17667 unspec = XEXP (call, 0);
17668 if (GET_CODE (unspec) != UNSPEC
17669 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
17672 /* Determine the caller-saved registers we need to clear. */
17673 bitmap_clear (to_clear_bitmap);
17674 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
17676 /* Only look at the caller-saved floating point registers in case of
17677 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17678 lazy store and loads which clear both caller- and callee-saved
17680 if (TARGET_HARD_FLOAT_ABI)
17682 auto_sbitmap float_bitmap (maxregno + 1);
17684 bitmap_clear (float_bitmap);
17685 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
17686 D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1);
17687 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
17690 /* Make sure the register used to hold the function address is not
17692 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17693 gcc_assert (MEM_P (address));
17694 gcc_assert (REG_P (XEXP (address, 0)));
17695 address_regnum = REGNO (XEXP (address, 0));
17696 if (address_regnum < R0_REGNUM + NUM_ARG_REGS)
17697 bitmap_clear_bit (to_clear_bitmap, address_regnum);
17699 /* Set basic block of call insn so that df rescan is performed on
17700 insns inserted here. */
17701 set_block_for_insn (insn, bb);
17702 df_set_flags (DF_DEFER_INSN_RESCAN);
17705 /* Make sure the scheduler doesn't schedule other insns beyond
17707 emit_insn (gen_blockage ());
17709 /* Walk through all arguments and clear registers appropriately.
17711 fntype = TREE_TYPE (MEM_EXPR (address));
17712 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17714 args_so_far = pack_cumulative_args (&args_so_far_v);
17715 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17718 uint64_t to_clear_args_mask;
17720 if (VOID_TYPE_P (arg_type))
17723 function_arg_info arg (arg_type, /*named=*/true);
17725 /* ??? We should advance after processing the argument and pass
17726 the argument we're advancing past. */
17727 arm_function_arg_advance (args_so_far, arg);
17729 arg_rtx = arm_function_arg (args_so_far, arg);
17730 gcc_assert (REG_P (arg_rtx));
17732 = compute_not_to_clear_mask (arg_type, arg_rtx,
17734 &padding_bits_to_clear[0]);
17735 if (to_clear_args_mask)
17737 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17739 if (to_clear_args_mask & (1ULL << regno))
17740 bitmap_clear_bit (to_clear_bitmap, regno);
17744 first_param = false;
17747 /* We use right shift and left shift to clear the LSB of the address
17748 we jump to instead of using bic, to avoid having to use an extra
17749 register on Thumb-1. */
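/* For illustration, with the target address in r4 this emits

	lsrs	r4, r4, #1
	lsls	r4, r4, #1

   which clears bit 0 without needing a spare register for a mask. */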
17750 clearing_reg = XEXP (address, 0);
17751 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
17752 emit_insn (gen_rtx_SET (clearing_reg, shift));
17753 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
17754 emit_insn (gen_rtx_SET (clearing_reg, shift));
17756 /* Clear caller-saved registers that leak before doing a non-secure
17758 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
17759 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
17760 NUM_ARG_REGS, ip_reg, clearing_reg);
17762 seq = get_insns ();
17764 emit_insn_before (seq, insn);
17769 /* Rewrite move insn into subtract of 0 if the condition codes will
17770 be useful in the next conditional jump insn. */
17773 thumb1_reorg (void)
17777 FOR_EACH_BB_FN (bb, cfun)
17780 rtx cmp, op0, op1, set = NULL;
17781 rtx_insn *prev, *insn = BB_END (bb);
17782 bool insn_clobbered = false;
17784 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17785 insn = PREV_INSN (insn);
17787 /* Find the last cbranchsi4_insn in basic block BB. */
17788 if (insn == BB_HEAD (bb)
17789 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17792 /* Get the register with which we are comparing. */
17793 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17794 op0 = XEXP (cmp, 0);
17795 op1 = XEXP (cmp, 1);
17797 /* Check that comparison is against ZERO. */
17798 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17801 /* Find the first flag setting insn before INSN in basic block BB. */
17802 gcc_assert (insn != BB_HEAD (bb));
17803 for (prev = PREV_INSN (insn);
17805 && prev != BB_HEAD (bb)
17807 || DEBUG_INSN_P (prev)
17808 || ((set = single_set (prev)) != NULL
17809 && get_attr_conds (prev) == CONDS_NOCOND)));
17810 prev = PREV_INSN (prev))
17812 if (reg_set_p (op0, prev))
17813 insn_clobbered = true;
17816 /* Skip if op0 is clobbered by insn other than prev. */
17817 if (insn_clobbered)
17823 dest = SET_DEST (set);
17824 src = SET_SRC (set);
17825 if (!low_register_operand (dest, SImode)
17826 || !low_register_operand (src, SImode))
17829 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17830 in INSN. Both src and dest of the move insn are checked. */
17831 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17833 dest = copy_rtx (dest);
17834 src = copy_rtx (src);
17835 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17836 PATTERN (prev) = gen_rtx_SET (dest, src);
17837 INSN_CODE (prev) = -1;
17838 /* Set test register in INSN to dest. */
17839 XEXP (cmp, 0) = copy_rtx (dest);
17840 INSN_CODE (insn) = -1;
17845 /* Convert instructions to their cc-clobbering variant if possible, since
17846 that allows us to use smaller encodings. */
17849 thumb2_reorg (void)
17854 INIT_REG_SET (&live);
17856 /* We are freeing block_for_insn in the toplev to keep compatibility
17857 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17858 compute_bb_for_insn ();
17861 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17863 FOR_EACH_BB_FN (bb, cfun)
17865 if ((current_tune->disparage_flag_setting_t16_encodings
17866 == tune_params::DISPARAGE_FLAGS_ALL)
17867 && optimize_bb_for_speed_p (bb))
17871 Convert_Action action = SKIP;
17872 Convert_Action action_for_partial_flag_setting
17873 = ((current_tune->disparage_flag_setting_t16_encodings
17874 != tune_params::DISPARAGE_FLAGS_NEITHER)
17875 && optimize_bb_for_speed_p (bb))
17878 COPY_REG_SET (&live, DF_LR_OUT (bb));
17879 df_simulate_initialize_backwards (bb, &live);
17880 FOR_BB_INSNS_REVERSE (bb, insn)
17882 if (NONJUMP_INSN_P (insn)
17883 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17884 && GET_CODE (PATTERN (insn)) == SET)
17887 rtx pat = PATTERN (insn);
17888 rtx dst = XEXP (pat, 0);
17889 rtx src = XEXP (pat, 1);
17890 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17892 if (UNARY_P (src) || BINARY_P (src))
17893 op0 = XEXP (src, 0);
17895 if (BINARY_P (src))
17896 op1 = XEXP (src, 1);
17898 if (low_register_operand (dst, SImode))
17900 switch (GET_CODE (src))
17903 /* Adding two registers and storing the result
17904 in the first source is already a 16-bit
17906 if (rtx_equal_p (dst, op0)
17907 && register_operand (op1, SImode))
17910 if (low_register_operand (op0, SImode))
17912 /* ADDS <Rd>,<Rn>,<Rm> */
17913 if (low_register_operand (op1, SImode))
17915 /* ADDS <Rdn>,#<imm8> */
17916 /* SUBS <Rdn>,#<imm8> */
17917 else if (rtx_equal_p (dst, op0)
17918 && CONST_INT_P (op1)
17919 && IN_RANGE (INTVAL (op1), -255, 255))
17921 /* ADDS <Rd>,<Rn>,#<imm3> */
17922 /* SUBS <Rd>,<Rn>,#<imm3> */
17923 else if (CONST_INT_P (op1)
17924 && IN_RANGE (INTVAL (op1), -7, 7))
17927 /* ADCS <Rd>, <Rn> */
17928 else if (GET_CODE (XEXP (src, 0)) == PLUS
17929 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17930 && low_register_operand (XEXP (XEXP (src, 0), 1),
17932 && COMPARISON_P (op1)
17933 && cc_register (XEXP (op1, 0), VOIDmode)
17934 && maybe_get_arm_condition_code (op1) == ARM_CS
17935 && XEXP (op1, 1) == const0_rtx)
17940 /* RSBS <Rd>,<Rn>,#0
17941 Not handled here: see NEG below. */
17942 /* SUBS <Rd>,<Rn>,#<imm3>
17944 Not handled here: see PLUS above. */
17945 /* SUBS <Rd>,<Rn>,<Rm> */
17946 if (low_register_operand (op0, SImode)
17947 && low_register_operand (op1, SImode))
17952 /* MULS <Rdm>,<Rn>,<Rdm>
17953 As an exception to the rule, this is only used
17954 when optimizing for size since MULS is slow on all
17955 known implementations. We do not even want to use
17956 MULS in cold code, if optimizing for speed, so we
17957 test the global flag here. */
17958 if (!optimize_size)
17960 /* Fall through. */
17964 /* ANDS <Rdn>,<Rm> */
17965 if (rtx_equal_p (dst, op0)
17966 && low_register_operand (op1, SImode))
17967 action = action_for_partial_flag_setting;
17968 else if (rtx_equal_p (dst, op1)
17969 && low_register_operand (op0, SImode))
17970 action = action_for_partial_flag_setting == SKIP
17971 ? SKIP : SWAP_CONV;
17977 /* ASRS <Rdn>,<Rm> */
17978 /* LSRS <Rdn>,<Rm> */
17979 /* LSLS <Rdn>,<Rm> */
17980 if (rtx_equal_p (dst, op0)
17981 && low_register_operand (op1, SImode))
17982 action = action_for_partial_flag_setting;
17983 /* ASRS <Rd>,<Rm>,#<imm5> */
17984 /* LSRS <Rd>,<Rm>,#<imm5> */
17985 /* LSLS <Rd>,<Rm>,#<imm5> */
17986 else if (low_register_operand (op0, SImode)
17987 && CONST_INT_P (op1)
17988 && IN_RANGE (INTVAL (op1), 0, 31))
17989 action = action_for_partial_flag_setting;
17993 /* RORS <Rdn>,<Rm> */
17994 if (rtx_equal_p (dst, op0)
17995 && low_register_operand (op1, SImode))
17996 action = action_for_partial_flag_setting;
18000 /* MVNS <Rd>,<Rm> */
18001 if (low_register_operand (op0, SImode))
18002 action = action_for_partial_flag_setting;
18006 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
18007 if (low_register_operand (op0, SImode))
18012 /* MOVS <Rd>,#<imm8> */
18013 if (CONST_INT_P (src)
18014 && IN_RANGE (INTVAL (src), 0, 255))
18015 action = action_for_partial_flag_setting;
18019 /* MOVS and MOV<c> with registers have different
18020 encodings, so are not relevant here. */
18028 if (action != SKIP)
18030 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18031 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18034 if (action == SWAP_CONV)
18036 src = copy_rtx (src);
18037 XEXP (src, 0) = op1;
18038 XEXP (src, 1) = op0;
18039 pat = gen_rtx_SET (dst, src);
18040 vec = gen_rtvec (2, pat, clobber);
18042 else /* action == CONV */
18043 vec = gen_rtvec (2, pat, clobber);
18045 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
18046 INSN_CODE (insn) = -1;
18050 if (NONDEBUG_INSN_P (insn))
18051 df_simulate_one_insn_backwards (bb, insn, &live);
18055 CLEAR_REG_SET (&live);
18058 /* GCC puts the pool in the wrong place for ARM, since we can only
18059 load addresses a limited distance around the pc. We do some
18060 special munging to move the constant pool values to the correct
18061 point in the code. */
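/* For illustration (hypothetical labels), a minipool in the emitted
   code looks roughly like

	ldr	r0, .LCP0	@ pc-relative load from the pool
	b	.Lskip		@ branch around the pool
   .LCP0:
	.word	0x12345678
   .Lskip:

   The code below places each pool so that every load referencing it
   stays within that load's pc-relative offset range. */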
18066 HOST_WIDE_INT address = 0;
18070 cmse_nonsecure_call_clear_caller_saved ();
18072 /* We cannot run the Thumb passes for thunks because there is no CFG. */
18073 if (cfun->is_thunk)
18075 else if (TARGET_THUMB1)
18077 else if (TARGET_THUMB2)
18080 /* Ensure all insns that must be split have been split at this point.
18081 Otherwise, the pool placement code below may compute incorrect
18082 insn lengths. Note that when optimizing, all insns have already
18083 been split at this point. */
18085 split_all_insns_noflow ();
18087 /* Make sure we do not attempt to create a literal pool even though it should
18088 no longer be necessary to create any. */
18089 if (arm_disable_literal_pool)
18092 minipool_fix_head = minipool_fix_tail = NULL;
18094 /* The first insn must always be a note, or the code below won't
18095 scan it properly. */
18096 insn = get_insns ();
18097 gcc_assert (NOTE_P (insn));
18100 /* Scan all the insns and record the operands that will need fixing. */
18101 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
18103 if (BARRIER_P (insn))
18104 push_minipool_barrier (insn, address);
18105 else if (INSN_P (insn))
18107 rtx_jump_table_data *table;
18109 note_invalid_constants (insn, address, true);
18110 address += get_attr_length (insn);
18112 /* If the insn is a vector jump, add the size of the table
18113 and skip the table. */
18114 if (tablejump_p (insn, NULL, &table))
18116 address += get_jump_table_size (table);
18120 else if (LABEL_P (insn))
18121 /* Add the worst-case padding due to alignment. We don't add
18122 the _current_ padding because the minipool insertions
18123 themselves might change it. */
18124 address += get_label_padding (insn);
18127 fix = minipool_fix_head;
18129 /* Now scan the fixups and perform the required changes. */
18134 Mfix * last_added_fix;
18135 Mfix * last_barrier = NULL;
18138 /* Skip any further barriers before the next fix. */
18139 while (fix && BARRIER_P (fix->insn))
18142 /* No more fixes. */
18146 last_added_fix = NULL;
18148 for (ftmp = fix; ftmp; ftmp = ftmp->next)
18150 if (BARRIER_P (ftmp->insn))
18152 if (ftmp->address >= minipool_vector_head->max_address)
18155 last_barrier = ftmp;
18157 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
18160 last_added_fix = ftmp; /* Keep track of the last fix added. */
18163 /* If we found a barrier, drop back to that; any fixes that we
18164 could have reached but come after the barrier will now go in
18165 the next mini-pool. */
18166 if (last_barrier != NULL)
18168 /* Reduce the refcount for those fixes that won't go into this
18170 for (fdel = last_barrier->next;
18171 fdel && fdel != ftmp;
18174 fdel->minipool->refcount--;
18175 fdel->minipool = NULL;
18178 ftmp = last_barrier;
18182 /* ftmp is the first fix that we can't fit into this pool and
18183 there are no natural barriers that we could use. Insert a
18184 new barrier in the code somewhere between the previous
18185 fix and this one, and arrange to jump around it. */
18186 HOST_WIDE_INT max_address;
18188 /* The last item on the list of fixes must be a barrier, so
18189 we can never run off the end of the list of fixes without
18190 last_barrier being set. */
18193 max_address = minipool_vector_head->max_address;
18194 /* Check that there isn't another fix that is in range that
18195 we couldn't fit into this pool because the pool was
18196 already too large: we need to put the pool before such an
18197 instruction. The pool itself may come just after the
18198 fix because create_fix_barrier also allows space for a
18199 jump instruction. */
18200 if (ftmp->address < max_address)
18201 max_address = ftmp->address + 1;
18203 last_barrier = create_fix_barrier (last_added_fix, max_address);
18206 assign_minipool_offsets (last_barrier);
18210 if (!BARRIER_P (ftmp->insn)
18211 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
18218 /* Scan over the fixes we have identified for this pool, fixing them
18219 up and adding the constants to the pool itself. */
18220 for (this_fix = fix; this_fix && ftmp != this_fix;
18221 this_fix = this_fix->next)
18222 if (!BARRIER_P (this_fix->insn))
18225 = plus_constant (Pmode,
18226 gen_rtx_LABEL_REF (VOIDmode,
18227 minipool_vector_label),
18228 this_fix->minipool->offset);
18229 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
18232 dump_minipool (last_barrier->insn);
18236 /* From now on we must synthesize any constants that we can't handle
18237 directly. This can happen if the RTL gets split during final
18238 instruction generation. */
18239 cfun->machine->after_arm_reorg = 1;
18241 /* Free the minipool memory. */
18242 obstack_free (&minipool_obstack, minipool_startobj);
18245 /* Routines to output assembly language. */
18247 /* Return the string representation of the passed-in real value. */
18248 static const char *
18249 fp_const_from_val (REAL_VALUE_TYPE *r)
18251 if (!fp_consts_inited)
18254 gcc_assert (real_equal (r, &value_fp0));
18258 /* OPERANDS[0] is the entire list of insns that constitute the pop,
18259 OPERANDS[1] is the base register, RETURN_PC is true iff the return
18260 insn is in the list, and UPDATE is true iff the list contains an
18261 explicit update of the base register. */
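/* Possible outputs (sketch): "pop {r4, r5, pc}" when the base register
   is SP with writeback and this is not an interrupt return; otherwise
   an LDM form such as "ldmfd sp!, {r4, r5, pc}^", the trailing "^"
   being added only when returning from an interrupt. */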
18263 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
18269 const char *conditional;
18270 int num_saves = XVECLEN (operands[0], 0);
18271 unsigned int regno;
18272 unsigned int regno_base = REGNO (operands[1]);
18273 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
18276 offset += update ? 1 : 0;
18277 offset += return_pc ? 1 : 0;
18279 /* Is the base register in the list? */
18280 for (i = offset; i < num_saves; i++)
18282 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
18283 /* If SP is in the list, then the base register must be SP. */
18284 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
18285 /* If base register is in the list, there must be no explicit update. */
18286 if (regno == regno_base)
18287 gcc_assert (!update);
18290 conditional = reverse ? "%?%D0" : "%?%d0";
18291 /* Can't use POP if returning from an interrupt. */
18292 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
18293 sprintf (pattern, "pop%s\t{", conditional);
18296 /* Output ldmfd when the base register is SP, otherwise output ldmia.
18297 It's just a convention; their semantics are identical. */
18298 if (regno_base == SP_REGNUM)
18299 sprintf (pattern, "ldmfd%s\t", conditional);
18301 sprintf (pattern, "ldmia%s\t", conditional);
18303 sprintf (pattern, "ldm%s\t", conditional);
18305 strcat (pattern, reg_names[regno_base]);
18307 strcat (pattern, "!, {");
18309 strcat (pattern, ", {");
18312 /* Output the first destination register. */
18314 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
18316 /* Output the rest of the destination registers. */
18317 for (i = offset + 1; i < num_saves; i++)
18319 strcat (pattern, ", ");
18321 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
18324 strcat (pattern, "}");
18326 if (interrupt_p && return_pc)
18327 strcat (pattern, "^");
18329 output_asm_insn (pattern, &cond);
18333 /* Output the assembly for a store multiple. */
18336 vfp_output_vstmd (rtx * operands)
18342 rtx addr_reg = REG_P (XEXP (operands[0], 0))
18343 ? XEXP (operands[0], 0)
18344 : XEXP (XEXP (operands[0], 0), 0);
18345 bool push_p = REGNO (addr_reg) == SP_REGNUM;
18348 strcpy (pattern, "vpush%?.64\t{%P1");
18350 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
18352 p = strlen (pattern);
18354 gcc_assert (REG_P (operands[1]));
18356 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
18357 for (i = 1; i < XVECLEN (operands[2], 0); i++)
18359 p += sprintf (&pattern[p], ", d%d", base + i);
18361 strcpy (&pattern[p], "}");
18363 output_asm_insn (pattern, operands);
18368 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
18369 number of bytes pushed. */
18372 vfp_emit_fstmd (int base_reg, int count)
18379 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
18380 register pairs are stored by a store multiple insn. We avoid this
18381 by pushing an extra pair. */
18382 if (count == 2 && !arm_arch6)
18384 if (base_reg == LAST_VFP_REGNUM - 3)
18389 /* FSTMD may not store more than 16 doubleword registers at once. Split
18390 larger stores into multiple parts (up to a maximum of two, in
18395 /* NOTE: base_reg is an internal register number, so each D register
18397 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
18398 saved += vfp_emit_fstmd (base_reg, 16);
18402 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
18403 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
18405 reg = gen_rtx_REG (DFmode, base_reg);
18408 XVECEXP (par, 0, 0)
18409 = gen_rtx_SET (gen_frame_mem
18411 gen_rtx_PRE_MODIFY (Pmode,
18414 (Pmode, stack_pointer_rtx,
18417 gen_rtx_UNSPEC (BLKmode,
18418 gen_rtvec (1, reg),
18419 UNSPEC_PUSH_MULT));
18421 tmp = gen_rtx_SET (stack_pointer_rtx,
18422 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
18423 RTX_FRAME_RELATED_P (tmp) = 1;
18424 XVECEXP (dwarf, 0, 0) = tmp;
18426 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
18427 RTX_FRAME_RELATED_P (tmp) = 1;
18428 XVECEXP (dwarf, 0, 1) = tmp;
18430 for (i = 1; i < count; i++)
18432 reg = gen_rtx_REG (DFmode, base_reg);
18434 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
18436 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
18437 plus_constant (Pmode,
18441 RTX_FRAME_RELATED_P (tmp) = 1;
18442 XVECEXP (dwarf, 0, i + 1) = tmp;
18445 par = emit_insn (par);
18446 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
18447 RTX_FRAME_RELATED_P (par) = 1;
18452 /* Return true if -mcmse has been passed and the function pointed to by
18453 'addr' has the cmse_nonsecure_call attribute; return false otherwise. */
18456 detect_cmse_nonsecure_call (tree addr)
18461 tree fntype = TREE_TYPE (addr);
18462 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
18463 TYPE_ATTRIBUTES (fntype)))
18469 /* Emit a call instruction with pattern PAT. ADDR is the address of
18470 the call target. */
18473 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
18477 insn = emit_call_insn (pat);
18479 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18480 If the call might use such an entry, add a use of the PIC register
18481 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18482 if (TARGET_VXWORKS_RTP
18485 && GET_CODE (addr) == SYMBOL_REF
18486 && (SYMBOL_REF_DECL (addr)
18487 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
18488 : !SYMBOL_REF_LOCAL_P (addr)))
18490 require_pic_register (NULL_RTX, false /*compute_now*/);
18491 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
18496 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
18497 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
18500 if (TARGET_AAPCS_BASED)
18502 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18503 linker. We need to add an IP clobber to allow setting
18504 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18505 is not needed since it's a fixed register. */
18506 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
18507 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
18511 /* Output a 'call' insn. */
18513 output_call (rtx *operands)
18515 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
18517 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
18518 if (REGNO (operands[0]) == LR_REGNUM)
18520 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
18521 output_asm_insn ("mov%?\t%0, %|lr", operands);
18524 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
18526 if (TARGET_INTERWORK || arm_arch4t)
18527 output_asm_insn ("bx%?\t%0", operands);
18529 output_asm_insn ("mov%?\t%|pc, %0", operands);
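/* Sketch of the sequence emitted above for a call through r2
   (hypothetical operand):

	mov	lr, pc		@ return address is the insn after the jump
	bx	r2		@ or "mov pc, r2" without interworking/ARMv4T

   A call through lr is first moved into ip, as handled above. */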
18534 /* Output a move from ARM registers to ARM registers of a long double.
18535 OPERANDS[0] is the destination.
18536 OPERANDS[1] is the source. */
18538 output_mov_long_double_arm_from_arm (rtx *operands)
18540 /* We have to be careful here because the two might overlap. */
18541 int dest_start = REGNO (operands[0]);
18542 int src_start = REGNO (operands[1]);
18546 if (dest_start < src_start)
18548 for (i = 0; i < 3; i++)
18550 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18551 ops[1] = gen_rtx_REG (SImode, src_start + i);
18552 output_asm_insn ("mov%?\t%0, %1", ops);
18557 for (i = 2; i >= 0; i--)
18559 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18560 ops[1] = gen_rtx_REG (SImode, src_start + i);
18561 output_asm_insn ("mov%?\t%0, %1", ops);
18569 arm_emit_movpair (rtx dest, rtx src)
18571 /* If the src is an immediate, simplify it. */
18572 if (CONST_INT_P (src))
18574 HOST_WIDE_INT val = INTVAL (src);
18575 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
18576 if ((val >> 16) & 0x0000ffff)
18578 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18580 GEN_INT ((val >> 16) & 0x0000ffff));
18581 rtx_insn *insn = get_last_insn ();
18582 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18586 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18587 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18588 rtx_insn *insn = get_last_insn ();
18589 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
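/* For example (illustrative): for the constant 0x12345678 the pair
   emitted above assembles to

	movw	r0, #0x5678	@ set the low halfword
	movt	r0, #0x1234	@ insert the high halfword

   and the second insn is omitted when the top halfword is zero. */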
18592 /* Output a move between double words. It must be REG<-MEM
18595 output_move_double (rtx *operands, bool emit, int *count)
18597 enum rtx_code code0 = GET_CODE (operands[0]);
18598 enum rtx_code code1 = GET_CODE (operands[1]);
18603 /* The only case when this might happen is when
18604 you are looking at the length of a DImode instruction
18605 that has an invalid constant in it. */
18606 if (code0 == REG && code1 != MEM)
18608 gcc_assert (!emit);
18615 unsigned int reg0 = REGNO (operands[0]);
18617 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18619 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18621 switch (GET_CODE (XEXP (operands[1], 0)))
18628 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18629 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18631 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18636 gcc_assert (TARGET_LDRD);
18638 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18645 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18647 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18655 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18657 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18662 gcc_assert (TARGET_LDRD);
18664 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18669 /* Auto-increment addressing modes should never have overlapping
18670 base and destination registers, and overlapping index registers
18671 are already prohibited, so this doesn't need to worry about
18673 otherops[0] = operands[0];
18674 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18675 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18677 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18679 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18681 /* Registers overlap so split out the increment. */
18684 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18685 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18692 /* Use a single insn if we can.
18693 FIXME: IWMMXT allows offsets larger than ldrd can
18694 handle; fix these up with a pair of ldr. */
18696 || !CONST_INT_P (otherops[2])
18697 || (INTVAL (otherops[2]) > -256
18698 && INTVAL (otherops[2]) < 256))
18701 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18707 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18708 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18718 /* Use a single insn if we can.
18719 FIXME: IWMMXT allows offsets larger than ldrd can handle;
18720 fix these up with a pair of ldr. */
18722 || !CONST_INT_P (otherops[2])
18723 || (INTVAL (otherops[2]) > -256
18724 && INTVAL (otherops[2]) < 256))
18727 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18733 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18734 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18744 /* We might be able to use ldrd %0, %1 here. However, the range is
18745 different from that of ldr/adr, and it is broken on some ARMv7-M
18746 implementations. */
18747 /* Use the second register of the pair to avoid problematic
18749 otherops[1] = operands[1];
18751 output_asm_insn ("adr%?\t%0, %1", otherops);
18752 operands[1] = otherops[0];
18756 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18758 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18765 /* ??? This needs checking for thumb2. */
18767 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18768 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18770 otherops[0] = operands[0];
18771 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18772 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18774 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18776 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18778 switch ((int) INTVAL (otherops[2]))
18782 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18788 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18794 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18798 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18799 operands[1] = otherops[0];
18801 && (REG_P (otherops[2])
18803 || (CONST_INT_P (otherops[2])
18804 && INTVAL (otherops[2]) > -256
18805 && INTVAL (otherops[2]) < 256)))
18807 if (reg_overlap_mentioned_p (operands[0],
18810 /* Swap base and index registers over to
18811 avoid a conflict. */
18812 std::swap (otherops[1], otherops[2]);
18814 /* If both registers conflict, it will usually
18815 have been fixed by a splitter. */
18816 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18817 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18821 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18822 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18829 otherops[0] = operands[0];
18831 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18836 if (CONST_INT_P (otherops[2]))
18840 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18841 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18843 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18849 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18855 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18862 return "ldrd%?\t%0, [%1]";
18864 return "ldmia%?\t%1, %M0";
18868 otherops[1] = adjust_address (operands[1], SImode, 4);
18869 /* Take care of overlapping base/data reg. */
18870 if (reg_mentioned_p (operands[0], operands[1]))
18874 output_asm_insn ("ldr%?\t%0, %1", otherops);
18875 output_asm_insn ("ldr%?\t%0, %1", operands);
18885 output_asm_insn ("ldr%?\t%0, %1", operands);
18886 output_asm_insn ("ldr%?\t%0, %1", otherops);
18896 /* Constraints should ensure this. */
18897 gcc_assert (code0 == MEM && code1 == REG);
18898 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18899 || (TARGET_ARM && TARGET_LDRD));
18901 /* For TARGET_ARM the first source register of an STRD
18902 must be even. This is usually the case for double-word
18903 values but user assembly constraints can force an odd
18904 starting register. */
18905 bool allow_strd = TARGET_LDRD
18906 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
18907 switch (GET_CODE (XEXP (operands[0], 0)))
18913 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18915 output_asm_insn ("stm%?\t%m0, %M1", operands);
18920 gcc_assert (allow_strd);
18922 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18929 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18931 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18939 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18941 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18946 gcc_assert (allow_strd);
18948 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18953 otherops[0] = operands[1];
18954 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18955 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18957 /* IWMMXT allows offsets larger than strd can handle;
18958 fix these up with a pair of str. */
18960 && CONST_INT_P (otherops[2])
18961 && (INTVAL(otherops[2]) <= -256
18962 || INTVAL(otherops[2]) >= 256))
18964 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18968 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18969 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18978 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18979 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18985 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18988 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18993 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18998 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18999 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
19001 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
19005 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
19012 output_asm_insn ("stmda%?\t%m0, %M1", operands);
19019 output_asm_insn ("stmib%?\t%m0, %M1", operands);
19024 && (REG_P (otherops[2])
19026 || (CONST_INT_P (otherops[2])
19027 && INTVAL (otherops[2]) > -256
19028 && INTVAL (otherops[2]) < 256)))
19030 otherops[0] = operands[1];
19031 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
19033 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
19039 otherops[0] = adjust_address (operands[0], SImode, 4);
19040 otherops[1] = operands[1];
19043 output_asm_insn ("str%?\t%1, %0", operands);
19044 output_asm_insn ("str%?\t%H1, %0", otherops);
19054 /* Output a move, load or store for quad-word vectors in ARM registers. Only
19055 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
19058 output_move_quad (rtx *operands)
19060 if (REG_P (operands[0]))
19062 /* Load, or reg->reg move. */
19064 if (MEM_P (operands[1]))
19066 switch (GET_CODE (XEXP (operands[1], 0)))
19069 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
19074 output_asm_insn ("adr%?\t%0, %1", operands);
19075 output_asm_insn ("ldmia%?\t%0, %M0", operands);
19079 gcc_unreachable ();
19087 gcc_assert (REG_P (operands[1]));
19089 dest = REGNO (operands[0]);
19090 src = REGNO (operands[1]);
19092 /* This seems pretty dumb, but hopefully GCC won't try to do it
19095 for (i = 0; i < 4; i++)
19097 ops[0] = gen_rtx_REG (SImode, dest + i);
19098 ops[1] = gen_rtx_REG (SImode, src + i);
19099 output_asm_insn ("mov%?\t%0, %1", ops);
19102 for (i = 3; i >= 0; i--)
19104 ops[0] = gen_rtx_REG (SImode, dest + i);
19105 ops[1] = gen_rtx_REG (SImode, src + i);
19106 output_asm_insn ("mov%?\t%0, %1", ops);
19112 gcc_assert (MEM_P (operands[0]));
19113 gcc_assert (REG_P (operands[1]));
19114 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
19116 switch (GET_CODE (XEXP (operands[0], 0)))
19119 output_asm_insn ("stm%?\t%m0, %M1", operands);
19123 gcc_unreachable ();
19130 /* Output a VFP load or store instruction. */
19133 output_move_vfp (rtx *operands)
19135 rtx reg, mem, addr, ops[2];
19136 int load = REG_P (operands[0]);
19137 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
19138 int sp = (!TARGET_VFP_FP16INST
19139 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
19140 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
19145 reg = operands[!load];
19146 mem = operands[load];
19148 mode = GET_MODE (reg);
19150 gcc_assert (REG_P (reg));
19151 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
19152 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
19158 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
19159 gcc_assert (MEM_P (mem));
19161 addr = XEXP (mem, 0);
19163 switch (GET_CODE (addr))
19166 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
19167 ops[0] = XEXP (addr, 0);
19172 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
19173 ops[0] = XEXP (addr, 0);
19178 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
19184 sprintf (buff, templ,
19185 load ? "ld" : "st",
19186 dp ? "64" : sp ? "32" : "16",
19188 integer_p ? "\t%@ int" : "");
19189 output_asm_insn (buff, ops);
19194 /* Output a Neon double-word or quad-word load or store, or a load
19195 or store for larger structure modes.
19197 WARNING: The ordering of elements is weird in big-endian mode,
19198 because the EABI requires that vectors stored in memory appear
19199 as though they were stored by a VSTM instruction.
19200 GCC RTL defines element ordering based on in-memory order.
19201 This can be different from the architectural ordering of elements
19202 within a NEON register. The intrinsics defined in arm_neon.h use the
19203 NEON register element ordering, not the GCC RTL element ordering.
19205 For example, the in-memory ordering of a big-endian quadword
19206 vector with 16-bit elements when stored from register pair {d0,d1}
19207 will be (lowest address first, d0[N] is NEON register element N):
19209 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
19211 When necessary, quadword registers (dN, dN+1) are moved to ARM
19212 registers from rN in the order:
19214 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
19216 So that STM/LDM can be used on vectors in ARM registers, and the
19217 same memory layout will result as if VSTM/VLDM were used.
19219 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
19220 possible, which allows use of appropriate alignment tags.
19221 Note that the choice of "64" is independent of the actual vector
19222 element size; this size simply ensures that the behavior is
19223 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
19225 Due to limitations of those instructions, use of VST1.64/VLD1.64
19226 is not possible if:
19227 - the address contains PRE_DEC, or
19228 - the mode refers to more than 4 double-word registers
19230 In those cases, it would be possible to replace VSTM/VLDM by a
19231 sequence of instructions; this is not currently implemented since
19232 this is not certain to actually improve performance. */
19235 output_move_neon (rtx *operands)
19237 rtx reg, mem, addr, ops[2];
19238 int regno, nregs, load = REG_P (operands[0]);
19243 reg = operands[!load];
19244 mem = operands[load];
19246 mode = GET_MODE (reg);
19248 gcc_assert (REG_P (reg));
19249 regno = REGNO (reg);
19250 nregs = REG_NREGS (reg) / 2;
19251 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
19252 || NEON_REGNO_OK_FOR_QUAD (regno));
19253 gcc_assert (VALID_NEON_DREG_MODE (mode)
19254 || VALID_NEON_QREG_MODE (mode)
19255 || VALID_NEON_STRUCT_MODE (mode));
19256 gcc_assert (MEM_P (mem));
19258 addr = XEXP (mem, 0);
19260 /* Strip off const from addresses like (const (plus (...))). */
19261 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
19262 addr = XEXP (addr, 0);
19264 switch (GET_CODE (addr))
19267 /* We have to use vldm / vstm for too-large modes. */
19270 templ = "v%smia%%?\t%%0!, %%h1";
19271 ops[0] = XEXP (addr, 0);
19275 templ = "v%s1.64\t%%h1, %%A0";
19282 /* We have to use vldm / vstm in this case, since there is no
19283 pre-decrement form of the vld1 / vst1 instructions. */
19284 templ = "v%smdb%%?\t%%0!, %%h1";
19285 ops[0] = XEXP (addr, 0);
19290 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
19291 gcc_unreachable ();
19294 /* We have to use vldm / vstm for too-large modes. */
19298 templ = "v%smia%%?\t%%m0, %%h1";
19300 templ = "v%s1.64\t%%h1, %%A0";
19306 /* Fall through. */
19312 for (i = 0; i < nregs; i++)
19314 /* We're only using DImode here because it's a convenient size. */
19315 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
19316 ops[1] = adjust_address (mem, DImode, 8 * i);
19317 if (reg_overlap_mentioned_p (ops[0], mem))
19319 gcc_assert (overlap == -1);
19324 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
19325 output_asm_insn (buff, ops);
19330 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
19331 ops[1] = adjust_address (mem, SImode, 8 * overlap);
19332 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
19333 output_asm_insn (buff, ops);
19340 gcc_unreachable ();
19343 sprintf (buff, templ, load ? "ld" : "st");
19344 output_asm_insn (buff, ops);
19349 /* Compute and return the length of neon_mov<mode>, where <mode> is
19350 one of VSTRUCT modes: EI, OI, CI or XI. */
19352 arm_attr_length_move_neon (rtx_insn *insn)
19354 rtx reg, mem, addr;
19358 extract_insn_cached (insn);
19360 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
19362 mode = GET_MODE (recog_data.operand[0]);
19373 gcc_unreachable ();
19377 load = REG_P (recog_data.operand[0]);
19378 reg = recog_data.operand[!load];
19379 mem = recog_data.operand[load];
19381 gcc_assert (MEM_P (mem));
19383 addr = XEXP (mem, 0);
19385 /* Strip off const from addresses like (const (plus (...))). */
19386 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
19387 addr = XEXP (addr, 0);
19389 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
19391 int insns = REG_NREGS (reg) / 2;
19398 /* Return nonzero if the offset in the address is an immediate. Otherwise,
19402 arm_address_offset_is_imm (rtx_insn *insn)
19406 extract_insn_cached (insn);
19408 if (REG_P (recog_data.operand[0]))
19411 mem = recog_data.operand[0];
19413 gcc_assert (MEM_P (mem));
19415 addr = XEXP (mem, 0);
19418 || (GET_CODE (addr) == PLUS
19419 && REG_P (XEXP (addr, 0))
19420 && CONST_INT_P (XEXP (addr, 1))))
19426 /* Output an ADD r, s, #n where n may be too big for one instruction.
19427 If adding zero to one register, output nothing. */
19429 output_add_immediate (rtx *operands)
19431 HOST_WIDE_INT n = INTVAL (operands[2]);
19433 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
19436 output_multi_immediate (operands,
19437 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
19440 output_multi_immediate (operands,
19441 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
19448 /* Output a multiple immediate operation.
19449 OPERANDS is the vector of operands referred to in the output patterns.
19450 INSTR1 is the output pattern to use for the first constant.
19451 INSTR2 is the output pattern to use for subsequent constants.
19452 IMMED_OP is the index of the constant slot in OPERANDS.
19453 N is the constant value. */
19454 static const char *
19455 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
19456 int immed_op, HOST_WIDE_INT n)
19458 #if HOST_BITS_PER_WIDE_INT > 32
19464 /* Quick and easy output. */
19465 operands[immed_op] = const0_rtx;
19466 output_asm_insn (instr1, operands);
19471 const char * instr = instr1;
19473 /* Note that n is never zero here (which would give no output). */
19474 for (i = 0; i < 32; i += 2)
19478 operands[immed_op] = GEN_INT (n & (255 << i));
19479 output_asm_insn (instr, operands);
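/* A worked example, illustrative only: for n == 0x10F00, which is not
   a valid rotated 8-bit immediate, the loop above finds two non-zero
   byte-sized chunks and emits

       add	r0, r1, #3840	@ 0x10F00 & (255 << 8)
       add	r0, r0, #65536	@ 0x10F00 & (255 << 16)

   Each chunk is individually encodable, and INSTR2 takes over after
   the first instruction so later additions accumulate into the
   destination register.  */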
19489 /* Return the name of a shifter operation. */
19490 static const char *
19491 arm_shift_nmem (enum rtx_code code)
19496 return ARM_LSL_NAME;
19512 /* Return the appropriate ARM instruction for the operation code.
19513 The returned result should not be overwritten. OP is the rtx of the
19514 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19517 arithmetic_instr (rtx op, int shift_first_arg)
19519 switch (GET_CODE (op))
19525 return shift_first_arg ? "rsb" : "sub";
19540 return arm_shift_nmem (GET_CODE (op));
19543 gcc_unreachable ();
19547 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19548 for the operation code. The returned result should not be overwritten.
19549 OP is the rtx code of the shift.
19550 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
19552 static const char *
19553 shift_op (rtx op, HOST_WIDE_INT *amountp)
19556 enum rtx_code code = GET_CODE (op);
19561 if (!CONST_INT_P (XEXP (op, 1)))
19563 output_operand_lossage ("invalid shift operand");
19568 *amountp = 32 - INTVAL (XEXP (op, 1));
19576 mnem = arm_shift_nmem (code);
19577 if (CONST_INT_P (XEXP (op, 1)))
19579 *amountp = INTVAL (XEXP (op, 1));
19581 else if (REG_P (XEXP (op, 1)))
19588 output_operand_lossage ("invalid shift operand");
19594 /* We never have to worry about the amount being other than a
19595 power of 2, since this case can never be reloaded from a reg. */
19596 if (!CONST_INT_P (XEXP (op, 1)))
19598 output_operand_lossage ("invalid shift operand");
19602 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19604 /* Amount must be a power of two. */
19605 if (*amountp & (*amountp - 1))
19607 output_operand_lossage ("invalid shift operand");
19611 *amountp = exact_log2 (*amountp);
19612 gcc_assert (IN_RANGE (*amountp, 0, 31));
19613 return ARM_LSL_NAME;
19616 output_operand_lossage ("invalid shift operand");
19620 /* This is not 100% correct, but follows from the desire to merge
19621 multiplication by a power of 2 with the recognizer for a
19622 shift. >=32 is not a valid shift for "lsl", so we must try and
19623 output a shift that produces the correct arithmetical result.
19624 Using lsr #32 is identical except for the fact that the carry bit
19625 is not set correctly if we set the flags; but we never use the
19626 carry bit from such an operation, so we can ignore that. */
19627 if (code == ROTATERT)
19628 /* Rotate is just modulo 32. */
19630 else if (*amountp != (*amountp & 31))
19632 if (code == ASHIFT)
19637 /* Shifts of 0 are no-ops. */
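/* Two illustrative cases: (mult:SI (reg) (const_int 8)) is treated as
   a shift and yields ARM_LSL_NAME with *AMOUNTP == 3, while
   (rotatert:SI (reg) (const_int 33)) is reduced modulo 32 and yields
   "ror" with *AMOUNTP == 1.  */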
19644 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19645 because /bin/as is horribly restrictive. The judgement about
19646 whether or not each character is 'printable' (and can be output as
19647 is) or not (and must be printed with an octal escape) must be made
19648 with reference to the *host* character set -- the situation is
19649 similar to that discussed in the comments above pp_c_char in
19650 c-pretty-print.c. */
19652 #define MAX_ASCII_LEN 51
19655 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19658 int len_so_far = 0;
19660 fputs ("\t.ascii\t\"", stream);
19662 for (i = 0; i < len; i++)
19666 if (len_so_far >= MAX_ASCII_LEN)
19668 fputs ("\"\n\t.ascii\t\"", stream);
19674 if (c == '\\' || c == '\"')
19676 putc ('\\', stream);
19684 fprintf (stream, "\\%03o", c);
19689 fputs ("\"\n", stream);
19693 /* Compute the register save mask for registers 0 through 12
19694 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19696 static unsigned long
19697 arm_compute_save_reg0_reg12_mask (void)
19699 unsigned long func_type = arm_current_func_type ();
19700 unsigned long save_reg_mask = 0;
19703 if (IS_INTERRUPT (func_type))
19705 unsigned int max_reg;
19706 /* Interrupt functions must not corrupt any registers,
19707 even call clobbered ones. If this is a leaf function
19708 we can just examine the registers used by the RTL, but
19709 otherwise we have to assume that whatever function is
19710 called might clobber anything, and so we have to save
19711 all the call-clobbered registers as well. */
19712 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19713 /* FIQ handlers have registers r8 - r12 banked, so
19714 we only need to check r0 - r7. Normal ISRs only
19715 bank r14 and r15, so we must check up to r12.
19716 r13 is the stack pointer which is always preserved,
19717 so we do not need to consider it here. */
19722 for (reg = 0; reg <= max_reg; reg++)
19723 if (df_regs_ever_live_p (reg)
19724 || (! crtl->is_leaf && call_used_regs[reg]))
19725 save_reg_mask |= (1 << reg);
19727 /* Also save the pic base register if necessary. */
19728 if (PIC_REGISTER_MAY_NEED_SAVING
19729 && crtl->uses_pic_offset_table)
19730 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19732 else if (IS_VOLATILE (func_type))
19734 /* For noreturn functions we historically omitted register saves
19735 altogether. However this really messes up debugging. As a
19736 compromise save just the frame pointers. Combined with the link
19737 register saved elsewhere this should be sufficient to get
19739 if (frame_pointer_needed)
19740 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19741 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19742 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19743 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19744 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19748 /* In the normal case we only need to save those registers
19749 which are call saved and which are used by this function. */
19750 for (reg = 0; reg <= 11; reg++)
19751 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19752 save_reg_mask |= (1 << reg);
19754 /* Handle the frame pointer as a special case. */
19755 if (frame_pointer_needed)
19756 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19758 /* If we aren't loading the PIC register,
19759 don't stack it even though it may be live. */
19760 if (PIC_REGISTER_MAY_NEED_SAVING
19761 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19762 || crtl->uses_pic_offset_table))
19763 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19765 /* The prologue will copy SP into R0, so save it. */
19766 if (IS_STACKALIGN (func_type))
19767 save_reg_mask |= 1;
19770 /* Save registers so the exception handler can modify them. */
19771 if (crtl->calls_eh_return)
19777 reg = EH_RETURN_DATA_REGNO (i);
19778 if (reg == INVALID_REGNUM)
19780 save_reg_mask |= 1 << reg;
19784 return save_reg_mask;
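/* A worked example, illustrative only: a normal function that uses
   the callee-saved registers r4 and r7 and needs a frame pointer
   returns (1 << 4) | (1 << 7) | (1 << HARD_FRAME_POINTER_REGNUM),
   i.e. 0x890 in ARM state, where the hard frame pointer is r11.  */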
19787 /* Return true if r3 is live at the start of the function. */
19790 arm_r3_live_at_start_p (void)
19792 /* Just look at cfg info, which is still close enough to correct at this
19793 point. This gives false positives for broken functions that might use
19794 uninitialized data that happens to be allocated in r3, but who cares? */
19795 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19798 /* Compute the number of bytes used to store the static chain register on the
19799 stack, above the stack frame. We need to know this accurately to get the
19800 alignment of the rest of the stack frame correct. */
19803 arm_compute_static_chain_stack_bytes (void)
19805 /* Once the value is updated from the init value of -1, do not
19807 if (cfun->machine->static_chain_stack_bytes != -1)
19808 return cfun->machine->static_chain_stack_bytes;
19810 /* See the defining assertion in arm_expand_prologue. */
19811 if (IS_NESTED (arm_current_func_type ())
19812 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19813 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19814 || flag_stack_clash_protection)
19815 && !df_regs_ever_live_p (LR_REGNUM)))
19816 && arm_r3_live_at_start_p ()
19817 && crtl->args.pretend_args_size == 0)
19823 /* Compute a bit mask of which core registers need to be
19824 saved on the stack for the current function.
19825 This is used by arm_compute_frame_layout, which may add extra registers. */
19827 static unsigned long
19828 arm_compute_save_core_reg_mask (void)
19830 unsigned int save_reg_mask = 0;
19831 unsigned long func_type = arm_current_func_type ();
19834 if (IS_NAKED (func_type))
19835 /* This should never really happen. */
19838 /* If we are creating a stack frame, then we must save the frame pointer,
19839 IP (which will hold the old stack pointer), LR and the PC. */
19840 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19842 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19845 | (1 << PC_REGNUM);
19847 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19849 /* Decide if we need to save the link register.
19850 Interrupt routines have their own banked link register,
19851 so they never need to save it.
19852 Otherwise if we do not use the link register we do not need to save
19853 it. If we are pushing other registers onto the stack however, we
19854 can save an instruction in the epilogue by pushing the link register
19855 now and then popping it back into the PC. This incurs extra memory
19856 accesses though, so we only do it when optimizing for size, and only
19857 if we know that we will not need a fancy return sequence. */
19858 if (df_regs_ever_live_p (LR_REGNUM)
19861 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19862 && !crtl->tail_call_emit
19863 && !crtl->calls_eh_return))
19864 save_reg_mask |= 1 << LR_REGNUM;
19866 if (cfun->machine->lr_save_eliminated)
19867 save_reg_mask &= ~ (1 << LR_REGNUM);
19869 if (TARGET_REALLY_IWMMXT
19870 && ((bit_count (save_reg_mask)
19871 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19872 arm_compute_static_chain_stack_bytes ())
19875 /* The total number of registers that are going to be pushed
19876 onto the stack is odd. We need to ensure that the stack
19877 is 64-bit aligned before we start to save iWMMXt registers,
19878 and also before we start to create locals. (A local variable
19879 might be a double or long long which we will load/store using
19880 an iWMMXt instruction). Therefore we need to push another
19881 ARM register, so that the stack will be 64-bit aligned. We
19882 try to avoid using the arg registers (r0 - r3) as they might be
19883 used to pass values in a tail call. */
19884 for (reg = 4; reg <= 12; reg++)
19885 if ((save_reg_mask & (1 << reg)) == 0)
19889 save_reg_mask |= (1 << reg);
19892 cfun->machine->sibcall_blocked = 1;
19893 save_reg_mask |= (1 << 3);
19897 /* We may need to push an additional register for use initializing the
19898 PIC base register. */
19899 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19900 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19902 reg = thumb_find_work_register (1 << 4);
19903 if (!call_used_regs[reg])
19904 save_reg_mask |= (1 << reg);
19907 return save_reg_mask;
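/* An illustrative example: with an APCS frame in ARM state, a
   function needing a stack frame starts from FP (r11), IP (r12), LR
   and PC, so if it also uses r4 the resulting mask is 0xd810, before
   any iWMMXt alignment or PIC-base adjustments.  */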
19910 /* Compute a bit mask of which core registers need to be
19911 saved on the stack for the current function. */
19912 static unsigned long
19913 thumb1_compute_save_core_reg_mask (void)
19915 unsigned long mask;
19919 for (reg = 0; reg < 12; reg ++)
19920 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19923 /* Handle the frame pointer as a special case. */
19924 if (frame_pointer_needed)
19925 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19928 && !TARGET_SINGLE_PIC_BASE
19929 && arm_pic_register != INVALID_REGNUM
19930 && crtl->uses_pic_offset_table)
19931 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19933 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19934 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19935 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19937 /* LR will also be pushed if any lo regs are pushed. */
19938 if (mask & 0xff || thumb_force_lr_save ())
19939 mask |= (1 << LR_REGNUM);
19941 bool call_clobbered_scratch
19942 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
19943 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
19945 /* Make sure we have a low work register if we need one. We will
19946 need one if we are going to push a high register, but we are not
19947 currently intending to push a low register. However if both the
19948 prologue and epilogue have a spare call-clobbered low register,
19949 then we won't need to find an additional work register. It does
19950 not need to be the same register in the prologue and
19952 if ((mask & 0xff) == 0
19953 && !call_clobbered_scratch
19954 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19956 /* Use thumb_find_work_register to choose which register
19957 we will use. If the register is live then we will
19958 have to push it. Use LAST_LO_REGNUM as our fallback
19959 choice for the register to select. */
19960 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19961 /* Make sure the register returned by thumb_find_work_register is
19962 not part of the return value. */
19963 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19964 reg = LAST_LO_REGNUM;
19966 if (callee_saved_reg_p (reg))
19970 /* The 504 below is 8 bytes less than 512 because there are two possible
19971 alignment words. We can't tell here if they will be present or not so we
19972 have to play it safe and assume that they are. */
19973 if ((CALLER_INTERWORKING_SLOT_SIZE +
19974 ROUND_UP_WORD (get_frame_size ()) +
19975 crtl->outgoing_args_size) >= 504)
19977 /* This is the same as the code in thumb1_expand_prologue() which
19978 determines which register to use for stack decrement. */
19979 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19980 if (mask & (1 << reg))
19983 if (reg > LAST_LO_REGNUM)
19985 /* Make sure we have a register available for stack decrement. */
19986 mask |= 1 << LAST_LO_REGNUM;
19994 /* Return the number of bytes required to save VFP registers. */
19996 arm_get_vfp_saved_size (void)
19998 unsigned int regno;
20003 /* Space for saved VFP registers. */
20004 if (TARGET_HARD_FLOAT)
20007 for (regno = FIRST_VFP_REGNUM;
20008 regno < LAST_VFP_REGNUM;
20011 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
20012 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
20016 /* Work around the ARM10 VFPr1 bug. */
20017 if (count == 2 && !arm_arch6)
20019 saved += count * 8;
20028 if (count == 2 && !arm_arch6)
20030 saved += count * 8;
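/* Worked example, illustrative only: if the call-saved registers
   d8-d15 are all live this returns 8 * 8 == 64 bytes; on pre-v6
   ARM10 cores a run of exactly two D registers is counted as three
   to work around the VFPr1 erratum.  */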
20037 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
20038 everything bar the final return instruction. If simple_return is true,
20039 then do not output the epilogue, because it has already been emitted in RTL.
20041 Note: do not forget to update length attribute of corresponding insn pattern
20042 when changing assembly output (eg. length attribute of
20043 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
20044 register clearing sequences). */
20046 output_return_instruction (rtx operand, bool really_return, bool reverse,
20047 bool simple_return)
20049 char conditional[10];
20052 unsigned long live_regs_mask;
20053 unsigned long func_type;
20054 arm_stack_offsets *offsets;
20056 func_type = arm_current_func_type ();
20058 if (IS_NAKED (func_type))
20061 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
20063 /* If this function was declared non-returning, and we have
20064 found a tail call, then we have to trust that the called
20065 function won't return. */
20070 /* Otherwise, trap an attempted return by aborting. */
20072 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
20074 assemble_external_libcall (ops[1]);
20075 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
20081 gcc_assert (!cfun->calls_alloca || really_return);
20083 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
20085 cfun->machine->return_used_this_function = 1;
20087 offsets = arm_get_frame_offsets ();
20088 live_regs_mask = offsets->saved_regs_mask;
20090 if (!simple_return && live_regs_mask)
20092 const char * return_reg;
20094 /* If we do not have any special requirements for function exit
20095 (e.g. interworking) then we can load the return address
20096 directly into the PC. Otherwise we must load it into LR. */
20098 && !IS_CMSE_ENTRY (func_type)
20099 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
20100 return_reg = reg_names[PC_REGNUM];
20102 return_reg = reg_names[LR_REGNUM];
20104 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
20106 /* There are three possible reasons for the IP register
20107 being saved. 1) a stack frame was created, in which case
20108 IP contains the old stack pointer, or 2) an ISR routine
20109 corrupted it, or 3) it was saved to align the stack on
20110 iWMMXt. In case 1, restore IP into SP, otherwise just
20112 if (frame_pointer_needed)
20114 live_regs_mask &= ~ (1 << IP_REGNUM);
20115 live_regs_mask |= (1 << SP_REGNUM);
20118 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
20121 /* On some ARM architectures it is faster to use LDR rather than
20122 LDM to load a single register. On other architectures, the
20123 cost is the same. In 26 bit mode, or for exception handlers,
20124 we have to use LDM to load the PC so that the CPSR is also
20126 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
20127 if (live_regs_mask == (1U << reg))
20130 if (reg <= LAST_ARM_REGNUM
20131 && (reg != LR_REGNUM
20133 || ! IS_INTERRUPT (func_type)))
20135 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
20136 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
20143 /* Generate the load multiple instruction to restore the
20144 registers. Note we can get here, even if
20145 frame_pointer_needed is true, but only if sp already
20146 points to the base of the saved core registers. */
20147 if (live_regs_mask & (1 << SP_REGNUM))
20149 unsigned HOST_WIDE_INT stack_adjust;
20151 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
20152 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
20154 if (stack_adjust && arm_arch5t && TARGET_ARM)
20155 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
20158 /* If we can't use ldmib (SA110 bug),
20159 then try to pop r3 instead. */
20161 live_regs_mask |= 1 << 3;
20163 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
20166 /* For interrupt returns we have to use an LDM rather than
20167 a POP so that we can use the exception return variant. */
20168 else if (IS_INTERRUPT (func_type))
20169 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
20171 sprintf (instr, "pop%s\t{", conditional);
20173 p = instr + strlen (instr);
20175 for (reg = 0; reg <= SP_REGNUM; reg++)
20176 if (live_regs_mask & (1 << reg))
20178 int l = strlen (reg_names[reg]);
20184 memcpy (p, ", ", 2);
20188 memcpy (p, "%|", 2);
20189 memcpy (p + 2, reg_names[reg], l);
20193 if (live_regs_mask & (1 << LR_REGNUM))
20195 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
20196 /* If returning from an interrupt, restore the CPSR. */
20197 if (IS_INTERRUPT (func_type))
20204 output_asm_insn (instr, & operand);
20206 /* See if we need to generate an extra instruction to
20207 perform the actual function return. */
20209 && func_type != ARM_FT_INTERWORKED
20210 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
20212 /* The return has already been handled
20213 by loading the LR into the PC. */
20220 switch ((int) ARM_FUNC_TYPE (func_type))
20224 /* ??? This is wrong for unified assembly syntax. */
20225 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
20228 case ARM_FT_INTERWORKED:
20229 gcc_assert (arm_arch5t || arm_arch4t);
20230 sprintf (instr, "bx%s\t%%|lr", conditional);
20233 case ARM_FT_EXCEPTION:
20234 /* ??? This is wrong for unified assembly syntax. */
20235 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
20239 if (IS_CMSE_ENTRY (func_type))
20241 /* Check if we have to clear the 'GE bits' which is only used if
20242 parallel add and subtraction instructions are available. */
20243 if (TARGET_INT_SIMD)
20244 snprintf (instr, sizeof (instr),
20245 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
20247 snprintf (instr, sizeof (instr),
20248 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
20250 output_asm_insn (instr, & operand);
20251 if (TARGET_HARD_FLOAT)
20253 /* Clear the cumulative exception-status bits (0-4,7) and the
20254 condition code bits (28-31) of the FPSCR. We need to
20255 remember to clear the first scratch register used (IP) and
20256 save and restore the second (r4). */
20257 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
20258 output_asm_insn (instr, & operand);
20259 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
20260 output_asm_insn (instr, & operand);
20261 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
20262 output_asm_insn (instr, & operand);
20263 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
20264 output_asm_insn (instr, & operand);
20265 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
20266 output_asm_insn (instr, & operand);
20267 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
20268 output_asm_insn (instr, & operand);
20269 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
20270 output_asm_insn (instr, & operand);
20271 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
20272 output_asm_insn (instr, & operand);
20274 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
20276 /* Use bx if it's available. */
20277 else if (arm_arch5t || arm_arch4t)
20278 sprintf (instr, "bx%s\t%%|lr", conditional);
20280 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
20284 output_asm_insn (instr, & operand);
20290 /* Output in FILE asm statements needed to declare the NAME of the function
20291 defined by its DECL node. */
20294 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
20296 size_t cmse_name_len;
20297 char *cmse_name = 0;
20298 char cmse_prefix[] = "__acle_se_";
20300 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
20301 extra function label for each function with the 'cmse_nonsecure_entry'
20302 attribute. This extra function label should be prepended with
20303 '__acle_se_', telling the linker that it needs to create secure gateway
20304 veneers for this function. */
20305 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
20306 DECL_ATTRIBUTES (decl)))
20308 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
20309 cmse_name = XALLOCAVEC (char, cmse_name_len);
20310 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
20311 targetm.asm_out.globalize_label (file, cmse_name);
20313 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
20314 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
20317 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
20318 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20319 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20320 ASM_OUTPUT_LABEL (file, name);
20323 ASM_OUTPUT_LABEL (file, cmse_name);
20325 ARM_OUTPUT_FN_UNWIND (file, TRUE);
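/* For a function "foo" carrying the cmse_nonsecure_entry attribute
   the output looks roughly like this (illustrative):

       .global	__acle_se_foo
       .type	__acle_se_foo, %function
       .type	foo, %function
   foo:
   __acle_se_foo:

   Both labels name the same address; the linker keys off the
   __acle_se_ prefix to build secure gateway veneers.  */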
20328 /* Write the function name into the code section, directly preceding
20329 the function prologue.
20331 Code will be output similar to this:
20332 t0
20333 .ascii "arm_poke_function_name", 0
20334 .align
20335 t1
20336 .word 0xff000000 + (t1 - t0)
20337 arm_poke_function_name
20338 mov     ip, sp
20339 stmfd   sp!, {fp, ip, lr, pc}
20340 sub     fp, ip, #4
20342 When performing a stack backtrace, code can inspect the value
20343 of 'pc' stored at 'fp' + 0. If the trace function then looks
20344 at location pc - 12 and the top 8 bits are set, then we know
20345 that there is a function name embedded immediately preceding this
20346 location, and that its length is (pc[-3] & 0x00ffffff).
20348 We assume that pc is declared as a pointer to an unsigned long.
20350 It is of no benefit to output the function name if we are assembling
20351 a leaf function. These function types will not contain a stack
20352 backtrace structure, therefore it is not possible to determine the
20355 arm_poke_function_name (FILE *stream, const char *name)
20357 unsigned long alignlength;
20358 unsigned long length;
20361 length = strlen (name) + 1;
20362 alignlength = ROUND_UP_WORD (length);
20364 ASM_OUTPUT_ASCII (stream, name, length);
20365 ASM_OUTPUT_ALIGN (stream, 2);
20366 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
20367 assemble_aligned_integer (UNITS_PER_WORD, x);
20370 /* Place some comments into the assembler stream
20371 describing the current function. */
20373 arm_output_function_prologue (FILE *f)
20375 unsigned long func_type;
20377 /* Sanity check. */
20378 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
20380 func_type = arm_current_func_type ();
20382 switch ((int) ARM_FUNC_TYPE (func_type))
20385 case ARM_FT_NORMAL:
20387 case ARM_FT_INTERWORKED:
20388 asm_fprintf (f, "\t%@ Function supports interworking.\n");
20391 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
20394 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
20396 case ARM_FT_EXCEPTION:
20397 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
20401 if (IS_NAKED (func_type))
20402 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
20404 if (IS_VOLATILE (func_type))
20405 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
20407 if (IS_NESTED (func_type))
20408 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
20409 if (IS_STACKALIGN (func_type))
20410 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
20411 if (IS_CMSE_ENTRY (func_type))
20412 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
20414 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
20415 (HOST_WIDE_INT) crtl->args.size,
20416 crtl->args.pretend_args_size,
20417 (HOST_WIDE_INT) get_frame_size ());
20419 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
20420 frame_pointer_needed,
20421 cfun->machine->uses_anonymous_args);
20423 if (cfun->machine->lr_save_eliminated)
20424 asm_fprintf (f, "\t%@ link register save eliminated.\n");
20426 if (crtl->calls_eh_return)
20427 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
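/* The result is a short block of assembler comments, for example
   (illustrative):

       @ Function supports interworking.
       @ args = 0, pretend = 0, frame = 8
       @ frame_needed = 0, uses_anonymous_args = 0
       @ link register save eliminated.  */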
20432 arm_output_function_epilogue (FILE *)
20434 arm_stack_offsets *offsets;
20440 /* Emit any call-via-reg trampolines that are needed for v4t support
20441 of call_reg and call_value_reg type insns. */
20442 for (regno = 0; regno < LR_REGNUM; regno++)
20444 rtx label = cfun->machine->call_via[regno];
20448 switch_to_section (function_section (current_function_decl));
20449 targetm.asm_out.internal_label (asm_out_file, "L",
20450 CODE_LABEL_NUMBER (label));
20451 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20455 /* ??? Probably not safe to set this here, since it assumes that a
20456 function will be emitted as assembly immediately after we generate
20457 RTL for it. This does not happen for inline functions. */
20458 cfun->machine->return_used_this_function = 0;
20460 else /* TARGET_32BIT */
20462 /* We need to take into account any stack-frame rounding. */
20463 offsets = arm_get_frame_offsets ();
20465 gcc_assert (!use_return_insn (FALSE, NULL)
20466 || (cfun->machine->return_used_this_function != 0)
20467 || offsets->saved_regs == offsets->outgoing_args
20468 || frame_pointer_needed);
20472 /* Generate and emit a sequence of insns equivalent to PUSH, but using
20473 STR and STRD. If an even number of registers are being pushed, one
20474 or more STRD patterns are created for each register pair. If an
20475 odd number of registers are pushed, emit an initial STR followed by
20476 as many STRD instructions as are needed. This works best when the
20477 stack is initially 64-bit aligned (the normal case), since it
20478 ensures that each STRD is also 64-bit aligned. */
20480 thumb2_emit_strd_push (unsigned long saved_regs_mask)
20485 rtx par = NULL_RTX;
20486 rtx dwarf = NULL_RTX;
20490 num_regs = bit_count (saved_regs_mask);
20492 /* Must be at least one register to save, and can't save SP or PC. */
20493 gcc_assert (num_regs > 0 && num_regs <= 14);
20494 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20495 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20497 /* Create sequence for DWARF info. All the frame-related data for
20498 debugging is held in this wrapper. */
20499 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20501 /* Describe the stack adjustment. */
20502 tmp = gen_rtx_SET (stack_pointer_rtx,
20503 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20504 RTX_FRAME_RELATED_P (tmp) = 1;
20505 XVECEXP (dwarf, 0, 0) = tmp;
20507 /* Find the first register. */
20508 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
20513 /* If there's an odd number of registers to push, start off by
20514 pushing a single register. This ensures that subsequent strd
20515 operations are dword aligned (assuming that SP was originally
20516 64-bit aligned). */
20517 if ((num_regs & 1) != 0)
20519 rtx reg, mem, insn;
20521 reg = gen_rtx_REG (SImode, regno);
20523 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
20524 stack_pointer_rtx));
20526 mem = gen_frame_mem (Pmode,
20528 (Pmode, stack_pointer_rtx,
20529 plus_constant (Pmode, stack_pointer_rtx,
20532 tmp = gen_rtx_SET (mem, reg);
20533 RTX_FRAME_RELATED_P (tmp) = 1;
20534 insn = emit_insn (tmp);
20535 RTX_FRAME_RELATED_P (insn) = 1;
20536 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20537 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
20538 RTX_FRAME_RELATED_P (tmp) = 1;
20541 XVECEXP (dwarf, 0, i) = tmp;
20545 while (i < num_regs)
20546 if (saved_regs_mask & (1 << regno))
20548 rtx reg1, reg2, mem1, mem2;
20549 rtx tmp0, tmp1, tmp2;
20552 /* Find the register to pair with this one. */
20553 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
20557 reg1 = gen_rtx_REG (SImode, regno);
20558 reg2 = gen_rtx_REG (SImode, regno2);
20565 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20568 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20570 -4 * (num_regs - 1)));
20571 tmp0 = gen_rtx_SET (stack_pointer_rtx,
20572 plus_constant (Pmode, stack_pointer_rtx,
20574 tmp1 = gen_rtx_SET (mem1, reg1);
20575 tmp2 = gen_rtx_SET (mem2, reg2);
20576 RTX_FRAME_RELATED_P (tmp0) = 1;
20577 RTX_FRAME_RELATED_P (tmp1) = 1;
20578 RTX_FRAME_RELATED_P (tmp2) = 1;
20579 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
20580 XVECEXP (par, 0, 0) = tmp0;
20581 XVECEXP (par, 0, 1) = tmp1;
20582 XVECEXP (par, 0, 2) = tmp2;
20583 insn = emit_insn (par);
20584 RTX_FRAME_RELATED_P (insn) = 1;
20585 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20589 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20592 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20595 tmp1 = gen_rtx_SET (mem1, reg1);
20596 tmp2 = gen_rtx_SET (mem2, reg2);
20597 RTX_FRAME_RELATED_P (tmp1) = 1;
20598 RTX_FRAME_RELATED_P (tmp2) = 1;
20599 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20600 XVECEXP (par, 0, 0) = tmp1;
20601 XVECEXP (par, 0, 1) = tmp2;
20605 /* Create unwind information. This is an approximation. */
20606 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
20607 plus_constant (Pmode,
20611 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20612 plus_constant (Pmode,
20617 RTX_FRAME_RELATED_P (tmp1) = 1;
20618 RTX_FRAME_RELATED_P (tmp2) = 1;
20619 XVECEXP (dwarf, 0, i + 1) = tmp1;
20620 XVECEXP (dwarf, 0, i + 2) = tmp2;
20622 regno = regno2 + 1;
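/* An illustrative sequence: pushing the odd-sized set {r4, r6, r7}
   first emits one writeback store so the following strd stays
   doubleword aligned (Thumb-2 STRD does not need consecutive
   registers):

       str	r4, [sp, #-12]!
       strd	r6, r7, [sp, #4]  */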
20630 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20631 whenever possible, otherwise it emits single-word stores. The first store
20632 also allocates stack space for all saved registers, using writeback with
20633 post-addressing mode. All other stores use offset addressing. If no STRD
20634 can be emitted, this function emits a sequence of single-word stores,
20635 and not an STM as before, because single-word stores give the scheduler
20636 more freedom and can be turned into an STM by peephole optimizations. */
20638 arm_emit_strd_push (unsigned long saved_regs_mask)
20641 int i, j, dwarf_index = 0;
20643 rtx dwarf = NULL_RTX;
20644 rtx insn = NULL_RTX;
20647 /* TODO: More efficient code can be emitted by changing the
20648 layout, e.g., first push all pairs that can use STRD to keep the
20649 stack aligned, and then push all other registers. */
20650 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20651 if (saved_regs_mask & (1 << i))
20654 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20655 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20656 gcc_assert (num_regs > 0);
20658 /* Create sequence for DWARF info. */
20659 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20661 /* For dwarf info, we generate explicit stack update. */
20662 tmp = gen_rtx_SET (stack_pointer_rtx,
20663 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20664 RTX_FRAME_RELATED_P (tmp) = 1;
20665 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20667 /* Save registers. */
20668 offset = - 4 * num_regs;
20670 while (j <= LAST_ARM_REGNUM)
20671 if (saved_regs_mask & (1 << j))
20674 && (saved_regs_mask & (1 << (j + 1))))
20676 /* Current register and previous register form register pair for
20677 which STRD can be generated. */
20680 /* Allocate stack space for all saved registers. */
20681 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20682 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20683 mem = gen_frame_mem (DImode, tmp);
20686 else if (offset > 0)
20687 mem = gen_frame_mem (DImode,
20688 plus_constant (Pmode,
20692 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20694 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20695 RTX_FRAME_RELATED_P (tmp) = 1;
20696 tmp = emit_insn (tmp);
20698 /* Record the first store insn. */
20699 if (dwarf_index == 1)
20702 /* Generate dwarf info. */
20703 mem = gen_frame_mem (SImode,
20704 plus_constant (Pmode,
20707 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20708 RTX_FRAME_RELATED_P (tmp) = 1;
20709 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20711 mem = gen_frame_mem (SImode,
20712 plus_constant (Pmode,
20715 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20716 RTX_FRAME_RELATED_P (tmp) = 1;
20717 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20724 /* Emit a single word store. */
20727 /* Allocate stack space for all saved registers. */
20728 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20729 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20730 mem = gen_frame_mem (SImode, tmp);
20733 else if (offset > 0)
20734 mem = gen_frame_mem (SImode,
20735 plus_constant (Pmode,
20739 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20741 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20742 RTX_FRAME_RELATED_P (tmp) = 1;
20743 tmp = emit_insn (tmp);
20745 /* Record the first store insn. */
20746 if (dwarf_index == 1)
20749 /* Generate dwarf info. */
20750 mem = gen_frame_mem (SImode,
20751 plus_constant(Pmode,
20754 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20755 RTX_FRAME_RELATED_P (tmp) = 1;
20756 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20765 /* Attach dwarf info to the first insn we generate. */
20766 gcc_assert (insn != NULL_RTX);
20767 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20768 RTX_FRAME_RELATED_P (insn) = 1;
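/* An illustrative sequence for ARM mode, where STRD needs a
   consecutive even/odd register pair: pushing {r4, r5, r7} gives

       strd	r4, r5, [sp, #-12]!
       str	r7, [sp, #8]

   The first store allocates the whole area with writeback; the rest
   use plain offset addressing.  */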
20771 /* Generate and emit an insn that we will recognize as a push_multi.
20772 Unfortunately, since this insn does not reflect very well the actual
20773 semantics of the operation, we need to annotate the insn for the benefit
20774 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20775 MASK for registers that should be annotated for DWARF2 frame unwind
20778 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20781 int num_dwarf_regs = 0;
20785 int dwarf_par_index;
20788 /* We don't record the PC in the dwarf frame information. */
20789 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20791 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20793 if (mask & (1 << i))
20795 if (dwarf_regs_mask & (1 << i))
20799 gcc_assert (num_regs && num_regs <= 16);
20800 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20802 /* For the body of the insn we are going to generate an UNSPEC in
20803 parallel with several USEs. This allows the insn to be recognized
20804 by the push_multi pattern in the arm.md file.
20806 The body of the insn looks something like this:
20808 (parallel [
20809    (set (mem:BLK (pre_modify:SI (reg:SI sp)
20810                                 (const_int:SI <num>)))
20811         (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20812    (use (reg:SI XX))
20813    (use (reg:SI YY))
20814    ...
20815 ])
20817 For the frame note however, we try to be more explicit and actually
20818 show each register being stored into the stack frame, plus a (single)
20819 decrement of the stack pointer. We do it this way in order to be
20820 friendly to the stack unwinding code, which only wants to see a single
20821 stack decrement per instruction. The RTL we generate for the note looks
20822 something like this:
20824 (sequence [
20825    (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20826    (set (mem:SI (reg:SI sp)) (reg:SI r4))
20827    (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20828    (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20829    ...
20830 ])
20832 FIXME: In an ideal world the PRE_MODIFY would not exist and
20833 instead we'd have a parallel expression detailing all
20834 the stores to the various memory addresses so that debug
20835 information is more up-to-date. Remember however while writing
20836 this to take care of the constraints with the push instruction.
20838 Note also that this has to be taken care of for the VFP registers.
20840 For more see PR43399. */
20842 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20843 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20844 dwarf_par_index = 1;
20846 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20848 if (mask & (1 << i))
20850 reg = gen_rtx_REG (SImode, i);
20852 XVECEXP (par, 0, 0)
20853 = gen_rtx_SET (gen_frame_mem
20855 gen_rtx_PRE_MODIFY (Pmode,
20858 (Pmode, stack_pointer_rtx,
20861 gen_rtx_UNSPEC (BLKmode,
20862 gen_rtvec (1, reg),
20863 UNSPEC_PUSH_MULT));
20865 if (dwarf_regs_mask & (1 << i))
20867 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20869 RTX_FRAME_RELATED_P (tmp) = 1;
20870 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20877 for (j = 1, i++; j < num_regs; i++)
20879 if (mask & (1 << i))
20881 reg = gen_rtx_REG (SImode, i);
20883 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20885 if (dwarf_regs_mask & (1 << i))
20888 = gen_rtx_SET (gen_frame_mem
20890 plus_constant (Pmode, stack_pointer_rtx,
20893 RTX_FRAME_RELATED_P (tmp) = 1;
20894 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20901 par = emit_insn (par);
20903 tmp = gen_rtx_SET (stack_pointer_rtx,
20904 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20905 RTX_FRAME_RELATED_P (tmp) = 1;
20906 XVECEXP (dwarf, 0, 0) = tmp;
20908 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
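/* Illustrative result: for MASK == {r4, r5, lr} this emits a single
   instruction, something like

       push	{r4, r5, lr}

   while the attached REG_FRAME_RELATED_EXPR note describes the same
   effect as an explicit sp -= 12 followed by three individual
   stores, which is the shape the unwinder wants.  */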
20913 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20914 SIZE is the offset to be adjusted.
20915 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20917 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20921 RTX_FRAME_RELATED_P (insn) = 1;
20922 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20923 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20926 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20927 SAVED_REGS_MASK shows which registers need to be restored.
20929 Unfortunately, since this insn does not reflect very well the actual
20930 semantics of the operation, we need to annotate the insn for the benefit
20931 of DWARF2 frame unwind information. */
20933 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20938 rtx dwarf = NULL_RTX;
20940 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20944 offset_adj = return_in_pc ? 1 : 0;
20945 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20946 if (saved_regs_mask & (1 << i))
20949 gcc_assert (num_regs && num_regs <= 16);
20951 /* If SP is in the reglist, then we don't emit the SP update insn. */
20952 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20954 /* The parallel needs to hold num_regs SETs
20955 and one SET for the stack update. */
20956 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20959 XVECEXP (par, 0, 0) = ret_rtx;
20963 /* Increment the stack pointer, based on there being
20964 num_regs 4-byte registers to restore. */
20965 tmp = gen_rtx_SET (stack_pointer_rtx,
20966 plus_constant (Pmode,
20969 RTX_FRAME_RELATED_P (tmp) = 1;
20970 XVECEXP (par, 0, offset_adj) = tmp;
20973 /* Now restore every reg, which may include PC. */
20974 for (j = 0, i = 0; j < num_regs; i++)
20975 if (saved_regs_mask & (1 << i))
20977 reg = gen_rtx_REG (SImode, i);
20978 if ((num_regs == 1) && emit_update && !return_in_pc)
20980 /* Emit single load with writeback. */
20981 tmp = gen_frame_mem (SImode,
20982 gen_rtx_POST_INC (Pmode,
20983 stack_pointer_rtx));
20984 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20985 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20989 tmp = gen_rtx_SET (reg,
20992 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20993 RTX_FRAME_RELATED_P (tmp) = 1;
20994 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20996 /* We need to maintain a sequence for DWARF info too. As the dwarf info
20997 should not include the PC, skip the PC. */
20998 if (i != PC_REGNUM)
20999 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
21005 par = emit_jump_insn (par);
21007 par = emit_insn (par);
21009 REG_NOTES (par) = dwarf;
21011 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
21012 stack_pointer_rtx, stack_pointer_rtx);
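/* Illustrative result: restoring {r4, r5, pc} emits

       pop	{r4, r5, pc}

   as a jump insn (the PC load is the return), with REG_CFA_RESTORE
   notes for r4 and r5 and a REG_CFA_ADJUST_CFA note for the 12-byte
   stack adjustment.  */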
21015 /* Generate and emit an insn pattern that we will recognize as a pop_multi
21016 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
21018 Unfortunately, since this insn does not reflect very well the actual
21019 semantics of the operation, we need to annotate the insn for the benefit
21020 of DWARF2 frame unwind information. */
21022 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
21026 rtx dwarf = NULL_RTX;
21029 gcc_assert (num_regs && num_regs <= 32);
21031 /* Work around the ARM10 VFPr1 bug. */
21032 if (num_regs == 2 && !arm_arch6)
21034 if (first_reg == 15)
21040 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
21041 there could be up to 32 D-registers to restore.
21042 If there are more than 16 D-registers, make two recursive calls,
21043 each of which emits one pop_multi instruction. */
21046 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
21047 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
21051 /* The parallel needs to hold num_regs SETs
21052 and one SET for the stack update. */
21053 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
21055 /* Increment the stack pointer, based on there being
21056 num_regs 8-byte registers to restore. */
21057 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
21058 RTX_FRAME_RELATED_P (tmp) = 1;
21059 XVECEXP (par, 0, 0) = tmp;
21061 /* Now show every reg that will be restored, using a SET for each. */
21062 for (j = 0, i = first_reg; j < num_regs; i += 2)
21064 reg = gen_rtx_REG (DFmode, i);
21066 tmp = gen_rtx_SET (reg,
21069 plus_constant (Pmode, base_reg, 8 * j)));
21070 RTX_FRAME_RELATED_P (tmp) = 1;
21071 XVECEXP (par, 0, j + 1) = tmp;
21073 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
21078 par = emit_insn (par);
21079 REG_NOTES (par) = dwarf;
21081 /* Make sure the CFA isn't left based on IP_REGNUM, to allow unwinding from FP. */
21082 if (REGNO (base_reg) == IP_REGNUM)
21084 RTX_FRAME_RELATED_P (par) = 1;
21085 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
21088 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
21089 base_reg, base_reg);
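/* Illustrative result: popping d8-d15 via the stack pointer emits
   something like

       vldmia	sp!, {d8-d15}

   and a request for more than 16 D registers is split into two
   pop_multi patterns by the recursive calls above.  */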
21092 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
21093 even number of registers is being popped, multiple LDRD patterns are created
21094 for all register pairs. If an odd number of registers is popped, the last
21095 register is loaded via an LDR pattern. */
21097 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
21101 rtx par = NULL_RTX;
21102 rtx dwarf = NULL_RTX;
21103 rtx tmp, reg, tmp1;
21104 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
21106 for (i = 0; i <= LAST_ARM_REGNUM; i++)
21107 if (saved_regs_mask & (1 << i))
21110 gcc_assert (num_regs && num_regs <= 16);
21112 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
21113 to be popped. So, if num_regs is even, now it will become odd,
21114 and we can generate pop with PC. If num_regs is odd, it will be
21115 even now, and ldr with return can be generated for PC. */
21119 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21121 /* Var j iterates over all the registers to gather all the registers in
21122 saved_regs_mask. Var i gives index of saved registers in stack frame.
21123 A PARALLEL RTX of register-pair is created here, so that pattern for
21124 LDRD can be matched. As PC is always last register to be popped, and
21125 we have already decremented num_regs if PC, we don't have to worry
21126 about PC in this loop. */
21127 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
21128 if (saved_regs_mask & (1 << j))
21130 /* Create RTX for memory load. */
21131 reg = gen_rtx_REG (SImode, j);
21132 tmp = gen_rtx_SET (reg,
21133 gen_frame_mem (SImode,
21134 plus_constant (Pmode,
21135 stack_pointer_rtx, 4 * i)));
21136 RTX_FRAME_RELATED_P (tmp) = 1;
21140 /* When saved-register index (i) is even, the RTX to be emitted is
21141 yet to be created. Hence create it first. The LDRD pattern we
21142 are generating is :
21143 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
21144 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
21145 where target registers need not be consecutive. */
21146 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
21150 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
21151 added as 0th element and if i is odd, reg_i is added as 1st element
21152 of LDRD pattern shown above. */
21153 XVECEXP (par, 0, (i % 2)) = tmp;
21154 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
21158 /* When saved-register index (i) is odd, RTXs for both the registers
21159 to be loaded are generated in above given LDRD pattern, and the
21160 pattern can be emitted now. */
21161 par = emit_insn (par);
21162 REG_NOTES (par) = dwarf;
21163 RTX_FRAME_RELATED_P (par) = 1;
21169 /* If the number of registers popped is odd AND return_in_pc is false, OR
21170 the number of registers popped is even AND return_in_pc is true, the last
21171 register is popped using LDR. It can be the PC as well. Hence, adjust the
21172 stack first and then use LDR with post increment. */
21174 /* Increment the stack pointer, based on there being
21175 num_regs 4-byte registers to restore. */
21176 tmp = gen_rtx_SET (stack_pointer_rtx,
21177 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
21178 RTX_FRAME_RELATED_P (tmp) = 1;
21179 tmp = emit_insn (tmp);
21182 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
21183 stack_pointer_rtx, stack_pointer_rtx);
21188 if (((num_regs % 2) == 1 && !return_in_pc)
21189 || ((num_regs % 2) == 0 && return_in_pc))
21191 /* Scan for the single register to be popped. Skip until the saved
21192 register is found. */
21193 for (; (saved_regs_mask & (1 << j)) == 0; j++);
21195 /* Gen LDR with post increment here. */
21196 tmp1 = gen_rtx_MEM (SImode,
21197 gen_rtx_POST_INC (SImode,
21198 stack_pointer_rtx));
21199 set_mem_alias_set (tmp1, get_frame_alias_set ());
21201 reg = gen_rtx_REG (SImode, j);
21202 tmp = gen_rtx_SET (reg, tmp1);
21203 RTX_FRAME_RELATED_P (tmp) = 1;
21204 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
21208 /* If return_in_pc, j must be PC_REGNUM. */
21209 gcc_assert (j == PC_REGNUM);
21210 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
21211 XVECEXP (par, 0, 0) = ret_rtx;
21212 XVECEXP (par, 0, 1) = tmp;
21213 par = emit_jump_insn (par);
21217 par = emit_insn (tmp);
21218 REG_NOTES (par) = dwarf;
21219 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
21220 stack_pointer_rtx, stack_pointer_rtx);
21224 else if ((num_regs % 2) == 1 && return_in_pc)
21226 /* There are 2 registers to be popped. So, generate the pattern
21227 pop_multiple_with_stack_update_and_return to pop in PC. */
21228 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
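/* An illustrative sequence: popping {r4, r6, r7, pc} pairs the low
   registers (Thumb-2 LDRD targets need not be consecutive), adjusts
   the stack, and finishes with a pop into the PC:

       ldrd	r4, r6, [sp]
       add	sp, sp, #8
       pop	{r7, pc}  */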
21234 /* LDRD in ARM mode needs consecutive registers as operands. This function
21235 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
21236 offset addressing and then generates one separate stack update. This provides
21237 more scheduling freedom, compared to writeback on every load. However,
21238 if the function returns using load into PC directly
21239 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
21240 before the last load. TODO: Add a peephole optimization to recognize
21241 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
21242 peephole optimization to merge the load at stack-offset zero
21243 with the stack update instruction using load with writeback
21244 in post-index addressing mode. */
21246 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
21250 rtx par = NULL_RTX;
21251 rtx dwarf = NULL_RTX;
21254 /* Restore saved registers. */
21255 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
21257 while (j <= LAST_ARM_REGNUM)
21258 if (saved_regs_mask & (1 << j))
21261 && (saved_regs_mask & (1 << (j + 1)))
21262 && (j + 1) != PC_REGNUM)
21264 /* Current register and next register form register pair for which
21265 LDRD can be generated. PC is always the last register popped, and
21266 we handle it separately. */
21268 mem = gen_frame_mem (DImode,
21269 plus_constant (Pmode,
21273 mem = gen_frame_mem (DImode, stack_pointer_rtx);
21275 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
21276 tmp = emit_insn (tmp);
21277 RTX_FRAME_RELATED_P (tmp) = 1;
21279 /* Generate dwarf info. */
21281 dwarf = alloc_reg_note (REG_CFA_RESTORE,
21282 gen_rtx_REG (SImode, j),
21284 dwarf = alloc_reg_note (REG_CFA_RESTORE,
21285 gen_rtx_REG (SImode, j + 1),
21288 REG_NOTES (tmp) = dwarf;
21293 else if (j != PC_REGNUM)
21295 /* Emit a single word load. */
21297 mem = gen_frame_mem (SImode,
21298 plus_constant (Pmode,
21302 mem = gen_frame_mem (SImode, stack_pointer_rtx);
21304 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
21305 tmp = emit_insn (tmp);
21306 RTX_FRAME_RELATED_P (tmp) = 1;
21308 /* Generate dwarf info. */
21309 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
21310 gen_rtx_REG (SImode, j),
21316 else /* j == PC_REGNUM */
21322 /* Update the stack. */
21325 tmp = gen_rtx_SET (stack_pointer_rtx,
21326 plus_constant (Pmode,
21329 tmp = emit_insn (tmp);
21330 arm_add_cfa_adjust_cfa_note (tmp, offset,
21331 stack_pointer_rtx, stack_pointer_rtx);
21335 if (saved_regs_mask & (1 << PC_REGNUM))
21337 /* Only PC is to be popped. */
21338 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
21339 XVECEXP (par, 0, 0) = ret_rtx;
21340 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
21341 gen_frame_mem (SImode,
21342 gen_rtx_POST_INC (SImode,
21343 stack_pointer_rtx)));
21344 RTX_FRAME_RELATED_P (tmp) = 1;
21345 XVECEXP (par, 0, 1) = tmp;
21346 par = emit_jump_insn (par);
21348 /* Generate dwarf info. */
21349 dwarf = alloc_reg_note (REG_CFA_RESTORE,
21350 gen_rtx_REG (SImode, PC_REGNUM),
21352 REG_NOTES (par) = dwarf;
21353 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
21354 stack_pointer_rtx, stack_pointer_rtx);
21358 /* Calculate the size of the return value that is passed in registers. */
21360 arm_size_return_regs (void)
21364 if (crtl->return_rtx != 0)
21365 mode = GET_MODE (crtl->return_rtx);
21367 mode = DECL_MODE (DECL_RESULT (current_function_decl));
21369 return GET_MODE_SIZE (mode);
21372 /* Return true if the current function needs to save/restore LR. */
21374 thumb_force_lr_save (void)
21376 return !cfun->machine->lr_save_eliminated
21378 || thumb_far_jump_used_p ()
21379 || df_regs_ever_live_p (LR_REGNUM));
21382 /* We do not know whether r3 will be available, because
21383 there is an indirect tail call happening in this
21384 particular case. */
21386 is_indirect_tailcall_p (rtx call)
21388 rtx pat = PATTERN (call);
21390 /* Indirect tail call. */
21391 pat = XVECEXP (pat, 0, 0);
21392 if (GET_CODE (pat) == SET)
21393 pat = SET_SRC (pat);
21395 pat = XEXP (XEXP (pat, 0), 0);
21396 return REG_P (pat);
21399 /* Return true if r3 is used by any of the tail call insns in the
21400 current function. */
21402 any_sibcall_could_use_r3 (void)
21407 if (!crtl->tail_call_emit)
21409 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
21410 if (e->flags & EDGE_SIBCALL)
21412 rtx_insn *call = BB_END (e->src);
21413 if (!CALL_P (call))
21414 call = prev_nonnote_nondebug_insn (call);
21415 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
21416 if (find_regno_fusage (call, USE, 3)
21417 || is_indirect_tailcall_p (call))
21424 /* Compute the distance from register FROM to register TO.
21425 These can be the arg pointer (26), the soft frame pointer (25),
21426 the stack pointer (13) or the hard frame pointer (11).
21427 In thumb mode r7 is used as the soft frame pointer, if needed.
21428 Typical stack layout looks like this:
21430        old stack pointer -> |    |
21431                              ----
21432                             |    | \
21433                             |    |   saved arguments for
21434                             |    |   vararg functions
21435                             |    | /
21436                               --
21437    hard FP & arg pointer -> |    | \
21438                             |    |   stack
21439                             |    |   frame
21440                             |    | /
21441                               --
21442                             |    | \
21443                             |    |   call saved
21444                             |    |   registers
21445       soft frame pointer -> |    | /
21446                               --
21447                             |    | \
21448                             |    |   local
21449                             |    |   variables
21450      locals base pointer -> |    | /
21451                               --
21452                             |    | \
21453                             |    |   outgoing
21454                             |    |   arguments
21455    current stack pointer -> |    | /
21456                               --
21458 For a given function some or all of these stack components
21459 may not be needed, giving rise to the possibility of
21460 eliminating some of the registers.
21462 The values returned by this function must reflect the behavior
21463 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
21465 The sign of the number returned reflects the direction of stack
21466 growth, so the values are positive for all eliminations except
21467 from the soft frame pointer to the hard frame pointer.
21469 SFP may point just inside the local variables block to ensure correct
21473 /* Return cached stack offsets. */
21475 static arm_stack_offsets *
21476 arm_get_frame_offsets (void)
21478 struct arm_stack_offsets *offsets;
21480 offsets = &cfun->machine->stack_offsets;
21486 /* Calculate stack offsets. These are used to calculate register elimination
21487 offsets and in prologue/epilogue code. Also calculates which registers
21488 should be saved. */
21491 arm_compute_frame_layout (void)
21493 struct arm_stack_offsets *offsets;
21494 unsigned long func_type;
21497 HOST_WIDE_INT frame_size;
21500 offsets = &cfun->machine->stack_offsets;
21502 /* Initially this is the size of the local variables. It will be translated
21503 into an offset once we have determined the size of preceding data. */
21504 frame_size = ROUND_UP_WORD (get_frame_size ());
21506 /* Space for variadic functions. */
21507 offsets->saved_args = crtl->args.pretend_args_size;
21509 /* In Thumb mode this is incorrect, but never used. */
21511 = (offsets->saved_args
21512 + arm_compute_static_chain_stack_bytes ()
21513 + (frame_pointer_needed ? 4 : 0));
21517 unsigned int regno;
21519 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
21520 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21521 saved = core_saved;
21523 /* We know that SP will be doubleword aligned on entry, and we must
21524 preserve that condition at any subroutine call. We also require the
21525 soft frame pointer to be doubleword aligned. */
21527 if (TARGET_REALLY_IWMMXT)
21529 /* Check for the call-saved iWMMXt registers. */
21530 for (regno = FIRST_IWMMXT_REGNUM;
21531 regno <= LAST_IWMMXT_REGNUM;
21533 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
21537 func_type = arm_current_func_type ();
21538 /* Space for saved VFP registers. */
21539 if (! IS_VOLATILE (func_type)
21540 && TARGET_HARD_FLOAT)
21541 saved += arm_get_vfp_saved_size ();
21543 else /* TARGET_THUMB1 */
21545 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
21546 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21547 saved = core_saved;
21548 if (TARGET_BACKTRACE)
21552 /* Saved registers include the stack frame. */
21553 offsets->saved_regs
21554 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
21555 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
21557 /* A leaf function does not need any stack alignment if it has nothing
21559 if (crtl->is_leaf && frame_size == 0
21560 /* However if it calls alloca(), we have a dynamically allocated
21561 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
21562 && ! cfun->calls_alloca)
21564 offsets->outgoing_args = offsets->soft_frame;
21565 offsets->locals_base = offsets->soft_frame;
21569 /* Ensure SFP has the correct alignment. */
21570 if (ARM_DOUBLEWORD_ALIGN
21571 && (offsets->soft_frame & 7))
21573 offsets->soft_frame += 4;
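      /* E.g. a word-aligned soft_frame offset of 20 becomes 24 here;
	 since the offset is always a multiple of 4, one extra word is
	 enough to reach the next doubleword boundary.  */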
21574 /* Try to align stack by pushing an extra reg. Don't bother doing this
21575 when there is a stack frame as the alignment will be rolled into
21576 the normal stack adjustment. */
21577 if (frame_size + crtl->outgoing_args_size == 0)
21581 /* Register r3 is caller-saved. Normally it does not need to be
21582 saved on entry by the prologue. However if we choose to save
21583 it for padding then we may confuse the compiler into thinking
21584 a prologue sequence is required when in fact it is not. This
21585 will occur when shrink-wrapping if r3 is used as a scratch
21586 register and there are no other callee-saved writes.
21588 This situation can be avoided when other callee-saved registers
21589 are available and r3 is not mandatory if we choose a callee-saved
21590 register for padding. */
21591 bool prefer_callee_reg_p = false;
21593 /* If it is safe to use r3, then do so. This sometimes
21594 generates better code on Thumb-2 by avoiding the need to
21595 use 32-bit push/pop instructions. */
21596 if (! any_sibcall_could_use_r3 ()
21597 && arm_size_return_regs () <= 12
21598 && (offsets->saved_regs_mask & (1 << 3)) == 0
21600 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
21603 if (!TARGET_THUMB2)
21604 prefer_callee_reg_p = true;
21607 || prefer_callee_reg_p)
21609 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21611 /* Avoid fixed registers; they may be changed at
21612 arbitrary times so it's unsafe to restore them
21613 during the epilogue. */
21615 && (offsets->saved_regs_mask & (1 << i)) == 0)
21625 offsets->saved_regs += 4;
21626 offsets->saved_regs_mask |= (1 << reg);
21631 offsets->locals_base = offsets->soft_frame + frame_size;
21632 offsets->outgoing_args = (offsets->locals_base
21633 + crtl->outgoing_args_size);
21635 if (ARM_DOUBLEWORD_ALIGN)
21637 /* Ensure SP remains doubleword aligned. */
21638 if (offsets->outgoing_args & 7)
21639 offsets->outgoing_args += 4;
21640 gcc_assert (!(offsets->outgoing_args & 7));
21645 /* Calculate the relative offsets for the different stack pointers. Positive
21646 offsets are in the direction of stack growth. */
21649 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21651 arm_stack_offsets *offsets;
21653 offsets = arm_get_frame_offsets ();
21655 /* OK, now we have enough information to compute the distances.
21656 There must be an entry in these switch tables for each pair
21657 of registers in ELIMINABLE_REGS, even if some of the entries
21658 seem to be redundant or useless. */
21661 case ARG_POINTER_REGNUM:
21664 case THUMB_HARD_FRAME_POINTER_REGNUM:
21667 case FRAME_POINTER_REGNUM:
21668 /* This is the reverse of the soft frame pointer
21669 to hard frame pointer elimination below. */
21670 return offsets->soft_frame - offsets->saved_args;
21672 case ARM_HARD_FRAME_POINTER_REGNUM:
21673 /* This is only non-zero in the case where the static chain register
21674 is stored above the frame. */
21675 return offsets->frame - offsets->saved_args - 4;
21677 case STACK_POINTER_REGNUM:
21678 /* If nothing has been pushed on the stack at all
21679 then this will return -4. This *is* correct! */
21680 return offsets->outgoing_args - (offsets->saved_args + 4);
21683 gcc_unreachable ();
21685 gcc_unreachable ();
21687 case FRAME_POINTER_REGNUM:
21690 case THUMB_HARD_FRAME_POINTER_REGNUM:
21693 case ARM_HARD_FRAME_POINTER_REGNUM:
      /* The hard frame pointer points to the top entry in the
	 stack frame.  The soft frame pointer points to the bottom
	 entry in the stack frame.  If there is no stack frame at all,
	 then they are identical.  */
21699 return offsets->frame - offsets->soft_frame;
21701 case STACK_POINTER_REGNUM:
21702 return offsets->outgoing_args - offsets->soft_frame;
21705 gcc_unreachable ();
21707 gcc_unreachable ();
21710 /* You cannot eliminate from the stack pointer.
21711 In theory you could eliminate from the hard frame
21712 pointer to the stack pointer, but this will never
21713 happen, since if a stack frame is not needed the
21714 hard frame pointer will never be used. */
21715 gcc_unreachable ();
21719 /* Given FROM and TO register numbers, say whether this elimination is
21720 allowed. Frame pointer elimination is automatically handled.
21722 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21723 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21724 pointer, we must eliminate FRAME_POINTER_REGNUM into
21725 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21726 ARG_POINTER_REGNUM. */
static bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
	  (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
	  (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
	  (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
	  true);
}
21738 /* Emit RTL to save coprocessor registers on function entry. Returns the
21739 number of bytes pushed. */
21742 arm_save_coproc_regs(void)
21744 int saved_size = 0;
21746 unsigned start_reg;
21749 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21750 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21752 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21753 insn = gen_rtx_MEM (V2SImode, insn);
21754 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21755 RTX_FRAME_RELATED_P (insn) = 1;
21759 if (TARGET_HARD_FLOAT)
21761 start_reg = FIRST_VFP_REGNUM;
21763 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21765 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21766 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21768 if (start_reg != reg)
21769 saved_size += vfp_emit_fstmd (start_reg,
21770 (reg - start_reg) / 2);
21771 start_reg = reg + 2;
21774 if (start_reg != reg)
21775 saved_size += vfp_emit_fstmd (start_reg,
21776 (reg - start_reg) / 2);
21782 /* Set the Thumb frame pointer from the stack pointer. */
21785 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21787 HOST_WIDE_INT amount;
21790 amount = offsets->outgoing_args - offsets->locals_base;
21792 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21793 stack_pointer_rtx, GEN_INT (amount)));
21796 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21797 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21798 expects the first two operands to be the same. */
21801 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21803 hard_frame_pointer_rtx));
21807 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21808 hard_frame_pointer_rtx,
21809 stack_pointer_rtx));
21811 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21812 plus_constant (Pmode, stack_pointer_rtx, amount));
21813 RTX_FRAME_RELATED_P (dwarf) = 1;
21814 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21817 RTX_FRAME_RELATED_P (insn) = 1;
struct scratch_reg {
  rtx reg;
  bool saved;
};
21825 /* Return a short-lived scratch register for use as a 2nd scratch register on
21826 function entry after the registers are saved in the prologue. This register
21827 must be released by means of release_scratch_register_on_entry. IP is not
21828 considered since it is always used as the 1st scratch register if available.
21830 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21831 mask of live registers. */
21834 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21835 unsigned long live_regs)
21841 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21847 for (i = 4; i < 11; i++)
21848 if (regno1 != i && (live_regs & (1 << i)) != 0)
21856 /* If IP is used as the 1st scratch register for a nested function,
21857 then either r3 wasn't available or is used to preserve IP. */
21858 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21860 regno = (regno1 == 3 ? 2 : 3);
21862 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21867 sr->reg = gen_rtx_REG (SImode, regno);
21870 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21871 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21872 rtx x = gen_rtx_SET (stack_pointer_rtx,
21873 plus_constant (Pmode, stack_pointer_rtx, -4));
21874 RTX_FRAME_RELATED_P (insn) = 1;
21875 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21879 /* Release a scratch register obtained from the preceding function. */
21882 release_scratch_register_on_entry (struct scratch_reg *sr)
21886 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21887 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21888 rtx x = gen_rtx_SET (stack_pointer_rtx,
21889 plus_constant (Pmode, stack_pointer_rtx, 4));
21890 RTX_FRAME_RELATED_P (insn) = 1;
21891 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21895 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21897 #if PROBE_INTERVAL > 4096
21898 #error Cannot use indexed addressing mode for stack probing
21901 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21902 inclusive. These are offsets from the current stack pointer. REGNO1
21903 is the index number of the 1st scratch register and LIVE_REGS is the
21904 mask of live registers. */
21907 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21908 unsigned int regno1, unsigned long live_regs)
21910 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21912 /* See if we have a constant small number of probes to generate. If so,
21913 that's the easy case. */
21914 if (size <= PROBE_INTERVAL)
21916 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21917 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21918 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
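      /* A worked example of this branch (illustrative values, assuming
	 PROBE_INTERVAL == 4096): for FIRST == 1024 and SIZE == 512,
	 reg1 ends up holding SP - 5120 and the probe lands at
	 SP - 5120 + 4096 - 512 == SP - 1536, i.e. exactly FIRST + SIZE
	 below the incoming stack pointer.  */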
21921 /* The run-time loop is made up of 10 insns in the generic case while the
21922 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21923 else if (size <= 5 * PROBE_INTERVAL)
21925 HOST_WIDE_INT i, rem;
21927 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21928 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21929 emit_stack_probe (reg1);
21931 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21932 it exceeds SIZE. If only two probes are needed, this will not
21933 generate any code. Then probe at FIRST + SIZE. */
21934 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21936 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21937 emit_stack_probe (reg1);
21940 rem = size - (i - PROBE_INTERVAL);
21941 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21943 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21944 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21947 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
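      /* Sketching this branch with illustrative values (PROBE_INTERVAL
	 == 4096, FIRST == 0, SIZE == 10000, ARM state): probes are
	 emitted at SP - 4096 and SP - 8192, REM is 10000 - 8192 ==
	 1808, and since that fits an ARM-state offset the final probe
	 lands directly at SP - 10000.  */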
21950 /* Otherwise, do the same as above, but in a loop. Note that we must be
21951 extra careful with variables wrapping around because we might be at
21952 the very top (or the very bottom) of the address space and we have
21953 to be able to handle this case properly; in particular, we use an
21954 equality test for the loop condition. */
21957 HOST_WIDE_INT rounded_size;
21958 struct scratch_reg sr;
21960 get_scratch_register_on_entry (&sr, regno1, live_regs);
21962 emit_move_insn (reg1, GEN_INT (first));
21965 /* Step 1: round SIZE to the previous multiple of the interval. */
21967 rounded_size = size & -PROBE_INTERVAL;
21968 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21971 /* Step 2: compute initial and final value of the loop counter. */
21973 /* TEST_ADDR = SP + FIRST. */
21974 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21976 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21977 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */
21992 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21995 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21996 that SIZE is equal to ROUNDED_SIZE. */
21998 if (size != rounded_size)
22000 HOST_WIDE_INT rem = size - rounded_size;
22002 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
22004 emit_set_insn (sr.reg,
22005 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
22006 emit_stack_probe (plus_constant (Pmode, sr.reg,
22007 PROBE_INTERVAL - rem));
22010 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
22013 release_scratch_register_on_entry (&sr);
22016 /* Make sure nothing is scheduled before we are done. */
22017 emit_insn (gen_blockage ());
22020 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
22021 absolute addresses. */
22024 output_probe_stack_range (rtx reg1, rtx reg2)
22026 static int labelno = 0;
22030 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
22033 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
22035 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
22037 xops[1] = GEN_INT (PROBE_INTERVAL);
22038 output_asm_insn ("sub\t%0, %0, %1", xops);
22040 /* Probe at TEST_ADDR. */
22041 output_asm_insn ("str\tr0, [%0, #0]", xops);
22043 /* Test if TEST_ADDR == LAST_ADDR. */
22045 output_asm_insn ("cmp\t%0, %1", xops);
22048 fputs ("\tbne\t", asm_out_file);
22049 assemble_name_raw (asm_out_file, loop_lab);
22050 fputc ('\n', asm_out_file);
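  /* The emitted sequence therefore looks roughly like this
     (illustrative register allocation, assuming PROBE_INTERVAL ==
     4096; the actual operands come from REG1 and REG2):

	.LPSRL0:
		sub	r4, r4, #4096
		str	r0, [r4, #0]
		cmp	r4, r5
		bne	.LPSRL0
  */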
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
22058 arm_expand_prologue (void)
22063 unsigned long live_regs_mask;
22064 unsigned long func_type;
22066 int saved_pretend_args = 0;
22067 int saved_regs = 0;
22068 unsigned HOST_WIDE_INT args_to_push;
22069 HOST_WIDE_INT size;
22070 arm_stack_offsets *offsets;
22073 func_type = arm_current_func_type ();
22075 /* Naked functions don't have prologues. */
22076 if (IS_NAKED (func_type))
22078 if (flag_stack_usage_info)
22079 current_function_static_stack_size = 0;
22083 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
22084 args_to_push = crtl->args.pretend_args_size;
22086 /* Compute which register we will have to save onto the stack. */
22087 offsets = arm_get_frame_offsets ();
22088 live_regs_mask = offsets->saved_regs_mask;
22090 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
22092 if (IS_STACKALIGN (func_type))
      /* Handle a word-aligned stack pointer.  We generate the following:

	     mov r0, sp
	     bic r1, r0, #7
	     mov sp, r1
	     <save and restore r0 in normal prologue/epilogue>
	     mov sp, r0

	 The unwinder doesn't need to know about the stack realignment.
	 Just tell it we saved SP in r0.  */
22107 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
22109 r0 = gen_rtx_REG (SImode, R0_REGNUM);
22110 r1 = gen_rtx_REG (SImode, R1_REGNUM);
22112 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
22113 RTX_FRAME_RELATED_P (insn) = 1;
22114 add_reg_note (insn, REG_CFA_REGISTER, NULL);
22116 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
22118 /* ??? The CFA changes here, which may cause GDB to conclude that it
22119 has entered a different function. That said, the unwind info is
22120 correct, individually, before and after this instruction because
22121 we've described the save of SP, which will override the default
22122 handling of SP as restoring from the CFA. */
22123 emit_insn (gen_movsi (stack_pointer_rtx, r1));
22126 /* Let's compute the static_chain_stack_bytes required and store it. Right
22127 now the value must be -1 as stored by arm_init_machine_status (). */
22128 cfun->machine->static_chain_stack_bytes
22129 = arm_compute_static_chain_stack_bytes ();
22131 /* The static chain register is the same as the IP register. If it is
22132 clobbered when creating the frame, we need to save and restore it. */
22133 clobber_ip = IS_NESTED (func_type)
22134 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
22135 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
22136 || flag_stack_clash_protection)
22137 && !df_regs_ever_live_p (LR_REGNUM)
22138 && arm_r3_live_at_start_p ()));
22140 /* Find somewhere to store IP whilst the frame is being created.
22141 We try the following places in order:
22143 1. The last argument register r3 if it is available.
22144 2. A slot on the stack above the frame if there are no
22145 arguments to push onto the stack.
22146 3. Register r3 again, after pushing the argument registers
22147 onto the stack, if this is a varargs function.
22148 4. The last slot on the stack created for the arguments to
22149 push, if this isn't a varargs function.
22151 Note - we only need to tell the dwarf2 backend about the SP
22152 adjustment in the second variant; the static chain register
22153 doesn't need to be unwound, as it doesn't contain a value
22154 inherited from the caller. */
22157 if (!arm_r3_live_at_start_p ())
22158 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
22159 else if (args_to_push == 0)
22163 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
22166 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
22167 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
22170 /* Just tell the dwarf backend that we adjusted SP. */
22171 dwarf = gen_rtx_SET (stack_pointer_rtx,
22172 plus_constant (Pmode, stack_pointer_rtx,
22174 RTX_FRAME_RELATED_P (insn) = 1;
22175 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22179 /* Store the args on the stack. */
22180 if (cfun->machine->uses_anonymous_args)
22182 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
22183 (0xf0 >> (args_to_push / 4)) & 0xf);
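	  /* E.g. with 8 bytes of pretend args this mask is
	     (0xf0 >> 2) & 0xf == 0xc, i.e. a push of {r2, r3}.  */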
22184 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
22185 saved_pretend_args = 1;
22191 if (args_to_push == 4)
22192 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
22194 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
22195 plus_constant (Pmode,
22199 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
22201 /* Just tell the dwarf backend that we adjusted SP. */
22202 dwarf = gen_rtx_SET (stack_pointer_rtx,
22203 plus_constant (Pmode, stack_pointer_rtx,
22205 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22208 RTX_FRAME_RELATED_P (insn) = 1;
22209 fp_offset = args_to_push;
22214 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
22216 if (IS_INTERRUPT (func_type))
22218 /* Interrupt functions must not corrupt any registers.
22219 Creating a frame pointer however, corrupts the IP
22220 register, so we must push it first. */
22221 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
22223 /* Do not set RTX_FRAME_RELATED_P on this insn.
22224 The dwarf stack unwinding code only wants to see one
22225 stack decrement per function, and this is not it. If
22226 this instruction is labeled as being part of the frame
22227 creation sequence then dwarf2out_frame_debug_expr will
22228 die when it encounters the assignment of IP to FP
22229 later on, since the use of SP here establishes SP as
22230 the CFA register and not IP.
22232 Anyway this instruction is not really part of the stack
22233 frame creation although it is part of the prologue. */
22236 insn = emit_set_insn (ip_rtx,
22237 plus_constant (Pmode, stack_pointer_rtx,
22239 RTX_FRAME_RELATED_P (insn) = 1;
22244 /* Push the argument registers, or reserve space for them. */
22245 if (cfun->machine->uses_anonymous_args)
22246 insn = emit_multi_reg_push
22247 ((0xf0 >> (args_to_push / 4)) & 0xf,
22248 (0xf0 >> (args_to_push / 4)) & 0xf);
22251 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22252 GEN_INT (- args_to_push)));
22253 RTX_FRAME_RELATED_P (insn) = 1;
  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating the extra push
     of IP (needed when a frame is needed and the frame layout is
     APCS), then subtracting four from LR now will mean that the
     function return can be done with a single instruction.  */
22261 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
22262 && (live_regs_mask & (1 << LR_REGNUM)) != 0
22263 && !(frame_pointer_needed && TARGET_APCS_FRAME)
22266 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
22268 emit_set_insn (lr, plus_constant (SImode, lr, -4));
22271 if (live_regs_mask)
22273 unsigned long dwarf_regs_mask = live_regs_mask;
22275 saved_regs += bit_count (live_regs_mask) * 4;
22276 if (optimize_size && !frame_pointer_needed
22277 && saved_regs == offsets->saved_regs - offsets->saved_args)
22279 /* If no coprocessor registers are being pushed and we don't have
22280 to worry about a frame pointer then push extra registers to
22281 create the stack frame. This is done in a way that does not
22282 alter the frame layout, so is independent of the epilogue. */
22286 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
22288 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
22289 if (frame && n * 4 >= frame)
22292 live_regs_mask |= (1 << n) - 1;
22293 saved_regs += frame;
22298 && current_tune->prefer_ldrd_strd
22299 && !optimize_function_for_size_p (cfun))
22301 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
22303 thumb2_emit_strd_push (live_regs_mask);
22304 else if (TARGET_ARM
22305 && !TARGET_APCS_FRAME
22306 && !IS_INTERRUPT (func_type))
22307 arm_emit_strd_push (live_regs_mask);
22310 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
22311 RTX_FRAME_RELATED_P (insn) = 1;
22316 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
22317 RTX_FRAME_RELATED_P (insn) = 1;
22321 if (! IS_VOLATILE (func_type))
22322 saved_regs += arm_save_coproc_regs ();
22324 if (frame_pointer_needed && TARGET_ARM)
22326 /* Create the new frame pointer. */
22327 if (TARGET_APCS_FRAME)
22329 insn = GEN_INT (-(4 + args_to_push + fp_offset));
22330 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
22331 RTX_FRAME_RELATED_P (insn) = 1;
22335 insn = GEN_INT (saved_regs - (4 + fp_offset));
22336 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22337 stack_pointer_rtx, insn));
22338 RTX_FRAME_RELATED_P (insn) = 1;
22342 size = offsets->outgoing_args - offsets->saved_args;
22343 if (flag_stack_usage_info)
22344 current_function_static_stack_size = size;
22346 /* If this isn't an interrupt service routine and we have a frame, then do
22347 stack checking. We use IP as the first scratch register, except for the
22348 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
22349 if (!IS_INTERRUPT (func_type)
22350 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
22351 || flag_stack_clash_protection))
22353 unsigned int regno;
22355 if (!IS_NESTED (func_type) || clobber_ip)
22357 else if (df_regs_ever_live_p (LR_REGNUM))
22362 if (crtl->is_leaf && !cfun->calls_alloca)
22364 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
22365 arm_emit_probe_stack_range (get_stack_check_protect (),
22366 size - get_stack_check_protect (),
22367 regno, live_regs_mask);
22370 arm_emit_probe_stack_range (get_stack_check_protect (), size,
22371 regno, live_regs_mask);
22374 /* Recover the static chain register. */
22377 if (!arm_r3_live_at_start_p () || saved_pretend_args)
22378 insn = gen_rtx_REG (SImode, 3);
22381 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
22382 insn = gen_frame_mem (SImode, insn);
22384 emit_set_insn (ip_rtx, insn);
22385 emit_insn (gen_force_register_use (ip_rtx));
22388 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
22390 /* This add can produce multiple insns for a large constant, so we
22391 need to get tricky. */
22392 rtx_insn *last = get_last_insn ();
22394 amount = GEN_INT (offsets->saved_args + saved_regs
22395 - offsets->outgoing_args);
22397 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
      do
	{
	  last = last ? NEXT_INSN (last) : get_insns ();
	  RTX_FRAME_RELATED_P (last) = 1;
	}
      while (last != insn);
22406 /* If the frame pointer is needed, emit a special barrier that
22407 will prevent the scheduler from moving stores to the frame
22408 before the stack adjustment. */
22409 if (frame_pointer_needed)
22410 emit_insn (gen_stack_tie (stack_pointer_rtx,
22411 hard_frame_pointer_rtx));
22415 if (frame_pointer_needed && TARGET_THUMB2)
22416 thumb_set_frame_pointer (offsets);
22418 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22420 unsigned long mask;
22422 mask = live_regs_mask;
22423 mask &= THUMB2_WORK_REGS;
22424 if (!IS_NESTED (func_type))
22425 mask |= (1 << IP_REGNUM);
22426 arm_load_pic_register (mask, NULL_RTX);
  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prologue.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
22434 if (crtl->profile || !TARGET_SCHED_PROLOG
22435 || (arm_except_unwind_info (&global_options) == UI_TARGET
22436 && cfun->can_throw_non_call_exceptions))
22437 emit_insn (gen_blockage ());
22439 /* If the link register is being kept alive, with the return address in it,
22440 then make sure that it does not get reused by the ce2 pass. */
22441 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
22442 cfun->machine->lr_save_eliminated = 1;
22445 /* Print condition code to STREAM. Helper function for arm_print_operand. */
22447 arm_print_condition (FILE *stream)
22449 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
22451 /* Branch conversion is not implemented for Thumb-2. */
22454 output_operand_lossage ("predicated Thumb instruction");
22457 if (current_insn_predicate != NULL)
22459 output_operand_lossage
22460 ("predicated instruction in conditional sequence");
22464 fputs (arm_condition_codes[arm_current_cc], stream);
22466 else if (current_insn_predicate)
22468 enum arm_cond_code code;
22472 output_operand_lossage ("predicated Thumb instruction");
22476 code = get_arm_condition_code (current_insn_predicate);
22477 fputs (arm_condition_codes[code], stream);
22482 /* Globally reserved letters: acln
   Punctuation letters currently used: @_|?().!#
22484 Lower case letters currently used: bcdefhimpqtvwxyz
22485 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
22486 Letters previously used, but now deprecated/obsolete: sVWXYZ.
22488 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
   If CODE is 'd', then X is a condition operand and the instruction
   should only be executed if the condition is true.
   If CODE is 'D', then X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
22501 If CODE is 'B' then output a bitwise inverted value of X (a const int).
22502 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
22504 arm_print_operand (FILE *stream, rtx x, int code)
22509 fputs (ASM_COMMENT_START, stream);
22513 fputs (user_label_prefix, stream);
22517 fputs (REGISTER_PREFIX, stream);
22521 arm_print_condition (stream);
22525 /* The current condition code for a condition code setting instruction.
22526 Preceded by 's' in unified syntax, otherwise followed by 's'. */
22527 fputc('s', stream);
22528 arm_print_condition (stream);
22532 /* If the instruction is conditionally executed then print
22533 the current condition code, otherwise print 's'. */
22534 gcc_assert (TARGET_THUMB2);
22535 if (current_insn_predicate)
22536 arm_print_condition (stream);
22538 fputc('s', stream);
  /* %# is a "break" sequence.  It doesn't output anything, but is used to
     separate e.g. operand numbers from following text, if that text consists
     of further digits which we don't want to be part of the operand
     number.  */
22551 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
22552 fprintf (stream, "%s", fp_const_from_val (&r));
22556 /* An integer or symbol address without a preceding # sign. */
22558 switch (GET_CODE (x))
22561 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
22565 output_addr_const (stream, x);
22569 if (GET_CODE (XEXP (x, 0)) == PLUS
22570 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
22572 output_addr_const (stream, x);
22575 /* Fall through. */
22578 output_operand_lossage ("Unsupported operand for code '%c'", code);
22582 /* An integer that we want to print in HEX. */
22584 switch (GET_CODE (x))
22587 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
22591 output_operand_lossage ("Unsupported operand for code '%c'", code);
22596 if (CONST_INT_P (x))
22599 val = ARM_SIGN_EXTEND (~INTVAL (x));
22600 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
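	  /* E.g. a CONST_INT of 5 prints as -6 here: the sign-extended
	     bitwise inverse.  */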
22604 putc ('~', stream);
22605 output_addr_const (stream, x);
22610 /* Print the log2 of a CONST_INT. */
22614 if (!CONST_INT_P (x)
22615 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22616 output_operand_lossage ("Unsupported operand for code '%c'", code);
22618 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
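      /* E.g. a CONST_INT of 8 prints as "#3" here.  */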
22623 /* The low 16 bits of an immediate constant. */
22624 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
22628 fprintf (stream, "%s", arithmetic_instr (x, 1));
22632 fprintf (stream, "%s", arithmetic_instr (x, 0));
22640 shift = shift_op (x, &val);
22644 fprintf (stream, ", %s ", shift);
22646 arm_print_operand (stream, XEXP (x, 1), 0);
22648 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22653 /* An explanation of the 'Q', 'R' and 'H' register operands:
22655 In a pair of registers containing a DI or DF value the 'Q'
22656 operand returns the register number of the register containing
22657 the least significant part of the value. The 'R' operand returns
22658 the register number of the register containing the most
22659 significant part of the value.
22661 The 'H' operand returns the higher of the two register numbers.
22662 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22663 same as the 'Q' operand, since the most significant part of the
22664 value is held in the lower number register. The reverse is true
22665 on systems where WORDS_BIG_ENDIAN is false.
22667 The purpose of these operands is to distinguish between cases
22668 where the endian-ness of the values is important (for example
22669 when they are added together), and cases where the endian-ness
22670 is irrelevant, but the order of register operations is important.
22671 For example when loading a value from memory into a register
22672 pair, the endian-ness does not matter. Provided that the value
22673 from the lower memory address is put into the lower numbered
22674 register, and the value from the higher address is put into the
22675 higher numbered register, the load will work regardless of whether
22676 the value being loaded is big-wordian or little-wordian. The
22677 order of the two register loads can matter however, if the address
22678 of the memory location is actually held in one of the registers
22679 being overwritten by the load.
	 The 'Q' and 'R' constraints are also available for 64-bit
	 constants.  */
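      /* A concrete illustration (little-endian, WORDS_BIG_ENDIAN
	 false): for a DImode value held in {r0, r1}, '%Q' prints r0
	 (least significant word), '%R' prints r1 (most significant
	 word) and '%H' prints r1 as well, it being the higher register
	 number.  */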
22684 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22686 rtx part = gen_lowpart (SImode, x);
22687 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22691 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22693 output_operand_lossage ("invalid operand for code '%c'", code);
22697 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22701 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22703 machine_mode mode = GET_MODE (x);
22706 if (mode == VOIDmode)
22708 part = gen_highpart_mode (SImode, mode, x);
22709 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22713 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22715 output_operand_lossage ("invalid operand for code '%c'", code);
22719 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22723 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22725 output_operand_lossage ("invalid operand for code '%c'", code);
22729 asm_fprintf (stream, "%r", REGNO (x) + 1);
22733 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22735 output_operand_lossage ("invalid operand for code '%c'", code);
22739 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22743 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22745 output_operand_lossage ("invalid operand for code '%c'", code);
22749 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22753 asm_fprintf (stream, "%r",
22754 REG_P (XEXP (x, 0))
22755 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22759 asm_fprintf (stream, "{%r-%r}",
22761 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
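      /* E.g. a DImode value in r4 prints as "{r4-r5}" here.  */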
    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
22768 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22769 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22771 asm_fprintf (stream, "{d%d}", regno);
22773 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22778 /* CONST_TRUE_RTX means always -- that's the default. */
22779 if (x == const_true_rtx)
22782 if (!COMPARISON_P (x))
22784 output_operand_lossage ("invalid operand for code '%c'", code);
22788 fputs (arm_condition_codes[get_arm_condition_code (x)],
22793 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22794 want to do that. */
22795 if (x == const_true_rtx)
22797 output_operand_lossage ("instruction never executed");
22800 if (!COMPARISON_P (x))
22802 output_operand_lossage ("invalid operand for code '%c'", code);
22806 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22807 (get_arm_condition_code (x))],
22817 /* Former Maverick support, removed after GCC-4.7. */
22818 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22823 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22824 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22825 /* Bad value for wCG register number. */
22827 output_operand_lossage ("invalid operand for code '%c'", code);
22832 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22835 /* Print an iWMMXt control register name. */
22837 if (!CONST_INT_P (x)
22839 || INTVAL (x) >= 16)
22840 /* Bad value for wC register number. */
22842 output_operand_lossage ("invalid operand for code '%c'", code);
22848 static const char * wc_reg_names [16] =
22850 "wCID", "wCon", "wCSSF", "wCASF",
22851 "wC4", "wC5", "wC6", "wC7",
22852 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22853 "wC12", "wC13", "wC14", "wC15"
22856 fputs (wc_reg_names [INTVAL (x)], stream);
    /* Print the high single-precision register of a VFP double-precision
       register.  */
22864 machine_mode mode = GET_MODE (x);
22867 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22869 output_operand_lossage ("invalid operand for code '%c'", code);
22874 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22876 output_operand_lossage ("invalid operand for code '%c'", code);
22880 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22884 /* Print a VFP/Neon double precision or quad precision register name. */
22888 machine_mode mode = GET_MODE (x);
22889 int is_quad = (code == 'q');
22892 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22894 output_operand_lossage ("invalid operand for code '%c'", code);
22899 || !IS_VFP_REGNUM (REGNO (x)))
22901 output_operand_lossage ("invalid operand for code '%c'", code);
22906 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22907 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22909 output_operand_lossage ("invalid operand for code '%c'", code);
22913 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22914 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22918 /* These two codes print the low/high doubleword register of a Neon quad
22919 register, respectively. For pair-structure types, can also print
22920 low/high quadword registers. */
22924 machine_mode mode = GET_MODE (x);
22927 if ((GET_MODE_SIZE (mode) != 16
22928 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22930 output_operand_lossage ("invalid operand for code '%c'", code);
22935 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22937 output_operand_lossage ("invalid operand for code '%c'", code);
22941 if (GET_MODE_SIZE (mode) == 16)
22942 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22943 + (code == 'f' ? 1 : 0));
22945 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22946 + (code == 'f' ? 1 : 0));
    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
22954 int index = vfp3_const_double_index (x);
22955 gcc_assert (index != -1);
22956 fprintf (stream, "%d", index);
22960 /* Print bits representing opcode features for Neon.
22962 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22963 and polynomials as unsigned.
22965 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22967 Bit 2 is 1 for rounding functions, 0 otherwise. */
22969 /* Identify the type as 's', 'u', 'p' or 'f'. */
22972 HOST_WIDE_INT bits = INTVAL (x);
22973 fputc ("uspf"[bits & 3], stream);
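	/* E.g. bits of 1 select 's' (signed), 2 select 'p'
	   (polynomial) and 3 select 'f' (float), supplying the type
	   letter in suffixes such as "vadd.f32".  */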
22977 /* Likewise, but signed and unsigned integers are both 'i'. */
22980 HOST_WIDE_INT bits = INTVAL (x);
22981 fputc ("iipf"[bits & 3], stream);
22985 /* As for 'T', but emit 'u' instead of 'p'. */
22988 HOST_WIDE_INT bits = INTVAL (x);
22989 fputc ("usuf"[bits & 3], stream);
22993 /* Bit 2: rounding (vs none). */
22996 HOST_WIDE_INT bits = INTVAL (x);
22997 fputs ((bits & 4) != 0 ? "r" : "", stream);
23001 /* Memory operand for vld1/vst1 instruction. */
23005 bool postinc = FALSE;
23006 rtx postinc_reg = NULL;
23007 unsigned align, memsize, align_bits;
23009 gcc_assert (MEM_P (x));
23010 addr = XEXP (x, 0);
23011 if (GET_CODE (addr) == POST_INC)
23014 addr = XEXP (addr, 0);
23016 if (GET_CODE (addr) == POST_MODIFY)
23018 postinc_reg = XEXP( XEXP (addr, 1), 1);
23019 addr = XEXP (addr, 0);
23021 asm_fprintf (stream, "[%r", REGNO (addr));
	  /* We know the alignment of this access, so we can emit a hint in the
	     instruction (for some alignments) as an aid to the memory
	     subsystem of the CPU.  */
23026 align = MEM_ALIGN (x) >> 3;
23027 memsize = MEM_SIZE (x);
23029 /* Only certain alignment specifiers are supported by the hardware. */
23030 if (memsize == 32 && (align % 32) == 0)
23032 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
23034 else if (memsize >= 8 && (align % 8) == 0)
23039 if (align_bits != 0)
23040 asm_fprintf (stream, ":%d", align_bits);
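	  /* E.g. a 16-byte access at a 16-byte-aligned address gets a
	     ":128" hint appended, while one that is only 8-byte
	     aligned gets ":64".  */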
23042 asm_fprintf (stream, "]");
23045 fputs("!", stream);
23047 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
23055 gcc_assert (MEM_P (x));
23056 addr = XEXP (x, 0);
23057 gcc_assert (REG_P (addr));
23058 asm_fprintf (stream, "[%r]", REGNO (addr));
23062 /* Translate an S register number into a D register number and element index. */
23065 machine_mode mode = GET_MODE (x);
23068 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
23070 output_operand_lossage ("invalid operand for code '%c'", code);
23075 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
23077 output_operand_lossage ("invalid operand for code '%c'", code);
23081 regno = regno - FIRST_VFP_REGNUM;
23082 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
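	/* E.g. the single-precision register s5 prints as "d2[1]"
	   here: the odd half of double register d2.  */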
23087 gcc_assert (CONST_DOUBLE_P (x));
23089 result = vfp3_const_double_for_fract_bits (x);
23091 result = vfp3_const_double_for_bits (x);
23092 fprintf (stream, "#%d", result);
23095 /* Register specifier for vld1.16/vst1.16. Translate the S register
23096 number into a D register number and element index. */
23099 machine_mode mode = GET_MODE (x);
23102 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
23104 output_operand_lossage ("invalid operand for code '%c'", code);
23109 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
23111 output_operand_lossage ("invalid operand for code '%c'", code);
23115 regno = regno - FIRST_VFP_REGNUM;
23116 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
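	/* E.g. s2 prints as "d1[0]" and s3 as "d1[2]" here: the lane
	   index is doubled because the D register is viewed as four
	   16-bit lanes.  */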
23123 output_operand_lossage ("missing operand");
23127 switch (GET_CODE (x))
23130 asm_fprintf (stream, "%r", REGNO (x));
23134 output_address (GET_MODE (x), XEXP (x, 0));
23140 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
23141 sizeof (fpstr), 0, 1);
23142 fprintf (stream, "#%s", fpstr);
23147 gcc_assert (GET_CODE (x) != NEG);
23148 fputc ('#', stream);
23149 if (GET_CODE (x) == HIGH)
23151 fputs (":lower16:", stream);
23155 output_addr_const (stream, x);
23161 /* Target hook for printing a memory address. */
23163 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
23167 int is_minus = GET_CODE (x) == MINUS;
23170 asm_fprintf (stream, "[%r]", REGNO (x));
23171 else if (GET_CODE (x) == PLUS || is_minus)
23173 rtx base = XEXP (x, 0);
23174 rtx index = XEXP (x, 1);
23175 HOST_WIDE_INT offset = 0;
23177 || (REG_P (index) && REGNO (index) == SP_REGNUM))
	  /* Ensure that BASE is a register.  */
	  /* (One of them must be.)  */
	  /* Also ensure the SP is not used as an index register.  */
23182 std::swap (base, index);
23184 switch (GET_CODE (index))
23187 offset = INTVAL (index);
23190 asm_fprintf (stream, "[%r, #%wd]",
23191 REGNO (base), offset);
23195 asm_fprintf (stream, "[%r, %s%r]",
23196 REGNO (base), is_minus ? "-" : "",
23206 asm_fprintf (stream, "[%r, %s%r",
23207 REGNO (base), is_minus ? "-" : "",
23208 REGNO (XEXP (index, 0)));
23209 arm_print_operand (stream, index, 'S');
23210 fputs ("]", stream);
23215 gcc_unreachable ();
23218 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
23219 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
23221 gcc_assert (REG_P (XEXP (x, 0)));
23223 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
23224 asm_fprintf (stream, "[%r, #%s%d]!",
23225 REGNO (XEXP (x, 0)),
23226 GET_CODE (x) == PRE_DEC ? "-" : "",
23227 GET_MODE_SIZE (mode));
23229 asm_fprintf (stream, "[%r], #%s%d",
23230 REGNO (XEXP (x, 0)),
23231 GET_CODE (x) == POST_DEC ? "-" : "",
23232 GET_MODE_SIZE (mode));
23234 else if (GET_CODE (x) == PRE_MODIFY)
23236 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
23237 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
23238 asm_fprintf (stream, "#%wd]!",
23239 INTVAL (XEXP (XEXP (x, 1), 1)));
23241 asm_fprintf (stream, "%r]!",
23242 REGNO (XEXP (XEXP (x, 1), 1)));
23244 else if (GET_CODE (x) == POST_MODIFY)
23246 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
23247 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
23248 asm_fprintf (stream, "#%wd",
23249 INTVAL (XEXP (XEXP (x, 1), 1)));
23251 asm_fprintf (stream, "%r",
23252 REGNO (XEXP (XEXP (x, 1), 1)));
23254 else output_addr_const (stream, x);
23259 asm_fprintf (stream, "[%r]", REGNO (x));
23260 else if (GET_CODE (x) == POST_INC)
23261 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
23262 else if (GET_CODE (x) == PLUS)
23264 gcc_assert (REG_P (XEXP (x, 0)));
23265 if (CONST_INT_P (XEXP (x, 1)))
23266 asm_fprintf (stream, "[%r, #%wd]",
23267 REGNO (XEXP (x, 0)),
23268 INTVAL (XEXP (x, 1)));
23270 asm_fprintf (stream, "[%r, %r]",
23271 REGNO (XEXP (x, 0)),
23272 REGNO (XEXP (x, 1)));
23275 output_addr_const (stream, x);
23279 /* Target hook for indicating whether a punctuation character for
23280 TARGET_PRINT_OPERAND is valid. */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
	  || code == '(' || code == ')' || code == '#'
	  || (TARGET_32BIT && (code == '?'))
	  || (TARGET_THUMB2 && (code == '!'))
	  || (TARGET_THUMB && (code == '_')));
}
23291 /* Target hook for assembling integer objects. The ARM version needs to
23292 handle word-sized values specially. */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
23298 if (size == UNITS_PER_WORD && aligned_p)
23300 fputs ("\t.word\t", asm_out_file);
23301 output_addr_const (asm_out_file, x);
23303 /* Mark symbols as position independent. We only do this in the
23304 .text segment, not in the .data segment. */
23305 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
23306 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
23308 /* See legitimize_pic_address for an explanation of the
23309 TARGET_VXWORKS_RTP check. */
	  /* References to weak symbols cannot be resolved locally:
	     they may be overridden by a non-weak definition at link
	     time.  */
23313 if (!arm_pic_data_is_text_relative
23314 || (GET_CODE (x) == SYMBOL_REF
23315 && (!SYMBOL_REF_LOCAL_P (x)
23316 || (SYMBOL_REF_DECL (x)
23317 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
23318 || (SYMBOL_REF_FUNCTION_P (x)
23319 && !arm_fdpic_local_funcdesc_p (x)))))
23321 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
23322 fputs ("(GOTFUNCDESC)", asm_out_file);
23324 fputs ("(GOT)", asm_out_file);
23328 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
23329 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
23335 || arm_is_segment_info_known (x, &is_readonly))
23336 fputs ("(GOTOFF)", asm_out_file);
23338 fputs ("(GOT)", asm_out_file);
23343 /* For FDPIC we also have to mark symbol for .data section. */
23345 && !making_const_table
23346 && SYMBOL_REF_P (x)
23347 && SYMBOL_REF_FUNCTION_P (x))
23348 fputs ("(FUNCDESC)", asm_out_file);
23350 fputc ('\n', asm_out_file);
23354 mode = GET_MODE (x);
23356 if (arm_vector_mode_supported_p (mode))
23360 gcc_assert (GET_CODE (x) == CONST_VECTOR);
23362 units = CONST_VECTOR_NUNITS (x);
23363 size = GET_MODE_UNIT_SIZE (mode);
23365 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23366 for (i = 0; i < units; i++)
23368 rtx elt = CONST_VECTOR_ELT (x, i);
23370 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
23373 for (i = 0; i < units; i++)
23375 rtx elt = CONST_VECTOR_ELT (x, i);
23377 (*CONST_DOUBLE_REAL_VALUE (elt),
23378 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
23379 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
23385 return default_assemble_integer (x, size, aligned_p);
23389 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
23393 if (!TARGET_AAPCS_BASED)
23396 default_named_section_asm_out_constructor
23397 : default_named_section_asm_out_destructor) (symbol, priority);
23401 /* Put these in the .init_array section, using a special relocation. */
23402 if (priority != DEFAULT_INIT_PRIORITY)
23405 sprintf (buf, "%s.%.5u",
23406 is_ctor ? ".init_array" : ".fini_array",
23408 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
23415 switch_to_section (s);
23416 assemble_align (POINTER_SIZE);
23417 fputs ("\t.word\t", asm_out_file);
23418 output_addr_const (asm_out_file, symbol);
23419 fputs ("(target1)\n", asm_out_file);
23422 /* Add a function to the list of static constructors. */
static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}
23430 /* Add a function to the list of static destructors. */
static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
23438 /* A finite state machine takes care of noticing whether or not instructions
23439 can be conditionally executed, and thus decrease execution time and code
23440 size by deleting branch instructions. The fsm is controlled by
23441 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
/* The states of the fsm controlling condition codes are:
23444 0: normal, do nothing special
23445 1: make ASM_OUTPUT_OPCODE not output this instruction
23446 2: make ASM_OUTPUT_OPCODE not output this instruction
23447 3: make instructions conditional
23448 4: make instructions conditional
23450 State transitions (state->state by whom under condition):
23451 0 -> 1 final_prescan_insn if the `target' is a label
23452 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
23453 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
23454 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
23455 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
23456 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
23457 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
23458 (the target insn is arm_target_insn).
23460 If the jump clobbers the conditions then we use states 2 and 4.
23462 A similar thing can be done with conditional return insns.
23464 XXX In case the `target' is an unconditional branch, this conditionalising
23465 of the instructions always reduces code size, but not always execution
23466 time. But then, I want to reduce the code size to somewhere near what
23467 /bin/cc produces. */
23469 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23470 instructions. When a COND_EXEC instruction is seen the subsequent
23471 instructions are scanned so that multiple conditional instructions can be
23472 combined into a single IT block. arm_condexec_count and arm_condexec_mask
23473 specify the length and true/false mask for the IT block. These will be
23474 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
23476 /* Returns the index of the ARM condition code string in
23477 `arm_condition_codes', or ARM_NV if the comparison is invalid.
23478 COMPARISON should be an rtx like `(eq (...) (...))'. */
23481 maybe_get_arm_condition_code (rtx comparison)
23483 machine_mode mode = GET_MODE (XEXP (comparison, 0));
23484 enum arm_cond_code code;
23485 enum rtx_code comp_code = GET_CODE (comparison);
23487 if (GET_MODE_CLASS (mode) != MODE_CC)
23488 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
23489 XEXP (comparison, 1));
23493 case E_CC_DNEmode: code = ARM_NE; goto dominance;
23494 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
23495 case E_CC_DGEmode: code = ARM_GE; goto dominance;
23496 case E_CC_DGTmode: code = ARM_GT; goto dominance;
23497 case E_CC_DLEmode: code = ARM_LE; goto dominance;
23498 case E_CC_DLTmode: code = ARM_LT; goto dominance;
23499 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
23500 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
23501 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
23502 case E_CC_DLTUmode: code = ARM_CC;
23505 if (comp_code == EQ)
23506 return ARM_INVERSE_CONDITION_CODE (code);
23507 if (comp_code == NE)
23511 case E_CC_NOOVmode:
23514 case NE: return ARM_NE;
23515 case EQ: return ARM_EQ;
23516 case GE: return ARM_PL;
23517 case LT: return ARM_MI;
23518 default: return ARM_NV;
23524 case NE: return ARM_NE;
23525 case EQ: return ARM_EQ;
23526 default: return ARM_NV;
23532 case NE: return ARM_MI;
23533 case EQ: return ARM_PL;
23534 default: return ARM_NV;
23539 /* We can handle all cases except UNEQ and LTGT. */
23542 case GE: return ARM_GE;
23543 case GT: return ARM_GT;
23544 case LE: return ARM_LS;
23545 case LT: return ARM_MI;
23546 case NE: return ARM_NE;
23547 case EQ: return ARM_EQ;
23548 case ORDERED: return ARM_VC;
23549 case UNORDERED: return ARM_VS;
23550 case UNLT: return ARM_LT;
23551 case UNLE: return ARM_LE;
23552 case UNGT: return ARM_HI;
23553 case UNGE: return ARM_PL;
23554 /* UNEQ and LTGT do not have a representation. */
23555 case UNEQ: /* Fall through. */
23556 case LTGT: /* Fall through. */
23557 default: return ARM_NV;
23563 case NE: return ARM_NE;
23564 case EQ: return ARM_EQ;
23565 case GE: return ARM_LE;
23566 case GT: return ARM_LT;
23567 case LE: return ARM_GE;
23568 case LT: return ARM_GT;
23569 case GEU: return ARM_LS;
23570 case GTU: return ARM_CC;
23571 case LEU: return ARM_CS;
23572 case LTU: return ARM_HI;
23573 default: return ARM_NV;
23579 case LTU: return ARM_CS;
23580 case GEU: return ARM_CC;
23581 case NE: return ARM_CS;
23582 case EQ: return ARM_CC;
23583 default: return ARM_NV;
23589 case NE: return ARM_NE;
23590 case EQ: return ARM_EQ;
23591 case GEU: return ARM_CS;
23592 case GTU: return ARM_HI;
23593 case LEU: return ARM_LS;
23594 case LTU: return ARM_CC;
23595 default: return ARM_NV;
23601 case GE: return ARM_GE;
23602 case LT: return ARM_LT;
23603 case GEU: return ARM_CS;
23604 case LTU: return ARM_CC;
23605 default: return ARM_NV;
23611 case NE: return ARM_VS;
23612 case EQ: return ARM_VC;
23613 default: return ARM_NV;
23619 case NE: return ARM_NE;
23620 case EQ: return ARM_EQ;
23621 case GE: return ARM_GE;
23622 case GT: return ARM_GT;
23623 case LE: return ARM_LE;
23624 case LT: return ARM_LT;
23625 case GEU: return ARM_CS;
23626 case GTU: return ARM_HI;
23627 case LEU: return ARM_LS;
23628 case LTU: return ARM_CC;
23629 default: return ARM_NV;
23632 default: gcc_unreachable ();
23636 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}
/* Implement TARGET_FIXED_CONDITION_CODE_REGS.  We only have condition
   code registers when not targeting Thumb1.  The VFP condition register
   only exists when generating hard-float code.  */
static bool
arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  if (!TARGET_32BIT)
    return false;

  *p1 = CC_REGNUM;
  *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
  return true;
}
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
static void
thumb2_final_prescan_insn (rtx_insn *insn)
23664 rtx_insn *first_insn = insn;
23665 rtx body = PATTERN (insn);
23667 enum arm_cond_code code;
  /* max_insns_skipped in the tune was already taken into account in the
     cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
     just emit the IT blocks as we can.  It does not make sense to split
     the IT blocks.  */
  max = MAX_INSN_PER_IT_BLOCK;
23678 /* Remove the previous insn from the count of insns to be output. */
23679 if (arm_condexec_count)
23680 arm_condexec_count--;
23682 /* Nothing to do if we are already inside a conditional block. */
23683 if (arm_condexec_count)
23686 if (GET_CODE (body) != COND_EXEC)
23689 /* Conditional jumps are implemented directly. */
23693 predicate = COND_EXEC_TEST (body);
23694 arm_current_cc = get_arm_condition_code (predicate);
23696 n = get_attr_ce_count (insn);
23697 arm_condexec_count = 1;
23698 arm_condexec_mask = (1 << n) - 1;
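  /* As an illustration: a block that ends up as "itet eq" corresponds
     to mask 0b101, since bit N set means insn N executes under the
     block's base condition and bit N clear means it executes under
     the inverse condition.  */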
23699 arm_condexec_masklen = n;
23700 /* See if subsequent instructions can be combined into the same block. */
23703 insn = next_nonnote_insn (insn);
23705 /* Jumping into the middle of an IT block is illegal, so a label or
23706 barrier terminates the block. */
23707 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23710 body = PATTERN (insn);
23711 /* USE and CLOBBER aren't really insns, so just skip them. */
23712 if (GET_CODE (body) == USE
23713 || GET_CODE (body) == CLOBBER)
23716 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23717 if (GET_CODE (body) != COND_EXEC)
23719 /* Maximum number of conditionally executed instructions in a block. */
23720 n = get_attr_ce_count (insn);
23721 if (arm_condexec_masklen + n > max)
23724 predicate = COND_EXEC_TEST (body);
23725 code = get_arm_condition_code (predicate);
23726 mask = (1 << n) - 1;
23727 if (arm_current_cc == code)
23728 arm_condexec_mask |= (mask << arm_condexec_masklen);
23729 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23732 arm_condexec_count++;
23733 arm_condexec_masklen += n;
23735 /* A jump must be the last instruction in a conditional block. */
23739 /* Restore recog_data (getting the attributes of other insns can
23740 destroy this array, but final.c assumes that it remains intact
23741 across this call). */
23742 extract_constrain_insn_cached (first_insn);
23746 arm_final_prescan_insn (rtx_insn *insn)
23748 /* BODY will hold the body of INSN. */
23749 rtx body = PATTERN (insn);
23751 /* This will be 1 if trying to repeat the trick, and things need to be
23752 reversed if it appears to fail. */
23755 /* If we start with a return insn, we only succeed if we find another one. */
23756 int seeking_return = 0;
23757 enum rtx_code return_code = UNKNOWN;
23759 /* START_INSN will hold the insn from where we start looking. This is the
23760 first insn after the following code_label if REVERSE is true. */
23761 rtx_insn *start_insn = insn;
23763 /* If in state 4, check if the target branch is reached, in order to
23764 change back to state 0. */
23765 if (arm_ccfsm_state == 4)
23767 if (insn == arm_target_insn)
23769 arm_target_insn = NULL;
23770 arm_ccfsm_state = 0;
23775 /* If in state 3, it is possible to repeat the trick, if this insn is an
23776 unconditional branch to a label, and immediately following this branch
23777 is the previous target label which is only used once, and the label this
23778 branch jumps to is not too far off. */
23779 if (arm_ccfsm_state == 3)
23781 if (simplejump_p (insn))
23783 start_insn = next_nonnote_insn (start_insn);
23784 if (BARRIER_P (start_insn))
23786 /* XXX Isn't this always a barrier? */
23787 start_insn = next_nonnote_insn (start_insn);
23789 if (LABEL_P (start_insn)
23790 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23791 && LABEL_NUSES (start_insn) == 1)
23796 else if (ANY_RETURN_P (body))
23798 start_insn = next_nonnote_insn (start_insn);
23799 if (BARRIER_P (start_insn))
23800 start_insn = next_nonnote_insn (start_insn);
23801 if (LABEL_P (start_insn)
23802 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23803 && LABEL_NUSES (start_insn) == 1)
23806 seeking_return = 1;
23807 return_code = GET_CODE (body);
23816 gcc_assert (!arm_ccfsm_state || reverse);
23817 if (!JUMP_P (insn))
23820 /* This jump might be paralleled with a clobber of the condition codes;
23821 the jump should always come first.  */
23822 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23823 body = XVECEXP (body, 0, 0);
23826 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23827 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23830 int fail = FALSE, succeed = FALSE;
23831 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23832 int then_not_else = TRUE;
23833 rtx_insn *this_insn = start_insn;
23836 /* Register the insn jumped to. */
23839 if (!seeking_return)
23840 label = XEXP (SET_SRC (body), 0);
23842 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23843 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23844 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23846 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23847 then_not_else = FALSE;
23849 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23851 seeking_return = 1;
23852 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23854 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23856 seeking_return = 1;
23857 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23858 then_not_else = FALSE;
23861 gcc_unreachable ();
23863 /* See how many insns this branch skips, and what kind of insns. If all
23864 insns are okay, and the label or unconditional branch to the same
23865 label is not too far away, succeed. */
23866 for (insns_skipped = 0;
23867 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23871 this_insn = next_nonnote_insn (this_insn);
23875 switch (GET_CODE (this_insn))
23878 /* Succeed if it is the target label, otherwise fail since
23879 control falls in from somewhere else. */
23880 if (this_insn == label)
23882 arm_ccfsm_state = 1;
23890 /* Succeed if the following insn is the target label.
23892 If return insns are used then the last insn in a function
23893 will be a barrier. */
23894 this_insn = next_nonnote_insn (this_insn);
23895 if (this_insn && this_insn == label)
23897 arm_ccfsm_state = 1;
23905 /* The AAPCS says that conditional calls should not be
23906 used since they make interworking inefficient (the
23907 linker can't transform BL<cond> into BLX). That's
23908 only a problem if the machine has BLX. */
23915 /* Succeed if the following insn is the target label, or
23916 if the following two insns are a barrier and the
23918 this_insn = next_nonnote_insn (this_insn);
23919 if (this_insn && BARRIER_P (this_insn))
23920 this_insn = next_nonnote_insn (this_insn);
23922 if (this_insn && this_insn == label
23923 && insns_skipped < max_insns_skipped)
23925 arm_ccfsm_state = 1;
23933 /* If this is an unconditional branch to the same label, succeed.
23934 If it is to another label, do nothing. If it is conditional,
23936 /* XXX Probably, the tests for SET and the PC are
23939 scanbody = PATTERN (this_insn);
23940 if (GET_CODE (scanbody) == SET
23941 && GET_CODE (SET_DEST (scanbody)) == PC)
23943 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23944 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23946 arm_ccfsm_state = 2;
23949 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23952 /* Fail if a conditional return is undesirable (e.g. on a
23953 StrongARM), but still allow this if optimizing for size. */
23954 else if (GET_CODE (scanbody) == return_code
23955 && !use_return_insn (TRUE, NULL)
23958 else if (GET_CODE (scanbody) == return_code)
23960 arm_ccfsm_state = 2;
23963 else if (GET_CODE (scanbody) == PARALLEL)
23965 switch (get_attr_conds (this_insn))
23975 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23980 /* Instructions using or affecting the condition codes make it
23982 scanbody = PATTERN (this_insn);
23983 if (!(GET_CODE (scanbody) == SET
23984 || GET_CODE (scanbody) == PARALLEL)
23985 || get_attr_conds (this_insn) != CONDS_NOCOND)
23995 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23996 arm_target_label = CODE_LABEL_NUMBER (label);
23999 gcc_assert (seeking_return || arm_ccfsm_state == 2);
24001 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
24003 this_insn = next_nonnote_insn (this_insn);
24004 gcc_assert (!this_insn
24005 || (!BARRIER_P (this_insn)
24006 && !LABEL_P (this_insn)));
24010 /* Oh dear!  We ran off the end; give up.  */
24011 extract_constrain_insn_cached (insn);
24012 arm_ccfsm_state = 0;
24013 arm_target_insn = NULL;
24016 arm_target_insn = this_insn;
24019 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
24022 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
24024 if (reverse || then_not_else)
24025 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
24028 /* Restore recog_data (getting the attributes of other insns can
24029 destroy this array, but final.c assumes that it remains intact
24030 across this call).  */
24031 extract_constrain_insn_cached (insn);
24035 /* Output IT instructions. */
24037 thumb2_asm_output_opcode (FILE * stream)
24042 if (arm_condexec_mask)
24044 for (n = 0; n < arm_condexec_masklen; n++)
24045 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
24047 asm_fprintf (stream, "i%s\t%s\n\t", buff,
24048 arm_condition_codes[arm_current_cc]);
24049 arm_condexec_mask = 0;
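/* Illustrative sketch, ours and not part of this file: how the
   arm_condexec_mask/arm_condexec_masklen pair maintained by
   thumb2_final_prescan_insn above turns into the suffix of an IT
   instruction.  Bit N set means slot N executes on the base condition
   ('t'); clear means the inverse ('e').  Bit 0 is always set, so the
   string always starts with 't', giving "it", "itt", "ite", "ittt"
   and so on once the "i" prefix is printed.  */
static void
it_suffix_sketch (unsigned mask, int len, char *buf /* len + 1 chars */)
{
  int n;
  for (n = 0; n < len; n++)
    buf[n] = (mask & (1u << n)) ? 't' : 'e';
  buf[n] = '\0';	/* e.g. mask 0x5, len 4 -> "tete", printed "itete" */
}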
24053 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
24054 UNITS_PER_WORD bytes wide. */
24055 static unsigned int
24056 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
24059 && regno > PC_REGNUM
24060 && regno != FRAME_POINTER_REGNUM
24061 && regno != ARG_POINTER_REGNUM
24062 && !IS_VFP_REGNUM (regno))
24065 return ARM_NUM_REGS (mode);
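/* Worked example, ours: leaving the special cases above aside,
   ARM_NUM_REGS rounds the mode size up to whole UNITS_PER_WORD core
   registers, so SImode needs 1 register, DImode 2 and TImode 4.  A
   minimal sketch of that rounding, assuming 4-byte words as on
   AArch32:  */
static unsigned int
num_core_regs_sketch (unsigned int mode_size_in_bytes)
{
  return (mode_size_in_bytes + 3) / 4;
}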
24068 /* Implement TARGET_HARD_REGNO_MODE_OK. */
24070 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
24072 if (GET_MODE_CLASS (mode) == MODE_CC)
24073 return (regno == CC_REGNUM
24074 || (TARGET_HARD_FLOAT
24075 && regno == VFPCC_REGNUM));
24077 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
24081 /* For the Thumb we only allow values bigger than SImode in
24082 registers 0 - 6, so that there is always a second low
24083 register available to hold the upper part of the value.
24084 We probably ought to ensure that the register is the
24085 start of an even-numbered register pair.  */
24086 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
24088 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
24090 if (mode == SFmode || mode == SImode)
24091 return VFP_REGNO_OK_FOR_SINGLE (regno);
24093 if (mode == DFmode)
24094 return VFP_REGNO_OK_FOR_DOUBLE (regno);
24096 if (mode == HFmode)
24097 return VFP_REGNO_OK_FOR_SINGLE (regno);
24099 /* VFP registers can hold HImode values. */
24100 if (mode == HImode)
24101 return VFP_REGNO_OK_FOR_SINGLE (regno);
24104 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
24105 || (VALID_NEON_QREG_MODE (mode)
24106 && NEON_REGNO_OK_FOR_QUAD (regno))
24107 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
24108 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
24109 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
24110 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
24111 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
24116 if (TARGET_REALLY_IWMMXT)
24118 if (IS_IWMMXT_GR_REGNUM (regno))
24119 return mode == SImode;
24121 if (IS_IWMMXT_REGNUM (regno))
24122 return VALID_IWMMXT_REG_MODE (mode);
24125 /* We allow almost any value to be stored in the general registers.
24126 Restrict doubleword quantities to even register pairs in ARM state
24127 so that we can use ldrd. Do not allow very large Neon structure
24128 opaque modes in general registers; they would use too many. */
24129 if (regno <= LAST_ARM_REGNUM)
24131 if (ARM_NUM_REGS (mode) > 4)
24137 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
24140 if (regno == FRAME_POINTER_REGNUM
24141 || regno == ARG_POINTER_REGNUM)
24142 /* We only allow integers in the fake hard registers. */
24143 return GET_MODE_CLASS (mode) == MODE_INT;
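/* Illustrative sketch, ours: the even-pair restriction applied above
   means an 8-byte value may live in {r0,r1} or {r2,r3} but never in
   {r1,r2}, because LDRD/STRD require an even-numbered first
   register.  */
static int
ok_for_ldrd_pair_sketch (unsigned int regno, unsigned int mode_size)
{
  return !(mode_size > 4 && (regno & 1) != 0);
}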
24148 /* Implement TARGET_MODES_TIEABLE_P. */
24151 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
24153 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
24156 /* We specifically want to allow elements of "structure" modes to
24157 be tieable to the structure. This more general condition allows
24158 other rarer situations too. */
24160 && (VALID_NEON_DREG_MODE (mode1)
24161 || VALID_NEON_QREG_MODE (mode1)
24162 || VALID_NEON_STRUCT_MODE (mode1))
24163 && (VALID_NEON_DREG_MODE (mode2)
24164 || VALID_NEON_QREG_MODE (mode2)
24165 || VALID_NEON_STRUCT_MODE (mode2)))
24171 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
24172 not used in arm mode. */
24175 arm_regno_class (int regno)
24177 if (regno == PC_REGNUM)
24182 if (regno == STACK_POINTER_REGNUM)
24184 if (regno == CC_REGNUM)
24191 if (TARGET_THUMB2 && regno < 8)
24194 if ( regno <= LAST_ARM_REGNUM
24195 || regno == FRAME_POINTER_REGNUM
24196 || regno == ARG_POINTER_REGNUM)
24197 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
24199 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
24200 return TARGET_THUMB2 ? CC_REG : NO_REGS;
24202 if (IS_VFP_REGNUM (regno))
24204 if (regno <= D7_VFP_REGNUM)
24205 return VFP_D0_D7_REGS;
24206 else if (regno <= LAST_LO_VFP_REGNUM)
24207 return VFP_LO_REGS;
24209 return VFP_HI_REGS;
24212 if (IS_IWMMXT_REGNUM (regno))
24213 return IWMMXT_REGS;
24215 if (IS_IWMMXT_GR_REGNUM (regno))
24216 return IWMMXT_GR_REGS;
24221 /* Handle a special case when computing the offset
24222 of an argument from the frame pointer. */
24224 arm_debugger_arg_offset (int value, rtx addr)
24228 /* We are only interested in the case where dbxout_parms () failed to compute the offset.  */
24232 /* We can only cope with the case where the address is held in a register. */
24236 /* If we are using the frame pointer to point at the argument, then
24237 an offset of 0 is correct. */
24238 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
24241 /* If we are using the stack pointer to point at the
24242 argument, then an offset of 0 is correct. */
24243 /* ??? Check this is consistent with thumb2 frame layout. */
24244 if ((TARGET_THUMB || !frame_pointer_needed)
24245 && REGNO (addr) == SP_REGNUM)
24248 /* Oh dear. The argument is pointed to by a register rather
24249 than being held in a register, or being stored at a known
24250 offset from the frame pointer. Since GDB only understands
24251 those two kinds of argument we must translate the address
24252 held in the register into an offset from the frame pointer.
24253 We do this by searching through the insns for the function
24254 looking to see where this register gets its value. If the
24255 register is initialized from the frame pointer plus an offset
24256 then we are in luck and we can continue, otherwise we give up.
24258 This code is exercised by producing debugging information
24259 for a function with arguments like this:
24261 double func (double a, double b, int c, double d) {return d;}
24263 Without this code the stab for parameter 'd' will be set to
24264 an offset of 0 from the frame pointer, rather than 8. */
24266 /* The if() statement says:
24268 If the insn is a normal instruction
24269 and if the insn is setting the value in a register
24270 and if the register being set is the register holding the address of the argument
24271 and if the address is computed by an addition
24272 that involves adding to a register
24273 which is the frame pointer
24278 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24280 if ( NONJUMP_INSN_P (insn)
24281 && GET_CODE (PATTERN (insn)) == SET
24282 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
24283 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
24284 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
24285 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
24286 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
24289 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
24298 warning (0, "unable to compute real location of stacked parameter");
24299 value = 8; /* XXX magic hack */
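/* Sketch, ours, of the insn shape the loop above searches for, as it
   would appear in an RTL dump:

     (set (reg Rn)
          (plus (reg hard-frame-pointer) (const_int 8)))

   i.e. the register holding the argument's address is initialized from
   the frame pointer plus a constant, and that constant becomes the new
   VALUE.  */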
24305 /* Implement TARGET_PROMOTED_TYPE. */
24308 arm_promoted_type (const_tree t)
24310 if (SCALAR_FLOAT_TYPE_P (t)
24311 && TYPE_PRECISION (t) == 16
24312 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
24313 return float_type_node;
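/* Usage sketch, ours; assumes __fp16 is available, i.e. that
   -mfp16-format is not "none".  Because of the promotion above, the
   addition is performed in single precision and the result truncated
   back to half precision on return:  */
__fp16
fp16_add_sketch (__fp16 a, __fp16 b)
{
  return a + b;	/* evaluated as (float) a + (float) b */
}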
24317 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24318 This simply adds HFmode as a supported mode; even though we don't
24319 implement arithmetic on this type directly, it's supported by
24320 optabs conversions, much the way the double-word arithmetic is
24321 special-cased in the default hook. */
24324 arm_scalar_mode_supported_p (scalar_mode mode)
24326 if (mode == HFmode)
24327 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
24328 else if (ALL_FIXED_POINT_MODE_P (mode))
24331 return default_scalar_mode_supported_p (mode);
24334 /* Set the value of FLT_EVAL_METHOD.
24335 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
24337 0: evaluate all operations and constants, whose semantic type has at
24338 most the range and precision of type float, to the range and
24339 precision of float; evaluate all other operations and constants to
24340 the range and precision of the semantic type;
24342 N, where _FloatN is a supported interchange floating type
24343 evaluate all operations and constants, whose semantic type has at
24344 most the range and precision of _FloatN type, to the range and
24345 precision of the _FloatN type; evaluate all other operations and
24346 constants to the range and precision of the semantic type;
24348 If we have the ARMv8.2-A extensions then we support _Float16 in native
24349 precision, so we should set this to 16. Otherwise, we support the type,
24350 but want to evaluate expressions in float precision, so set this to
24353 static enum flt_eval_method
24354 arm_excess_precision (enum excess_precision_type type)
24358 case EXCESS_PRECISION_TYPE_FAST:
24359 case EXCESS_PRECISION_TYPE_STANDARD:
24360 /* We can calculate either in 16-bit range and precision or
24361 32-bit range and precision. Make that decision based on whether
24362 we have native support for the ARMv8.2-A 16-bit floating-point
24363 instructions or not. */
24364 return (TARGET_VFP_FP16INST
24365 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
24366 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
24367 case EXCESS_PRECISION_TYPE_IMPLICIT:
24368 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
24370 gcc_unreachable ();
24372 return FLT_EVAL_METHOD_UNPREDICTABLE;
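/* Usage sketch, ours; assumes IEEE _Float16 support.  With the
   ARMv8.2-A FP16 extension (TARGET_VFP_FP16INST) the excess-precision
   setting above lets this compile to a single half-precision add;
   without it, both operands are widened to float, added, and the sum
   converted back to _Float16.  */
_Float16
f16_add_sketch (_Float16 a, _Float16 b)
{
  return a + b;
}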
24376 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
24377 _Float16 if we are using anything other than ieee format for 16-bit
24378 floating point. Otherwise, punt to the default implementation. */
24379 static opt_scalar_float_mode
24380 arm_floatn_mode (int n, bool extended)
24382 if (!extended && n == 16)
24384 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
24386 return opt_scalar_float_mode ();
24389 return default_floatn_mode (n, extended);
24393 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
24394 not to early-clobber SRC registers in the process.
24396 We assume that the operands described by SRC and DEST represent a
24397 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
24398 number of components into which the copy has been decomposed. */
24400 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
24404 if (!reg_overlap_mentioned_p (operands[0], operands[1])
24405 || REGNO (operands[0]) < REGNO (operands[1]))
24407 for (i = 0; i < count; i++)
24409 operands[2 * i] = dest[i];
24410 operands[2 * i + 1] = src[i];
24415 for (i = 0; i < count; i++)
24417 operands[2 * i] = dest[count - i - 1];
24418 operands[2 * i + 1] = src[count - i - 1];
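/* Illustrative sketch, ours, of the ordering rule used above; it is
   the same trick memmove-style copies use for overlap.  If the
   destination block starts below the source, component 0 is copied
   first; otherwise the highest component is copied first, so no source
   register is overwritten before it has been read.  */
static void
ordered_copy_sketch (int regs[], unsigned int dest, unsigned int src,
		     unsigned int count)
{
  unsigned int i;
  if (dest < src)
    for (i = 0; i < count; i++)		/* copy upwards */
      regs[dest + i] = regs[src + i];
  else
    for (i = count; i-- > 0;)		/* copy downwards */
      regs[dest + i] = regs[src + i];
}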
24423 /* Split operands into moves from op[1] + op[2] into op[0]. */
24426 neon_split_vcombine (rtx operands[3])
24428 unsigned int dest = REGNO (operands[0]);
24429 unsigned int src1 = REGNO (operands[1]);
24430 unsigned int src2 = REGNO (operands[2]);
24431 machine_mode halfmode = GET_MODE (operands[1]);
24432 unsigned int halfregs = REG_NREGS (operands[1]);
24433 rtx destlo, desthi;
24435 if (src1 == dest && src2 == dest + halfregs)
24437 /* No-op move. Can't split to nothing; emit something. */
24438 emit_note (NOTE_INSN_DELETED);
24442 /* Preserve register attributes for variable tracking. */
24443 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
24444 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
24445 GET_MODE_SIZE (halfmode));
24447 /* Special case of reversed high/low parts. Use VSWP. */
24448 if (src2 == dest && src1 == dest + halfregs)
24450 rtx x = gen_rtx_SET (destlo, operands[1]);
24451 rtx y = gen_rtx_SET (desthi, operands[2]);
24452 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
24456 if (!reg_overlap_mentioned_p (operands[2], destlo))
24458 /* Try to avoid unnecessary moves if part of the result
24459 is in the right place already. */
24461 emit_move_insn (destlo, operands[1]);
24462 if (src2 != dest + halfregs)
24463 emit_move_insn (desthi, operands[2]);
24467 if (src2 != dest + halfregs)
24468 emit_move_insn (desthi, operands[2]);
24470 emit_move_insn (destlo, operands[1]);
24474 /* Return the number (counting from 0) of
24475 the least significant set bit in MASK. */
24478 number_of_first_bit_set (unsigned mask)
24480 return ctz_hwi (mask);
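/* Illustrative fallback, ours: what ctz_hwi computes, spelled out by
   hand.  MASK is assumed nonzero, matching the caller contract
   above.  */
static int
ctz_sketch (unsigned mask)
{
  int n = 0;
  while ((mask & 1) == 0)	/* shift out zeros below the first set bit */
    {
      mask >>= 1;
      n++;
    }
  return n;			/* e.g. 0x28 -> 3 */
}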
24483 /* Like emit_multi_reg_push, but allowing for a different set of
24484 registers to be described as saved. MASK is the set of registers
24485 to be saved; REAL_REGS is the set of registers to be described as
24486 saved. If REAL_REGS is 0, only describe the stack adjustment. */
24489 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
24491 unsigned long regno;
24492 rtx par[10], tmp, reg;
24496 /* Build the parallel of the registers actually being stored. */
24497 for (i = 0; mask; ++i, mask &= mask - 1)
24499 regno = ctz_hwi (mask);
24500 reg = gen_rtx_REG (SImode, regno);
24503 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
24505 tmp = gen_rtx_USE (VOIDmode, reg);
24510 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24511 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
24512 tmp = gen_frame_mem (BLKmode, tmp);
24513 tmp = gen_rtx_SET (tmp, par[0]);
24516 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
24517 insn = emit_insn (tmp);
24519 /* Always build the stack adjustment note for unwind info. */
24520 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24521 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
24524 /* Build the parallel of the registers recorded as saved for unwind. */
24525 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
24527 regno = ctz_hwi (real_regs);
24528 reg = gen_rtx_REG (SImode, regno);
24530 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
24531 tmp = gen_frame_mem (SImode, tmp);
24532 tmp = gen_rtx_SET (tmp, reg);
24533 RTX_FRAME_RELATED_P (tmp) = 1;
24541 RTX_FRAME_RELATED_P (par[0]) = 1;
24542 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
24545 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
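/* Illustrative sketch, ours: both loops above use the classic idiom
   for visiting each set bit exactly once -- ctz_hwi yields the index
   of the lowest set bit and "mask &= mask - 1" clears it.  Counting
   the iterations computes the population count:  */
static int
popcount_sketch (unsigned long mask)
{
  int i;
  for (i = 0; mask; ++i)
    mask &= mask - 1;	/* clear the lowest set bit */
  return i;		/* e.g. 0x90ff -> 10 */
}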
24550 /* Emit code to push or pop registers to or from the stack. F is the
24551 assembly file. MASK is the registers to pop. */
24553 thumb_pop (FILE *f, unsigned long mask)
24556 int lo_mask = mask & 0xFF;
24560 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
24562 /* Special case. Do not generate a POP PC statement here, do it in
24564 thumb_exit (f, -1);
24568 fprintf (f, "\tpop\t{");
24570 /* Look at the low registers first. */
24571 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
24575 asm_fprintf (f, "%r", regno);
24577 if ((lo_mask & ~1) != 0)
24582 if (mask & (1 << PC_REGNUM))
24584 /* Catch popping the PC. */
24585 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
24586 || IS_CMSE_ENTRY (arm_current_func_type ()))
24588 /* The PC is never popped directly; instead
24589 it is popped into r3 and then BX is used.  */
24590 fprintf (f, "}\n");
24592 thumb_exit (f, -1);
24601 asm_fprintf (f, "%r", PC_REGNUM);
24605 fprintf (f, "}\n");
24608 /* Generate code to return from a thumb function.
24609 If 'reg_containing_return_addr' is -1, then the return address is
24610 actually on the stack, at the stack pointer.
24612 Note: do not forget to update length attribute of corresponding insn pattern
24613 when changing assembly output (e.g. length attribute of epilogue_insns when
24614 updating Armv8-M Baseline Security Extensions register clearing sequences).  */
24617 thumb_exit (FILE *f, int reg_containing_return_addr)
24619 unsigned regs_available_for_popping;
24620 unsigned regs_to_pop;
24622 unsigned available;
24626 int restore_a4 = FALSE;
24628 /* Compute the registers we need to pop. */
24632 if (reg_containing_return_addr == -1)
24634 regs_to_pop |= 1 << LR_REGNUM;
24638 if (TARGET_BACKTRACE)
24640 /* Restore the (ARM) frame pointer and stack pointer. */
24641 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
24645 /* If there is nothing to pop then just emit the BX instruction and
24647 if (pops_needed == 0)
24649 if (crtl->calls_eh_return)
24650 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24652 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24654 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
24655 reg_containing_return_addr);
24656 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24659 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24662 /* Otherwise if we are not supporting interworking and we have not created
24663 a backtrace structure and the function was not entered in ARM mode then
24664 just pop the return address straight into the PC. */
24665 else if (!TARGET_INTERWORK
24666 && !TARGET_BACKTRACE
24667 && !is_called_in_ARM_mode (current_function_decl)
24668 && !crtl->calls_eh_return
24669 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24671 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24675 /* Find out how many of the (return) argument registers we can corrupt. */
24676 regs_available_for_popping = 0;
24678 /* If returning via __builtin_eh_return, the bottom three registers
24679 all contain information needed for the return. */
24680 if (crtl->calls_eh_return)
24684 /* We can deduce the registers used from the function's
24685 return value.  This is more reliable than examining
24686 df_regs_ever_live_p () because that will be set if the register is
24687 ever used in the function, not just if the register is used
24688 to hold a return value. */
24690 if (crtl->return_rtx != 0)
24691 mode = GET_MODE (crtl->return_rtx);
24693 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24695 size = GET_MODE_SIZE (mode);
24699 /* In a void function we can use any argument register.
24700 In a function that returns a structure on the stack
24701 we can use the second and third argument registers. */
24702 if (mode == VOIDmode)
24703 regs_available_for_popping =
24704 (1 << ARG_REGISTER (1))
24705 | (1 << ARG_REGISTER (2))
24706 | (1 << ARG_REGISTER (3));
24708 regs_available_for_popping =
24709 (1 << ARG_REGISTER (2))
24710 | (1 << ARG_REGISTER (3));
24712 else if (size <= 4)
24713 regs_available_for_popping =
24714 (1 << ARG_REGISTER (2))
24715 | (1 << ARG_REGISTER (3));
24716 else if (size <= 8)
24717 regs_available_for_popping =
24718 (1 << ARG_REGISTER (3));
24721 /* Match registers to be popped with registers into which we pop them. */
24722 for (available = regs_available_for_popping,
24723 required = regs_to_pop;
24724 required != 0 && available != 0;
24725 available &= ~(available & - available),
24726 required &= ~(required & - required))
24729 /* If we have any popping registers left over, remove them. */
24731 regs_available_for_popping &= ~available;
24733 /* Otherwise if we need another popping register we can use
24734 the fourth argument register. */
24735 else if (pops_needed)
24737 /* If we have not found any free argument registers and
24738 reg a4 contains the return address, we must move it. */
24739 if (regs_available_for_popping == 0
24740 && reg_containing_return_addr == LAST_ARG_REGNUM)
24742 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24743 reg_containing_return_addr = LR_REGNUM;
24745 else if (size > 12)
24747 /* Register a4 is being used to hold part of the return value,
24748 but we have dire need of a free, low register. */
24751 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24754 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24756 /* The fourth argument register is available. */
24757 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24763 /* Pop as many registers as we can. */
24764 thumb_pop (f, regs_available_for_popping);
24766 /* Process the registers we popped. */
24767 if (reg_containing_return_addr == -1)
24769 /* The return address was popped into the lowest numbered register. */
24770 regs_to_pop &= ~(1 << LR_REGNUM);
24772 reg_containing_return_addr =
24773 number_of_first_bit_set (regs_available_for_popping);
24775 /* Remove this register from the mask of available registers, so that
24776 the return address will not be corrupted by further pops. */
24777 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24780 /* If we popped other registers then handle them here. */
24781 if (regs_available_for_popping)
24785 /* Work out which register currently contains the frame pointer. */
24786 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24788 /* Move it into the correct place. */
24789 asm_fprintf (f, "\tmov\t%r, %r\n",
24790 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24792 /* (Temporarily) remove it from the mask of popped registers. */
24793 regs_available_for_popping &= ~(1 << frame_pointer);
24794 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24796 if (regs_available_for_popping)
24800 /* We popped the stack pointer as well,
24801 find the register that contains it. */
24802 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24804 /* Move it into the stack register. */
24805 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24807 /* At this point we have popped all necessary registers, so
24808 do not worry about restoring regs_available_for_popping
24809 to its correct value:
24811 assert (pops_needed == 0)
24812 assert (regs_available_for_popping == (1 << frame_pointer))
24813 assert (regs_to_pop == (1 << STACK_POINTER)) */
24817 /* Since we have just moved the popped value into the frame
24818 pointer, the popping register is available for reuse, and
24819 we know that we still have the stack pointer left to pop. */
24820 regs_available_for_popping |= (1 << frame_pointer);
24824 /* If we still have registers left on the stack, but we no longer have
24825 any registers into which we can pop them, then we must move the return
24826 address into the link register and make available the register that
24827 we used to hold the return address.  */
24828 if (regs_available_for_popping == 0 && pops_needed > 0)
24830 regs_available_for_popping |= 1 << reg_containing_return_addr;
24832 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24833 reg_containing_return_addr);
24835 reg_containing_return_addr = LR_REGNUM;
24838 /* If we have registers left on the stack then pop some more.
24839 We know that at most we will want to pop FP and SP. */
24840 if (pops_needed > 0)
24845 thumb_pop (f, regs_available_for_popping);
24847 /* We have popped either FP or SP.
24848 Move whichever one it is into the correct register. */
24849 popped_into = number_of_first_bit_set (regs_available_for_popping);
24850 move_to = number_of_first_bit_set (regs_to_pop);
24852 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24856 /* If we still have not popped everything then we must have only
24857 had one register available to us and we are now popping the SP. */
24858 if (pops_needed > 0)
24862 thumb_pop (f, regs_available_for_popping);
24864 popped_into = number_of_first_bit_set (regs_available_for_popping);
24866 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24868 assert (regs_to_pop == (1 << STACK_POINTER))
24869 assert (pops_needed == 1)
24873 /* If necessary restore the a4 register. */
24876 if (reg_containing_return_addr != LR_REGNUM)
24878 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24879 reg_containing_return_addr = LR_REGNUM;
24882 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24885 if (crtl->calls_eh_return)
24886 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24888 /* Return to caller. */
24889 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24891 /* This is for the cases where LR is not being used to contain the return
24892 address. It may therefore contain information that we might not want
24893 to leak, hence it must be cleared. The value in R0 will never be a
24894 secret at this point, so it is safe to use it, see the clearing code
24895 in 'cmse_nonsecure_entry_clear_before_return'. */
24896 if (reg_containing_return_addr != LR_REGNUM)
24897 asm_fprintf (f, "\tmov\tlr, r0\n");
24899 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24900 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24903 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24906 /* Scan INSN just before assembler is output for it.
24907 For Thumb-1, we track the status of the condition codes; this
24908 information is used in the cbranchsi4_insn pattern. */
24910 thumb1_final_prescan_insn (rtx_insn *insn)
24912 if (flag_print_asm_name)
24913 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24914 INSN_ADDRESSES (INSN_UID (insn)));
24915 /* Don't overwrite the previous setter when we get to a cbranch. */
24916 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24918 enum attr_conds conds;
24920 if (cfun->machine->thumb1_cc_insn)
24922 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24923 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24926 conds = get_attr_conds (insn);
24927 if (conds == CONDS_SET)
24929 rtx set = single_set (insn);
24930 cfun->machine->thumb1_cc_insn = insn;
24931 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24932 cfun->machine->thumb1_cc_op1 = const0_rtx;
24933 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24934 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24936 rtx src1 = XEXP (SET_SRC (set), 1);
24937 if (src1 == const0_rtx)
24938 cfun->machine->thumb1_cc_mode = CCmode;
24940 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24942 /* Record the src register operand instead of dest because the
24943 cprop_hardreg pass propagates src.  */
24944 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24947 else if (conds != CONDS_NOCOND)
24948 cfun->machine->thumb1_cc_insn = NULL_RTX;
24951 /* Check if unexpected far jump is used. */
24952 if (cfun->machine->lr_save_eliminated
24953 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24954 internal_error ("Unexpected thumb1 far jump");
24958 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24960 unsigned HOST_WIDE_INT mask = 0xff;
24963 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24964 if (val == 0) /* XXX */
24967 for (i = 0; i < 25; i++)
24968 if ((val & (mask << i)) == val)
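/* Worked examples, ours: 0x00ff0000 == 0xff << 16, so the test above
   succeeds at i == 16 and the constant is shiftable; 0x00000101 spans
   a 9-bit window, so no shift of the 8-bit mask can cover both set
   bits and no i matches.  */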
24974 /* Returns nonzero if the current function contains,
24975 or might contain a far jump. */
24977 thumb_far_jump_used_p (void)
24980 bool far_jump = false;
24981 unsigned int func_size = 0;
24983 /* If we have already decided that far jumps may be used,
24984 do not bother checking again, and always return true even if
24985 it turns out that they are not being used. Once we have made
24986 the decision that far jumps are present (and that hence the link
24987 register will be pushed onto the stack) we cannot go back on it. */
24988 if (cfun->machine->far_jump_used)
24991 /* If this function is not being called from the prologue/epilogue
24992 generation code then it must be being called from the
24993 INITIAL_ELIMINATION_OFFSET macro. */
24994 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24996 /* In this case we know that we are being asked about the elimination
24997 of the arg pointer register. If that register is not being used,
24998 then there are no arguments on the stack, and we do not have to
24999 worry that a far jump might force the prologue to push the link
25000 register, changing the stack offsets. In this case we can just
25001 return false, since the presence of far jumps in the function will
25002 not affect stack offsets.
25004 If the arg pointer is live (or if it was live, but has now been
25005 eliminated and so set to dead) then we do have to test to see if
25006 the function might contain a far jump. This test can lead to some
25007 false negatives, since before reload is completed the length of
25008 branch instructions is not known, so gcc defaults to returning their
25009 longest length, which in turn sets the far jump attribute to true.
25011 A false negative will not result in bad code being generated, but it
25012 will result in a needless push and pop of the link register. We
25013 hope that this does not occur too often.
25015 If we need doubleword stack alignment this could affect the other
25016 elimination offsets so we can't risk getting it wrong. */
25017 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
25018 cfun->machine->arg_pointer_live = 1;
25019 else if (!cfun->machine->arg_pointer_live)
25023 /* We should not change far_jump_used during or after reload, as there is
25024 no chance to change stack frame layout. */
25025 if (reload_in_progress || reload_completed)
25028 /* Check to see if the function contains a branch
25029 insn with the far jump attribute set. */
25030 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25032 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
25036 func_size += get_attr_length (insn);
25039 /* Attribute far_jump will always be true for thumb1 before the
25040 shorten_branch pass.  So checking the far_jump attribute before
25041 shorten_branch isn't very useful.
25043 Following heuristic tries to estimate more accurately if a far jump
25044 may finally be used. The heuristic is very conservative as there is
25045 no chance to roll-back the decision of not to use far jump.
25047 Thumb1 long branch offset is -2048 to 2046. The worst case is each
25048 2-byte insn is associated with a 4 byte constant pool. Using
25049 function size 2048/3 as the threshold is conservative enough. */
25052 if ((func_size * 3) >= 2048)
25054 /* Record the fact that we have decided that
25055 the function does use far jumps. */
25056 cfun->machine->far_jump_used = 1;
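/* Worked example, ours, of the threshold arithmetic: for func_size ==
   700 bytes of insns, the worst case adds 700 / 2 constant-pool
   entries of 4 bytes each, i.e. 700 + 1400 == 2100 == func_size * 3
   bytes in total.  2100 >= 2048 exceeds the +/-2 KB conditional-branch
   range, so far jumps must be assumed.  */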
25064 /* Return nonzero if FUNC must be entered in ARM mode. */
25066 is_called_in_ARM_mode (tree func)
25068 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
25070 /* Ignore the problem about functions whose address is taken. */
25071 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
25075 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
25081 /* Given the stack offsets and register mask in OFFSETS, decide how
25082 many additional registers to push instead of subtracting a constant
25083 from SP. For epilogues the principle is the same except we use pop.
25084 FOR_PROLOGUE indicates which we're generating. */
25086 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
25088 HOST_WIDE_INT amount;
25089 unsigned long live_regs_mask = offsets->saved_regs_mask;
25090 /* Extract a mask of the ones we can give to the Thumb's push/pop
25092 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
25093 /* Then count how many other high registers will need to be pushed. */
25094 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25095 int n_free, reg_base, size;
25097 if (!for_prologue && frame_pointer_needed)
25098 amount = offsets->locals_base - offsets->saved_regs;
25100 amount = offsets->outgoing_args - offsets->saved_regs;
25102 /* If the stack frame size is 512 exactly, we can save one load
25103 instruction, which should make this a win even when optimizing
25105 if (!optimize_size && amount != 512)
25108 /* Can't do this if there are high registers to push. */
25109 if (high_regs_pushed != 0)
25112 /* Shouldn't do it in the prologue if no registers would normally
25113 be pushed at all. In the epilogue, also allow it if we'll have
25114 a pop insn for the PC. */
25117 || TARGET_BACKTRACE
25118 || (live_regs_mask & 1 << LR_REGNUM) == 0
25119 || TARGET_INTERWORK
25120 || crtl->args.pretend_args_size != 0))
25123 /* Don't do this if thumb_expand_prologue wants to emit instructions
25124 between the push and the stack frame allocation. */
25126 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
25127 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
25134 size = arm_size_return_regs ();
25135 reg_base = ARM_NUM_INTS (size);
25136 live_regs_mask >>= reg_base;
25139 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
25140 && (for_prologue || call_used_regs[reg_base + n_free]))
25142 live_regs_mask >>= 1;
25148 gcc_assert (amount / 4 * 4 == amount);
25150 if (amount >= 512 && (amount - n_free * 4) < 512)
25151 return (amount - 508) / 4;
25152 if (amount <= n_free * 4)
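/* Worked example, ours, not compiler output: with an 8-byte frame and
   r2/r3 dead, the two-insn prologue

	push	{lr}
	sub	sp, #8

   becomes the single insn

	push	{r2, r3, lr}

   The stored r2/r3 values are never reloaded; the extra push exists
   purely for its 8-byte stack adjustment.  The epilogue plays the same
   trick, popping into call-clobbered registers instead of adding to
   SP.  */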
25157 /* The bits which aren't usefully expanded as rtl. */
25159 thumb1_unexpanded_epilogue (void)
25161 arm_stack_offsets *offsets;
25163 unsigned long live_regs_mask = 0;
25164 int high_regs_pushed = 0;
25166 int had_to_push_lr;
25169 if (cfun->machine->return_used_this_function != 0)
25172 if (IS_NAKED (arm_current_func_type ()))
25175 offsets = arm_get_frame_offsets ();
25176 live_regs_mask = offsets->saved_regs_mask;
25177 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25179 /* We can deduce the registers used from the function's return value.
25180 This is more reliable than examining df_regs_ever_live_p () because that
25181 will be set if the register is ever used in the function, not just if
25182 the register is used to hold a return value. */
25183 size = arm_size_return_regs ();
25185 extra_pop = thumb1_extra_regs_pushed (offsets, false);
25188 unsigned long extra_mask = (1 << extra_pop) - 1;
25189 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
25192 /* The prologue may have pushed some high registers to use as
25193 work registers, e.g. the testsuite file:
25194 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
25195 compiles to produce:
25196 push {r4, r5, r6, r7, lr}
25200 as part of the prologue.  We have to undo that pushing here.  */
25202 if (high_regs_pushed)
25204 unsigned long mask = live_regs_mask & 0xff;
25207 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
25210 /* Oh dear!  We have no low registers into which we can pop
25211 the high registers!  */
25212 internal_error
25213 ("no low registers available for popping high registers");
25215 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
25216 if (live_regs_mask & (1 << next_hi_reg))
25219 while (high_regs_pushed)
25221 /* Find lo register(s) into which the high register(s) can
25223 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
25225 if (mask & (1 << regno))
25226 high_regs_pushed--;
25227 if (high_regs_pushed == 0)
25231 if (high_regs_pushed == 0 && regno >= 0)
25232 mask &= ~((1 << regno) - 1);
25234 /* Pop the values into the low register(s). */
25235 thumb_pop (asm_out_file, mask);
25237 /* Move the value(s) into the high registers. */
25238 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
25240 if (mask & (1 << regno))
25242 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
25245 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
25247 if (live_regs_mask & (1 << next_hi_reg))
25252 live_regs_mask &= ~0x0f00;
25255 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
25256 live_regs_mask &= 0xff;
25258 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
25260 /* Pop the return address into the PC. */
25261 if (had_to_push_lr)
25262 live_regs_mask |= 1 << PC_REGNUM;
25264 /* Either no argument registers were pushed or a backtrace
25265 structure was created which includes an adjusted stack
25266 pointer, so just pop everything. */
25267 if (live_regs_mask)
25268 thumb_pop (asm_out_file, live_regs_mask);
25270 /* We have either just popped the return address into the
25271 PC or it was kept in LR for the entire function.
25272 Note that thumb_pop has already called thumb_exit if the
25273 PC was in the list. */
25274 if (!had_to_push_lr)
25275 thumb_exit (asm_out_file, LR_REGNUM);
25279 /* Pop everything but the return address. */
25280 if (live_regs_mask)
25281 thumb_pop (asm_out_file, live_regs_mask);
25283 if (had_to_push_lr)
25287 /* We have no free low regs, so save one. */
25288 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
25292 /* Get the return address into a temporary register. */
25293 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
25297 /* Move the return address to lr. */
25298 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
25300 /* Restore the low register. */
25301 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
25306 regno = LAST_ARG_REGNUM;
25311 /* Remove the argument registers that were pushed onto the stack. */
25312 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
25313 SP_REGNUM, SP_REGNUM,
25314 crtl->args.pretend_args_size);
25316 thumb_exit (asm_out_file, regno);
25322 /* Functions to save and restore machine-specific function data. */
25323 static struct machine_function *
25324 arm_init_machine_status (void)
25326 struct machine_function *machine;
25327 machine = ggc_cleared_alloc<machine_function> ();
25329 #if ARM_FT_UNKNOWN != 0
25330 machine->func_type = ARM_FT_UNKNOWN;
25331 #endif
25332 machine->static_chain_stack_bytes = -1;
25336 /* Return an RTX indicating where the return address to the
25337 calling function can be found. */
25339 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
25344 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
25347 /* Do anything needed before RTL is emitted for each function. */
25349 arm_init_expanders (void)
25351 /* Arrange to initialize and mark the machine per-function status. */
25352 init_machine_status = arm_init_machine_status;
25354 /* This is to stop the combine pass optimizing away the alignment
25355 adjustment of va_arg. */
25356 /* ??? It is claimed that this should not be necessary. */
25358 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
25361 /* Check that FUNC is called with a different mode. */
25364 arm_change_mode_p (tree func)
25366 if (TREE_CODE (func) != FUNCTION_DECL)
25369 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
25372 callee_tree = target_option_default_node;
25374 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
25375 int flags = callee_opts->x_target_flags;
25377 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
25380 /* Like arm_compute_initial_elimination_offset.  Simpler because there
25381 isn't an ABI specified frame pointer for Thumb. Instead, we set it
25382 to point at the base of the local variables after static stack
25383 space for a function has been allocated. */
25386 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
25388 arm_stack_offsets *offsets;
25390 offsets = arm_get_frame_offsets ();
25394 case ARG_POINTER_REGNUM:
25397 case STACK_POINTER_REGNUM:
25398 return offsets->outgoing_args - offsets->saved_args;
25400 case FRAME_POINTER_REGNUM:
25401 return offsets->soft_frame - offsets->saved_args;
25403 case ARM_HARD_FRAME_POINTER_REGNUM:
25404 return offsets->saved_regs - offsets->saved_args;
25406 case THUMB_HARD_FRAME_POINTER_REGNUM:
25407 return offsets->locals_base - offsets->saved_args;
25410 gcc_unreachable ();
25414 case FRAME_POINTER_REGNUM:
25417 case STACK_POINTER_REGNUM:
25418 return offsets->outgoing_args - offsets->soft_frame;
25420 case ARM_HARD_FRAME_POINTER_REGNUM:
25421 return offsets->saved_regs - offsets->soft_frame;
25423 case THUMB_HARD_FRAME_POINTER_REGNUM:
25424 return offsets->locals_base - offsets->soft_frame;
25427 gcc_unreachable ();
25432 gcc_unreachable ();
25436 /* Generate the function's prologue. */
25439 thumb1_expand_prologue (void)
25443 HOST_WIDE_INT amount;
25444 HOST_WIDE_INT size;
25445 arm_stack_offsets *offsets;
25446 unsigned long func_type;
25448 unsigned long live_regs_mask;
25449 unsigned long l_mask;
25450 unsigned high_regs_pushed = 0;
25451 bool lr_needs_saving;
25453 func_type = arm_current_func_type ();
25455 /* Naked functions don't have prologues. */
25456 if (IS_NAKED (func_type))
25458 if (flag_stack_usage_info)
25459 current_function_static_stack_size = 0;
25463 if (IS_INTERRUPT (func_type))
25465 error ("interrupt Service Routines cannot be coded in Thumb mode");
25469 if (is_called_in_ARM_mode (current_function_decl))
25470 emit_insn (gen_prologue_thumb1_interwork ());
25472 offsets = arm_get_frame_offsets ();
25473 live_regs_mask = offsets->saved_regs_mask;
25474 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
25476 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
25477 l_mask = live_regs_mask & 0x40ff;
25478 /* Then count how many other high registers will need to be pushed. */
25479 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25481 if (crtl->args.pretend_args_size)
25483 rtx x = GEN_INT (-crtl->args.pretend_args_size);
25485 if (cfun->machine->uses_anonymous_args)
25487 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
25488 unsigned long mask;
25490 mask = 1ul << (LAST_ARG_REGNUM + 1);
25491 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
25493 insn = thumb1_emit_multi_reg_push (mask, 0);
25497 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25498 stack_pointer_rtx, x));
25500 RTX_FRAME_RELATED_P (insn) = 1;
25503 if (TARGET_BACKTRACE)
25505 HOST_WIDE_INT offset = 0;
25506 unsigned work_register;
25507 rtx work_reg, x, arm_hfp_rtx;
25509 /* We have been asked to create a stack backtrace structure.
25510 The code looks like this:
25514 0 sub SP, #16 Reserve space for 4 registers.
25515 2 push {R7} Push low registers.
25516 4 add R7, SP, #20 Get the stack pointer before the push.
25517 6 str R7, [SP, #8] Store the stack pointer
25518 (before reserving the space).
25519 8 mov R7, PC Get hold of the start of this code + 12.
25520 10 str R7, [SP, #16] Store it.
25521 12 mov R7, FP Get hold of the current frame pointer.
25522 14 str R7, [SP, #4] Store it.
25523 16 mov R7, LR Get hold of the current return address.
25524 18 str R7, [SP, #12] Store it.
25525 20 add R7, SP, #16 Point at the start of the
25526 backtrace structure.
25527 22 mov FP, R7 Put this value into the frame pointer. */
25529 work_register = thumb_find_work_register (live_regs_mask);
25530 work_reg = gen_rtx_REG (SImode, work_register);
25531 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
25533 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25534 stack_pointer_rtx, GEN_INT (-16)));
25535 RTX_FRAME_RELATED_P (insn) = 1;
25539 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
25540 RTX_FRAME_RELATED_P (insn) = 1;
25541 lr_needs_saving = false;
25543 offset = bit_count (l_mask) * UNITS_PER_WORD;
25546 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
25547 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25549 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
25550 x = gen_frame_mem (SImode, x);
25551 emit_move_insn (x, work_reg);
25553 /* Make sure that the instruction fetching the PC is in the right place
25554 to calculate "start of backtrace creation code + 12". */
25555 /* ??? The stores using the common WORK_REG ought to be enough to
25556 prevent the scheduler from doing anything weird. Failing that
25557 we could always move all of the following into an UNSPEC_VOLATILE. */
25560 x = gen_rtx_REG (SImode, PC_REGNUM);
25561 emit_move_insn (work_reg, x);
25563 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25564 x = gen_frame_mem (SImode, x);
25565 emit_move_insn (x, work_reg);
25567 emit_move_insn (work_reg, arm_hfp_rtx);
25569 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25570 x = gen_frame_mem (SImode, x);
25571 emit_move_insn (x, work_reg);
25575 emit_move_insn (work_reg, arm_hfp_rtx);
25577 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25578 x = gen_frame_mem (SImode, x);
25579 emit_move_insn (x, work_reg);
25581 x = gen_rtx_REG (SImode, PC_REGNUM);
25582 emit_move_insn (work_reg, x);
25584 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25585 x = gen_frame_mem (SImode, x);
25586 emit_move_insn (x, work_reg);
25589 x = gen_rtx_REG (SImode, LR_REGNUM);
25590 emit_move_insn (work_reg, x);
25592 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
25593 x = gen_frame_mem (SImode, x);
25594 emit_move_insn (x, work_reg);
25596 x = GEN_INT (offset + 12);
25597 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25599 emit_move_insn (arm_hfp_rtx, work_reg);
25601 /* Optimization: If we are not pushing any low registers but we are going
25602 to push some high registers then delay our first push. This will just
25603 be a push of LR and we can combine it with the push of the first high
25605 else if ((l_mask & 0xff) != 0
25606 || (high_regs_pushed == 0 && lr_needs_saving))
25608 unsigned long mask = l_mask;
25609 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
25610 insn = thumb1_emit_multi_reg_push (mask, mask);
25611 RTX_FRAME_RELATED_P (insn) = 1;
25612 lr_needs_saving = false;
25615 if (high_regs_pushed)
25617 unsigned pushable_regs;
25618 unsigned next_hi_reg;
25619 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
25620 : crtl->args.info.nregs;
25621 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
25623 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
25624 if (live_regs_mask & (1 << next_hi_reg))
25627 /* Here we need to mask out registers used for passing arguments
25628 even if they can be pushed. This is to avoid using them to
25629 stash the high registers.  Such a stash could clobber the
25630 incoming arguments before they are used.  */
25631 pushable_regs = l_mask & (~arg_regs_mask);
25632 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
25634 /* Normally, LR can be used as a scratch register once it has been
25635 saved; but if the function examines its own return address then
25636 the value is still live and we need to avoid using it. */
25637 bool return_addr_live
25638 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
25641 if (lr_needs_saving || return_addr_live)
25642 pushable_regs &= ~(1 << LR_REGNUM);
25644 if (pushable_regs == 0)
25645 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
25647 while (high_regs_pushed > 0)
25649 unsigned long real_regs_mask = 0;
25650 unsigned long push_mask = 0;
25652 for (regno = LR_REGNUM; regno >= 0; regno --)
25654 if (pushable_regs & (1 << regno))
25656 emit_move_insn (gen_rtx_REG (SImode, regno),
25657 gen_rtx_REG (SImode, next_hi_reg));
25659 high_regs_pushed --;
25660 real_regs_mask |= (1 << next_hi_reg);
25661 push_mask |= (1 << regno);
25663 if (high_regs_pushed)
25665 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
25667 if (live_regs_mask & (1 << next_hi_reg))
25675 /* If we had to find a work register and we have not yet
25676 saved the LR then add it to the list of regs to push. */
25677 if (lr_needs_saving)
25679 push_mask |= 1 << LR_REGNUM;
25680 real_regs_mask |= 1 << LR_REGNUM;
25681 lr_needs_saving = false;
25682 /* If the return address is not live at this point, we
25683 can add LR to the list of registers that we can use
25685 if (!return_addr_live)
25686 pushable_regs |= 1 << LR_REGNUM;
25689 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
25690 RTX_FRAME_RELATED_P (insn) = 1;
25694 /* Load the pic register before setting the frame pointer,
25695 so we can use r7 as a temporary work register. */
25696 if (flag_pic && arm_pic_register != INVALID_REGNUM)
25697 arm_load_pic_register (live_regs_mask, NULL_RTX);
25699 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25700 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25701 stack_pointer_rtx);
25703 size = offsets->outgoing_args - offsets->saved_args;
25704 if (flag_stack_usage_info)
25705 current_function_static_stack_size = size;
25707 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25708 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25709 || flag_stack_clash_protection)
25711 sorry ("%<-fstack-check=specific%> for Thumb-1");
25713 amount = offsets->outgoing_args - offsets->saved_regs;
25714 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25719 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25720 GEN_INT (- amount)));
25721 RTX_FRAME_RELATED_P (insn) = 1;
25727 /* The stack decrement is too big for an immediate value in a single
25728 insn. In theory we could issue multiple subtracts, but after
25729 three of them it becomes more space efficient to place the full
25730 value in the constant pool and load into a register. (Also the
25731 ARM debugger really likes to see only one stack decrement per
25732 function). So instead we look for a scratch register into which
25733 we can load the decrement, and then we subtract this from the
25734 stack pointer. Unfortunately on the thumb the only available
25735 scratch registers are the argument registers, and we cannot use
25736 these as they may hold arguments to the function. Instead we
25737 attempt to locate a call preserved register which is used by this
25738 function. If we can find one, then we know that it will have
25739 been pushed at the start of the prologue and so we can corrupt
25741 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25742 if (live_regs_mask & (1 << regno))
25745 gcc_assert (regno <= LAST_LO_REGNUM);
25747 reg = gen_rtx_REG (SImode, regno);
25749 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25751 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25752 stack_pointer_rtx, reg));
25754 dwarf = gen_rtx_SET (stack_pointer_rtx,
25755 plus_constant (Pmode, stack_pointer_rtx,
25757 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25758 RTX_FRAME_RELATED_P (insn) = 1;
25762 if (frame_pointer_needed)
25763 thumb_set_frame_pointer (offsets);
25765 /* If we are profiling, make sure no instructions are scheduled before
25766 the call to mcount. Similarly if the user has requested no
25767 scheduling in the prolog. Similarly if we want non-call exceptions
25768 using the EABI unwinder, to prevent faulting instructions from being
25769 swapped with a stack adjustment. */
25770 if (crtl->profile || !TARGET_SCHED_PROLOG
25771 || (arm_except_unwind_info (&global_options) == UI_TARGET
25772 && cfun->can_throw_non_call_exceptions))
25773 emit_insn (gen_blockage ());
25775 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25776 if (live_regs_mask & 0xff)
25777 cfun->machine->lr_save_eliminated = 0;
25780 /* Clear caller saved registers not used to pass return values and leaked
25781 condition flags before exiting a cmse_nonsecure_entry function. */
25784 cmse_nonsecure_entry_clear_before_return (void)
25786 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25787 uint32_t padding_bits_to_clear = 0;
25788 auto_sbitmap to_clear_bitmap (maxregno + 1);
25789 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
25792 bitmap_clear (to_clear_bitmap);
25793 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25794 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25796 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25798 if (TARGET_HARD_FLOAT)
25800 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25802 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25804 /* Make sure we don't clear the two scratch registers used to clear the
25805 relevant FPSCR bits in output_return_instruction. */
25806 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25807 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25808 emit_use (gen_rtx_REG (SImode, 4));
25809 bitmap_clear_bit (to_clear_bitmap, 4);
25812 /* If the user has defined registers to be caller saved, these are no longer
25813 restored by the function before returning and must thus be cleared for
25814 security purposes. */
25815 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25817 /* We do not touch registers that can be used to pass arguments as per
25818 the AAPCS, since these should never be made callee-saved by user
25819 options. */
25820 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25822 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25824 if (call_used_regs[regno])
25825 bitmap_set_bit (to_clear_bitmap, regno);
25828 /* Make sure we do not clear the registers used to return the result in. */
25829 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25830 if (!VOID_TYPE_P (result_type))
25832 uint64_t to_clear_return_mask;
25833 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25835 /* No need to check that we return in registers, because we don't
25836 support returning on stack yet. */
25837 gcc_assert (REG_P (result_rtl));
25838 to_clear_return_mask
25839 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25840 &padding_bits_to_clear);
25841 if (to_clear_return_mask)
25843 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25844 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25846 if (to_clear_return_mask & (1ULL << regno))
25847 bitmap_clear_bit (to_clear_bitmap, regno);
25852 if (padding_bits_to_clear != 0)
25854 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
25855 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
25857 /* Padding_bits_to_clear is not 0 so we know we are dealing with
25858 returning a composite type, which only uses r0. Let's make sure that
25859 r1-r3 are cleared too. */
25860 bitmap_clear (to_clear_arg_regs_bitmap);
25861 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
25862 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25865 /* Clear full registers that leak before returning. */
25866 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
25867 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
25868 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
25872 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25873 single POP instruction can be generated. LR should be replaced by PC. All
25874 the checks required are already done by USE_RETURN_INSN (). Hence,
25875 all we really need to check here is whether a single register or
25876 multiple registers are to be returned. */
25878 thumb2_expand_return (bool simple_return)
25881 unsigned long saved_regs_mask;
25882 arm_stack_offsets *offsets;
25884 offsets = arm_get_frame_offsets ();
25885 saved_regs_mask = offsets->saved_regs_mask;
25887 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25888 if (saved_regs_mask & (1 << i))
25891 if (!simple_return && saved_regs_mask)
25893 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25894 functions or adapt code to handle according to ACLE. This path should
25895 not be reachable for cmse_nonsecure_entry functions though we prefer
25896 to assert it for now to ensure that future code changes do not silently
25897 change this behavior. */
25898 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25901 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25902 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25903 rtx addr = gen_rtx_MEM (SImode,
25904 gen_rtx_POST_INC (SImode,
25905 stack_pointer_rtx));
25906 set_mem_alias_set (addr, get_frame_alias_set ());
25907 XVECEXP (par, 0, 0) = ret_rtx;
25908 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25909 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25910 emit_jump_insn (par);
25914 saved_regs_mask &= ~ (1 << LR_REGNUM);
25915 saved_regs_mask |= (1 << PC_REGNUM);
25916 arm_emit_multi_reg_pop (saved_regs_mask);
25921 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25922 cmse_nonsecure_entry_clear_before_return ();
25923 emit_jump_insn (simple_return_rtx);
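/* A worked example of the path above: for a Thumb-2 function that saved
   {r4, r5, lr}, the mask is rewritten to {r4, r5, pc} and the whole
   return sequence collapses into a single "pop {r4, r5, pc}".  */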
25928 thumb1_expand_epilogue (void)
25930 HOST_WIDE_INT amount;
25931 arm_stack_offsets *offsets;
25934 /* Naked functions don't have epilogues. */
25935 if (IS_NAKED (arm_current_func_type ()))
25938 offsets = arm_get_frame_offsets ();
25939 amount = offsets->outgoing_args - offsets->saved_regs;
25941 if (frame_pointer_needed)
25943 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25944 amount = offsets->locals_base - offsets->saved_regs;
25946 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25948 gcc_assert (amount >= 0);
25951 emit_insn (gen_blockage ());
25954 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25955 GEN_INT (amount)));
25958 /* r3 is always free in the epilogue. */
25959 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25961 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25962 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25966 /* Emit a USE (stack_pointer_rtx), so that
25967 the stack adjustment will not be deleted. */
25968 emit_insn (gen_force_register_use (stack_pointer_rtx));
25970 if (crtl->profile || !TARGET_SCHED_PROLOG)
25971 emit_insn (gen_blockage ());
25973 /* Emit a clobber for each insn that will be restored in the epilogue,
25974 so that flow2 will get register lifetimes correct. */
25975 for (regno = 0; regno < 13; regno++)
25976 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25977 emit_clobber (gen_rtx_REG (SImode, regno));
25979 if (! df_regs_ever_live_p (LR_REGNUM))
25980 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25982 /* Clear all caller-saved regs that are not used to return. */
25983 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25984 cmse_nonsecure_entry_clear_before_return ();
25987 /* Epilogue code for APCS frame. */
25989 arm_expand_epilogue_apcs_frame (bool really_return)
25991 unsigned long func_type;
25992 unsigned long saved_regs_mask;
25995 int floats_from_frame = 0;
25996 arm_stack_offsets *offsets;
25998 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25999 func_type = arm_current_func_type ();
26001 /* Get frame offsets for ARM. */
26002 offsets = arm_get_frame_offsets ();
26003 saved_regs_mask = offsets->saved_regs_mask;
26005 /* Find the offset of the floating-point save area in the frame. */
26007 = (offsets->saved_args
26008 + arm_compute_static_chain_stack_bytes ()
26011 /* Compute how many core registers are saved and how far away the floats are. */
26012 for (i = 0; i <= LAST_ARM_REGNUM; i++)
26013 if (saved_regs_mask & (1 << i))
26016 floats_from_frame += 4;
26019 if (TARGET_HARD_FLOAT)
26022 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
26024 /* The offset is from IP_REGNUM. */
26025 int saved_size = arm_get_vfp_saved_size ();
26026 if (saved_size > 0)
26029 floats_from_frame += saved_size;
26030 insn = emit_insn (gen_addsi3 (ip_rtx,
26031 hard_frame_pointer_rtx,
26032 GEN_INT (-floats_from_frame)));
26033 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
26034 ip_rtx, hard_frame_pointer_rtx);
26037 /* Generate VFP register multi-pop. */
26038 start_reg = FIRST_VFP_REGNUM;
26040 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
26041 /* Look for a case where a reg does not need restoring. */
26042 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
26043 && (!df_regs_ever_live_p (i + 1)
26044 || call_used_regs[i + 1]))
26046 if (start_reg != i)
26047 arm_emit_vfp_multi_reg_pop (start_reg,
26048 (i - start_reg) / 2,
26049 gen_rtx_REG (SImode,
26054 /* Restore the remaining regs that we have discovered (or possibly
26055 even all of them, if the conditional in the for loop never
26056 fired). */
26057 if (start_reg != i)
26058 arm_emit_vfp_multi_reg_pop (start_reg,
26059 (i - start_reg) / 2,
26060 gen_rtx_REG (SImode, IP_REGNUM));
26065 /* The frame pointer is guaranteed to be non-double-word aligned, as
26066 it is set to double-word-aligned old_stack_pointer - 4. */
26068 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
26070 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
26071 if (df_regs_ever_live_p (i) && !call_used_regs[i])
26073 rtx addr = gen_frame_mem (V2SImode,
26074 plus_constant (Pmode, hard_frame_pointer_rtx,
26076 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
26077 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26078 gen_rtx_REG (V2SImode, i),
26084 /* saved_regs_mask should contain IP, which holds the old stack pointer
26085 from the time the frame was created. Since SP and IP are adjacent registers,
26086 we can restore the value directly into SP. */
26087 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
26088 saved_regs_mask &= ~(1 << IP_REGNUM);
26089 saved_regs_mask |= (1 << SP_REGNUM);
26091 /* There are two registers left in saved_regs_mask - LR and PC. We
26092 only need to restore LR (the return address), but to
26093 save time we can load it directly into PC, unless we need a
26094 special function exit sequence, or we are not really returning. */
26095 if (really_return
26096 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
26097 && !crtl->calls_eh_return)
26098 /* Delete LR from the register mask, so that LR on
26099 the stack is loaded into the PC in the register mask. */
26100 saved_regs_mask &= ~(1 << LR_REGNUM);
26101 else
26102 saved_regs_mask &= ~(1 << PC_REGNUM);
26104 num_regs = bit_count (saved_regs_mask);
26105 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
26108 emit_insn (gen_blockage ());
26109 /* Unwind the stack to just below the saved registers. */
26110 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26111 hard_frame_pointer_rtx,
26112 GEN_INT (- 4 * num_regs)));
26114 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
26115 stack_pointer_rtx, hard_frame_pointer_rtx);
26118 arm_emit_multi_reg_pop (saved_regs_mask);
26120 if (IS_INTERRUPT (func_type))
26122 /* Interrupt handlers will have pushed the
26123 IP onto the stack, so restore it now. */
26125 rtx addr = gen_rtx_MEM (SImode,
26126 gen_rtx_POST_INC (SImode,
26127 stack_pointer_rtx));
26128 set_mem_alias_set (addr, get_frame_alias_set ());
26129 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
26130 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26131 gen_rtx_REG (SImode, IP_REGNUM),
26135 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
26138 if (crtl->calls_eh_return)
26139 emit_insn (gen_addsi3 (stack_pointer_rtx,
26141 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
26143 if (IS_STACKALIGN (func_type))
26144 /* Restore the original stack pointer. Before prologue, the stack was
26145 realigned and the original stack pointer saved in r0. For details,
26146 see comment in arm_expand_prologue. */
26147 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
26149 emit_jump_insn (simple_return_rtx);
26152 /* Generate RTL to represent ARM epilogue. Really_return is true if the
26153 function is not a sibcall. */
26155 arm_expand_epilogue (bool really_return)
26157 unsigned long func_type;
26158 unsigned long saved_regs_mask;
26162 arm_stack_offsets *offsets;
26164 func_type = arm_current_func_type ();
26166 /* Naked functions don't have epilogues. Hence, generate a return pattern and
26167 let output_return_instruction take care of any instruction emission. */
26168 if (IS_NAKED (func_type)
26169 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
26172 emit_jump_insn (simple_return_rtx);
26176 /* If we are throwing an exception, then we really must be doing a
26177 return, so we can't tail-call. */
26178 gcc_assert (!crtl->calls_eh_return || really_return);
26180 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
26182 arm_expand_epilogue_apcs_frame (really_return);
26186 /* Get frame offsets for ARM. */
26187 offsets = arm_get_frame_offsets ();
26188 saved_regs_mask = offsets->saved_regs_mask;
26189 num_regs = bit_count (saved_regs_mask);
26191 if (frame_pointer_needed)
26194 /* Restore stack pointer if necessary. */
26197 /* In ARM mode, the frame pointer points to the first saved register.
26198 Restore the stack pointer to the last saved register. */
26199 amount = offsets->frame - offsets->saved_regs;
26201 /* Force out any pending memory operations that reference stacked data
26202 before stack de-allocation occurs. */
26203 emit_insn (gen_blockage ());
26204 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26205 hard_frame_pointer_rtx,
26206 GEN_INT (amount)));
26207 arm_add_cfa_adjust_cfa_note (insn, amount,
26209 hard_frame_pointer_rtx);
26211 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
26213 emit_insn (gen_force_register_use (stack_pointer_rtx));
26217 /* In Thumb-2 mode, the frame pointer points to the last saved
26218 register. */
26219 amount = offsets->locals_base - offsets->saved_regs;
26222 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
26223 hard_frame_pointer_rtx,
26224 GEN_INT (amount)));
26225 arm_add_cfa_adjust_cfa_note (insn, amount,
26226 hard_frame_pointer_rtx,
26227 hard_frame_pointer_rtx);
26230 /* Force out any pending memory operations that reference stacked data
26231 before stack de-allocation occurs. */
26232 emit_insn (gen_blockage ());
26233 insn = emit_insn (gen_movsi (stack_pointer_rtx,
26234 hard_frame_pointer_rtx));
26235 arm_add_cfa_adjust_cfa_note (insn, 0,
26237 hard_frame_pointer_rtx);
26238 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
26240 emit_insn (gen_force_register_use (stack_pointer_rtx));
26245 /* Pop off outgoing args and local frame to adjust stack pointer to
26246 last saved register. */
26247 amount = offsets->outgoing_args - offsets->saved_regs;
26251 /* Force out any pending memory operations that reference stacked data
26252 before stack de-allocation occurs. */
26253 emit_insn (gen_blockage ());
26254 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
26256 GEN_INT (amount)));
26257 arm_add_cfa_adjust_cfa_note (tmp, amount,
26258 stack_pointer_rtx, stack_pointer_rtx);
26259 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
26261 emit_insn (gen_force_register_use (stack_pointer_rtx));
26265 if (TARGET_HARD_FLOAT)
26267 /* Generate VFP register multi-pop. */
26268 int end_reg = LAST_VFP_REGNUM + 1;
26270 /* Scan the registers in reverse order. We need to match
26271 any groupings made in the prologue and generate matching
26272 vldm operations. The need to match groups is because,
26273 unlike pop, vldm can only do consecutive regs. */
26274 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
26275 /* Look for a case where a reg does not need restoring. */
26276 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
26277 && (!df_regs_ever_live_p (i + 1)
26278 || call_used_regs[i + 1]))
26280 /* Restore the regs discovered so far (from reg+2 to
26281 end_reg). */
26282 if (end_reg > i + 2)
26283 arm_emit_vfp_multi_reg_pop (i + 2,
26284 (end_reg - (i + 2)) / 2,
26285 stack_pointer_rtx);
26289 /* Restore the remaining regs that we have discovered (or possibly
26290 even all of them, if the conditional in the for loop never
26291 fired). */
26292 if (end_reg > i + 2)
26293 arm_emit_vfp_multi_reg_pop (i + 2,
26294 (end_reg - (i + 2)) / 2,
26295 stack_pointer_rtx);
26299 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
26300 if (df_regs_ever_live_p (i) && !call_used_regs[i])
26303 rtx addr = gen_rtx_MEM (V2SImode,
26304 gen_rtx_POST_INC (SImode,
26305 stack_pointer_rtx));
26306 set_mem_alias_set (addr, get_frame_alias_set ());
26307 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
26308 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26309 gen_rtx_REG (V2SImode, i),
26311 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
26312 stack_pointer_rtx, stack_pointer_rtx);
26315 if (saved_regs_mask)
26318 bool return_in_pc = false;
26320 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
26321 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
26322 && !IS_CMSE_ENTRY (func_type)
26323 && !IS_STACKALIGN (func_type)
26325 && crtl->args.pretend_args_size == 0
26326 && saved_regs_mask & (1 << LR_REGNUM)
26327 && !crtl->calls_eh_return)
26329 saved_regs_mask &= ~(1 << LR_REGNUM);
26330 saved_regs_mask |= (1 << PC_REGNUM);
26331 return_in_pc = true;
26334 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
26336 for (i = 0; i <= LAST_ARM_REGNUM; i++)
26337 if (saved_regs_mask & (1 << i))
26339 rtx addr = gen_rtx_MEM (SImode,
26340 gen_rtx_POST_INC (SImode,
26341 stack_pointer_rtx));
26342 set_mem_alias_set (addr, get_frame_alias_set ());
26344 if (i == PC_REGNUM)
26346 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
26347 XVECEXP (insn, 0, 0) = ret_rtx;
26348 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
26350 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
26351 insn = emit_jump_insn (insn);
26355 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
26357 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26358 gen_rtx_REG (SImode, i),
26360 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
26362 stack_pointer_rtx);
26369 && current_tune->prefer_ldrd_strd
26370 && !optimize_function_for_size_p (cfun))
26373 thumb2_emit_ldrd_pop (saved_regs_mask);
26374 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
26375 arm_emit_ldrd_pop (saved_regs_mask);
26377 arm_emit_multi_reg_pop (saved_regs_mask);
26380 arm_emit_multi_reg_pop (saved_regs_mask);
26388 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes ();
26392 rtx dwarf = NULL_RTX;
26394 emit_insn (gen_addsi3 (stack_pointer_rtx,
26396 GEN_INT (amount)));
26398 RTX_FRAME_RELATED_P (tmp) = 1;
26400 if (cfun->machine->uses_anonymous_args)
26402 /* Restore pretend args. Refer to arm_expand_prologue for how
26403 pretend args are saved on the stack. */
26404 int num_regs = crtl->args.pretend_args_size / 4;
26405 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
26406 for (j = 0, i = 0; j < num_regs; i++)
26407 if (saved_regs_mask & (1 << i))
26409 rtx reg = gen_rtx_REG (SImode, i);
26410 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
26413 REG_NOTES (tmp) = dwarf;
26415 arm_add_cfa_adjust_cfa_note (tmp, amount,
26416 stack_pointer_rtx, stack_pointer_rtx);
26419 /* Clear all caller-saved regs that are not used to return. */
26420 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26422 /* CMSE_ENTRY always returns. */
26423 gcc_assert (really_return);
26424 cmse_nonsecure_entry_clear_before_return ();
26427 if (!really_return)
26430 if (crtl->calls_eh_return)
26431 emit_insn (gen_addsi3 (stack_pointer_rtx,
26433 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
26435 if (IS_STACKALIGN (func_type))
26436 /* Restore the original stack pointer. Before prologue, the stack was
26437 realigned and the original stack pointer saved in r0. For details,
26438 see comment in arm_expand_prologue. */
26439 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
26441 emit_jump_insn (simple_return_rtx);
26444 /* Implementation of insn prologue_thumb1_interwork. This is the first
26445 "instruction" of a function called in ARM mode. Swap to Thumb mode. */
26448 thumb1_output_interwork (void)
26451 FILE *f = asm_out_file;
26453 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
26454 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
26456 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
26458 /* Generate code sequence to switch us into Thumb mode. */
26459 /* The .code 32 directive has already been emitted by
26460 ASM_DECLARE_FUNCTION_NAME. */
26461 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
26462 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
26464 /* Generate a label, so that the debugger will notice the
26465 change in instruction sets. This label is also used by
26466 the assembler to bypass the ARM code when this function
26467 is called from a Thumb encoded function elsewhere in the
26468 same file. Hence the definition of STUB_NAME here must
26469 agree with the definition in gas/config/tc-arm.c. */
26471 #define STUB_NAME ".real_start_of"
26473 fprintf (f, "\t.code\t16\n");
26475 if (arm_dllexport_name_p (name))
26476 name = arm_strip_name_encoding (name);
26478 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
26479 fprintf (f, "\t.thumb_func\n");
26480 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
26485 /* Handle the case of a double word load into a low register from
26486 a computed memory address. The computed address may involve a
26487 register which is overwritten by the load. */
26489 thumb_load_double_from_address (rtx *operands)
26497 gcc_assert (REG_P (operands[0]));
26498 gcc_assert (MEM_P (operands[1]));
26500 /* Get the memory address. */
26501 addr = XEXP (operands[1], 0);
26503 /* Work out how the memory address is computed. */
26504 switch (GET_CODE (addr))
26507 operands[2] = adjust_address (operands[1], SImode, 4);
26509 if (REGNO (operands[0]) == REGNO (addr))
26511 output_asm_insn ("ldr\t%H0, %2", operands);
26512 output_asm_insn ("ldr\t%0, %1", operands);
26516 output_asm_insn ("ldr\t%0, %1", operands);
26517 output_asm_insn ("ldr\t%H0, %2", operands);
26522 /* Compute <address> + 4 for the high order load. */
26523 operands[2] = adjust_address (operands[1], SImode, 4);
26525 output_asm_insn ("ldr\t%0, %1", operands);
26526 output_asm_insn ("ldr\t%H0, %2", operands);
26530 arg1 = XEXP (addr, 0);
26531 arg2 = XEXP (addr, 1);
26533 if (CONSTANT_P (arg1))
26534 base = arg2, offset = arg1;
26536 base = arg1, offset = arg2;
26538 gcc_assert (REG_P (base));
26540 /* Catch the case of <address> = <reg> + <reg> */
26541 if (REG_P (offset))
26543 int reg_offset = REGNO (offset);
26544 int reg_base = REGNO (base);
26545 int reg_dest = REGNO (operands[0]);
26547 /* Add the base and offset registers together into the
26548 higher destination register. */
26549 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
26550 reg_dest + 1, reg_base, reg_offset);
26552 /* Load the lower destination register from the address in
26553 the higher destination register. */
26554 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
26555 reg_dest, reg_dest + 1);
26557 /* Load the higher destination register from its own address
26558 plus 4. */
26559 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
26560 reg_dest + 1, reg_dest + 1);
26564 /* Compute <address> + 4 for the high order load. */
26565 operands[2] = adjust_address (operands[1], SImode, 4);
26567 /* If the computed address is held in the low order register
26568 then load the high order register first, otherwise always
26569 load the low order register first. */
26570 if (REGNO (operands[0]) == REGNO (base))
26572 output_asm_insn ("ldr\t%H0, %2", operands);
26573 output_asm_insn ("ldr\t%0, %1", operands);
26577 output_asm_insn ("ldr\t%0, %1", operands);
26578 output_asm_insn ("ldr\t%H0, %2", operands);
26584 /* With no registers to worry about we can just load the value
26585 directly. */
26586 operands[2] = adjust_address (operands[1], SImode, 4);
26588 output_asm_insn ("ldr\t%H0, %2", operands);
26589 output_asm_insn ("ldr\t%0, %1", operands);
26593 gcc_unreachable ();
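/* An illustrative sketch, not part of the compiler proper, of the
   base-register overwrite hazard handled above: when the destination
   low word doubles as the address register, the high word must be
   loaded first so that the address survives for the second access.  */
static void
load_double_ordered (unsigned int *lo, unsigned int *hi,
		     const unsigned int *addr, int lo_is_base)
{
  if (lo_is_base)
    {
      *hi = addr[1];	/* High word first; the base is still intact.  */
      *lo = addr[0];	/* The base may now be clobbered.  */
    }
  else
    {
      *lo = addr[0];
      *hi = addr[1];
    }
}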
26600 thumb_output_move_mem_multiple (int n, rtx *operands)
26605 if (REGNO (operands[4]) > REGNO (operands[5]))
26606 std::swap (operands[4], operands[5]);
26608 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
26609 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
26613 if (REGNO (operands[4]) > REGNO (operands[5]))
26614 std::swap (operands[4], operands[5]);
26615 if (REGNO (operands[5]) > REGNO (operands[6]))
26616 std::swap (operands[5], operands[6]);
26617 if (REGNO (operands[4]) > REGNO (operands[5]))
26618 std::swap (operands[4], operands[5]);
26620 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26621 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26625 gcc_unreachable ();
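/* The three conditional swaps in the n == 3 case above form a 3-input
   sorting network.  A standalone sketch of the same idea (illustrative
   only):  */
static void
sort3 (int *a, int *b, int *c)
{
  int t;
  if (*a > *b) { t = *a; *a = *b; *b = t; }
  if (*b > *c) { t = *b; *b = *c; *c = t; }
  if (*a > *b) { t = *a; *a = *b; *b = t; }
  /* Now *a <= *b <= *c, the ascending order ldmia/stmia require.  */
}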
26631 /* Output a call-via instruction for thumb state. */
26633 thumb_call_via_reg (rtx reg)
26635 int regno = REGNO (reg);
26638 gcc_assert (regno < LR_REGNUM);
26640 /* If we are in the normal text section we can use a single instance
26641 per compilation unit. If we are doing function sections, then we need
26642 an entry per section, since we can't rely on reachability. */
26643 if (in_section == text_section)
26645 thumb_call_reg_needed = 1;
26647 if (thumb_call_via_label[regno] == NULL)
26648 thumb_call_via_label[regno] = gen_label_rtx ();
26649 labelp = thumb_call_via_label + regno;
26653 if (cfun->machine->call_via[regno] == NULL)
26654 cfun->machine->call_via[regno] = gen_label_rtx ();
26655 labelp = cfun->machine->call_via + regno;
26658 output_asm_insn ("bl\t%a0", labelp);
26662 /* Routines for generating rtl. */
26664 thumb_expand_cpymemqi (rtx *operands)
26666 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26667 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26668 HOST_WIDE_INT len = INTVAL (operands[2]);
26669 HOST_WIDE_INT offset = 0;
26673 emit_insn (gen_cpymem12b (out, in, out, in));
26679 emit_insn (gen_cpymem8b (out, in, out, in));
26685 rtx reg = gen_reg_rtx (SImode);
26686 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26687 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26694 rtx reg = gen_reg_rtx (HImode);
26695 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26696 plus_constant (Pmode, in,
26698 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26707 rtx reg = gen_reg_rtx (QImode);
26708 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26709 plus_constant (Pmode, in,
26711 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26718 thumb_reload_out_hi (rtx *operands)
26720 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26723 /* Return the length of a function name prefix
26724 that starts with the character 'c'. */
26726 arm_get_strip_length (int c)
26730 ARM_NAME_ENCODING_LENGTHS
26735 /* Return a pointer to a function's name with any
26736 and all prefix encodings stripped from it. */
26738 arm_strip_name_encoding (const char *name)
26742 while ((skip = arm_get_strip_length (* name)))
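/* An illustrative sketch, not part of the compiler proper, of the loop
   above, using a hypothetical encoding table in which '*' and '@' are
   one-character prefixes; the real lengths come from
   ARM_NAME_ENCODING_LENGTHS.  */
static const char *
strip_prefixes_sketch (const char *name)
{
  while (*name == '*' || *name == '@')
    name++;	/* Skip one prefix character per iteration.  */
  return name;
}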
26748 /* If there is a '*' anywhere in the name's prefix, then
26749 emit the stripped name verbatim, otherwise prepend an
26750 underscore if leading underscores are being used. */
26752 arm_asm_output_labelref (FILE *stream, const char *name)
26757 while ((skip = arm_get_strip_length (* name)))
26759 verbatim |= (*name == '*');
26764 fputs (name, stream);
26766 asm_fprintf (stream, "%U%s", name);
26769 /* This function is used to emit an EABI tag and its associated value.
26770 We emit the numerical value of the tag in case the assembler does not
26771 support textual tags (e.g. gas prior to 2.20). If requested we include
26772 the tag name in a comment so that anyone reading the assembler output
26773 will know which tag is being set.
26775 This function is not static because arm-c.c needs it too. */
26778 arm_emit_eabi_attribute (const char *name, int num, int val)
26780 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26781 if (flag_verbose_asm || flag_debug_asm)
26782 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26783 asm_fprintf (asm_out_file, "\n");
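/* For example, arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1)
   emits, under -fverbose-asm:

	.eabi_attribute 20, 1	@ Tag_ABI_FP_denormal

   and just ".eabi_attribute 20, 1" otherwise.  */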
26786 /* This function is used to print CPU tuning information as a comment
26787 in the assembler file. Pointers are not printed for now. */
26790 arm_print_tune_info (void)
26792 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26793 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26794 current_tune->constant_limit);
26795 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26796 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26797 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26798 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26799 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26800 "prefetch.l1_cache_size:\t%d\n",
26801 current_tune->prefetch.l1_cache_size);
26802 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26803 "prefetch.l1_cache_line_size:\t%d\n",
26804 current_tune->prefetch.l1_cache_line_size);
26805 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26806 "prefer_constant_pool:\t%d\n",
26807 (int) current_tune->prefer_constant_pool);
26808 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26809 "branch_cost:\t(s:speed, p:predictable)\n");
26810 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26811 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26812 current_tune->branch_cost (false, false));
26813 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26814 current_tune->branch_cost (false, true));
26815 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26816 current_tune->branch_cost (true, false));
26817 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26818 current_tune->branch_cost (true, true));
26819 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26820 "prefer_ldrd_strd:\t%d\n",
26821 (int) current_tune->prefer_ldrd_strd);
26822 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26823 "logical_op_non_short_circuit:\t[%d,%d]\n",
26824 (int) current_tune->logical_op_non_short_circuit_thumb,
26825 (int) current_tune->logical_op_non_short_circuit_arm);
26826 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26827 "disparage_flag_setting_t16_encodings:\t%d\n",
26828 (int) current_tune->disparage_flag_setting_t16_encodings);
26829 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26830 "string_ops_prefer_neon:\t%d\n",
26831 (int) current_tune->string_ops_prefer_neon);
26832 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26833 "max_insns_inline_memset:\t%d\n",
26834 current_tune->max_insns_inline_memset);
26835 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26836 current_tune->fusible_ops);
26837 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26838 (int) current_tune->sched_autopref);
26841 /* Print .arch and .arch_extension directives corresponding to the
26842 current architecture configuration. */
26844 arm_print_asm_arch_directives ()
26846 const arch_option *arch
26847 = arm_parse_arch_option_name (all_architectures, "-march",
26848 arm_active_target.arch_name);
26849 auto_sbitmap opt_bits (isa_num_bits);
26853 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26854 arm_last_printed_arch_string = arm_active_target.arch_name;
26855 if (!arch->common.extensions)
26858 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26864 arm_initialize_isa (opt_bits, opt->isa_bits);
26866 /* If every feature bit of this option is set in the target
26867 ISA specification, print out the option name. However,
26868 don't print anything if all the bits are part of the
26869 FPU specification. */
26870 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26871 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26872 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26878 arm_file_start (void)
26884 /* We don't have a specified CPU. Use the architecture to
26885 generate the tags.
26887 Note: it might be better to do this unconditionally, then the
26888 assembler would not need to know about all new CPU names as
26889 they are added. */
26890 if (!arm_active_target.core_name)
26892 /* armv7ve doesn't support any extensions. */
26893 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26895 /* Keep backward compatibility for assemblers
26896 which don't support armv7ve. */
26897 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26898 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26899 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26900 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26901 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26902 arm_last_printed_arch_string = "armv7ve";
26905 arm_print_asm_arch_directives ();
26907 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26909 asm_fprintf (asm_out_file, "\t.arch %s\n",
26910 arm_active_target.core_name + 8);
26911 arm_last_printed_arch_string = arm_active_target.core_name + 8;
26915 const char* truncated_name
26916 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26917 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26920 if (print_tune_info)
26921 arm_print_tune_info ();
26923 if (! TARGET_SOFT_FLOAT)
26925 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26926 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26928 if (TARGET_HARD_FLOAT_ABI)
26929 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26932 /* Some of these attributes only apply when the corresponding features
26933 are used. However we don't have any easy way of figuring this out.
26934 Conservatively record the setting that would have been used. */
26936 if (flag_rounding_math)
26937 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26939 if (!flag_unsafe_math_optimizations)
26941 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26942 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26944 if (flag_signaling_nans)
26945 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26947 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26948 flag_finite_math_only ? 1 : 3);
26950 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26951 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26952 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26953 flag_short_enums ? 1 : 2);
26955 /* Tag_ABI_optimization_goals. */
26958 else if (optimize >= 2)
26964 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26966 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26969 if (arm_fp16_format)
26970 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26971 (int) arm_fp16_format);
26973 if (arm_lang_output_object_attributes_hook)
26974 arm_lang_output_object_attributes_hook();
26977 default_file_start ();
26981 arm_file_end (void)
26985 if (NEED_INDICATE_EXEC_STACK)
26986 /* Add .note.GNU-stack. */
26987 file_end_indicate_exec_stack ();
26989 if (! thumb_call_reg_needed)
26992 switch_to_section (text_section);
26993 asm_fprintf (asm_out_file, "\t.code 16\n");
26994 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26996 for (regno = 0; regno < LR_REGNUM; regno++)
26998 rtx label = thumb_call_via_label[regno];
27002 targetm.asm_out.internal_label (asm_out_file, "L",
27003 CODE_LABEL_NUMBER (label));
27004 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
27010 /* Symbols in the text segment can be accessed without indirecting via the
27011 constant pool; it may take an extra binary operation, but this is still
27012 faster than indirecting via memory. Don't do this when not optimizing,
27013 since we won't be calculating all of the offsets necessary to do this
27014 simplification. */
27017 arm_encode_section_info (tree decl, rtx rtl, int first)
27019 if (optimize > 0 && TREE_CONSTANT (decl))
27020 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
27022 default_encode_section_info (decl, rtl, first);
27024 #endif /* !ARM_PE */
27027 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
27029 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
27030 && !strcmp (prefix, "L"))
27032 arm_ccfsm_state = 0;
27033 arm_target_insn = NULL;
27035 default_internal_label (stream, prefix, labelno);
27038 /* Output code to add DELTA to the first argument, and then jump
27039 to FUNCTION. Used for C++ multiple inheritance. */
27042 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
27043 HOST_WIDE_INT, tree function)
27045 static int thunk_label = 0;
27048 int mi_delta = delta;
27049 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
27051 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
27054 mi_delta = - mi_delta;
27056 final_start_function (emit_barrier (), file, 1);
27060 int labelno = thunk_label++;
27061 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
27062 /* Thunks are entered in ARM mode when available. */
27063 if (TARGET_THUMB1_ONLY)
27065 /* push r3 so we can use it as a temporary. */
27066 /* TODO: Omit this save if r3 is not used. */
27067 fputs ("\tpush {r3}\n", file);
27068 fputs ("\tldr\tr3, ", file);
27072 fputs ("\tldr\tr12, ", file);
27074 assemble_name (file, label);
27075 fputc ('\n', file);
27078 /* If we are generating PIC, the ldr instruction below loads
27079 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
27080 the address of the add + 8, so we have:
27082 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
27083 = target + 1.
27085 Note that we have "+ 1" because some versions of GNU ld
27086 don't set the low bit of the result for R_ARM_REL32
27087 relocations against thumb function symbols.
27088 On ARMv6M this is +4, not +8. */
27089 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
27090 assemble_name (file, labelpc);
27091 fputs (":\n", file);
27092 if (TARGET_THUMB1_ONLY)
27094 /* This is 2 insns after the start of the thunk, so we know it
27095 is 4-byte aligned. */
27096 fputs ("\tadd\tr3, pc, r3\n", file);
27097 fputs ("\tmov r12, r3\n", file);
27100 fputs ("\tadd\tr12, pc, r12\n", file);
27102 else if (TARGET_THUMB1_ONLY)
27103 fputs ("\tmov r12, r3\n", file);
27105 if (TARGET_THUMB1_ONLY)
27107 if (mi_delta > 255)
27109 fputs ("\tldr\tr3, ", file);
27110 assemble_name (file, label);
27111 fputs ("+4\n", file);
27112 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
27113 mi_op, this_regno, this_regno);
27115 else if (mi_delta != 0)
27117 /* Thumb1 unified syntax requires an 's' suffix in the instruction name
27118 when one of the operands is an immediate. */
27119 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
27120 mi_op, this_regno, this_regno,
27126 /* TODO: Use movw/movt for large constants when available. */
27127 while (mi_delta != 0)
27129 if ((mi_delta & (3 << shift)) == 0)
27133 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
27134 mi_op, this_regno, this_regno,
27135 mi_delta & (0xff << shift));
27136 mi_delta &= ~(0xff << shift);
27143 if (TARGET_THUMB1_ONLY)
27144 fputs ("\tpop\t{r3}\n", file);
27146 fprintf (file, "\tbx\tr12\n");
27147 ASM_OUTPUT_ALIGN (file, 2);
27148 assemble_name (file, label);
27149 fputs (":\n", file);
27152 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
27153 rtx tem = XEXP (DECL_RTL (function), 0);
27154 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
27155 pipeline offset is four rather than eight. Adjust the offset
27156 accordingly. */
27157 tem = plus_constant (GET_MODE (tem), tem,
27158 TARGET_THUMB1_ONLY ? -3 : -7);
27159 tem = gen_rtx_MINUS (GET_MODE (tem),
27161 gen_rtx_SYMBOL_REF (Pmode,
27162 ggc_strdup (labelpc)));
27163 assemble_integer (tem, 4, BITS_PER_WORD, 1);
27166 /* Output ".word .LTHUNKn". */
27167 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
27169 if (TARGET_THUMB1_ONLY && mi_delta > 255)
27170 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
27174 fputs ("\tb\t", file);
27175 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
27176 if (NEED_PLT_RELOC)
27177 fputs ("(PLT)", file);
27178 fputc ('\n', file);
27181 final_end_function ();
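/* An illustrative sketch, not part of the compiler proper: the
   decomposition performed by the mi_delta loop above, which peels off
   one 8-bit chunk aligned on an even bit position per add/sub emitted.  */
static int
count_delta_chunks (unsigned int delta)
{
  int shift = 0, n = 0;
  while (delta != 0)
    {
      if ((delta & (3u << shift)) == 0)
	shift += 2;	/* Skip an all-zero bit pair.  */
      else
	{
	  n++;		/* One add/sub covers these eight bits.  */
	  delta &= ~(0xffu << shift);
	  shift += 8;
	}
    }
  return n;
}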
27184 /* MI thunk handling for TARGET_32BIT. */
27187 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
27188 HOST_WIDE_INT vcall_offset, tree function)
27190 const bool long_call_p = arm_is_long_call_p (function);
27192 /* On ARM, this_regno is R0 or R1 depending on
27193 whether the function returns an aggregate or not. */
27195 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
27197 ? R1_REGNUM : R0_REGNUM);
27199 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
27200 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
27201 reload_completed = 1;
27202 emit_note (NOTE_INSN_PROLOGUE_END);
27204 /* Add DELTA to THIS_RTX. */
27206 arm_split_constant (PLUS, Pmode, NULL_RTX,
27207 delta, this_rtx, this_rtx, false);
27209 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
27210 if (vcall_offset != 0)
27212 /* Load *THIS_RTX. */
27213 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
27214 /* Compute *THIS_RTX + VCALL_OFFSET. */
27215 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
27217 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
27218 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
27219 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
27222 /* Generate a tail call to the target function. */
27223 if (!TREE_USED (function))
27225 assemble_external (function);
27226 TREE_USED (function) = 1;
27228 rtx funexp = XEXP (DECL_RTL (function), 0);
27231 emit_move_insn (temp, funexp);
27234 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
27235 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
27236 SIBLING_CALL_P (insn) = 1;
27239 /* Indirect calls require a bit of fixup in PIC mode. */
27242 split_all_insns_noflow ();
27246 insn = get_insns ();
27247 shorten_branches (insn);
27248 final_start_function (insn, file, 1);
27249 final (insn, file, 1);
27250 final_end_function ();
27252 /* Stop pretending this is a post-reload pass. */
27253 reload_completed = 0;
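/* An illustrative sketch, not part of the compiler proper: the
   adjustment the emitted thunk performs, written as plain C.  DELTA
   adjusts the this pointer directly; VCALL_OFFSET names a slot in the
   vtable that holds a further adjustment.  */
static void *
thunk_this_adjust (void *this_ptr, long delta, long vcall_offset)
{
  char *p = (char *) this_ptr + delta;
  if (vcall_offset != 0)
    {
      char *vtable = *(char **) p;		/* Load *THIS.  */
      p += *(long *) (vtable + vcall_offset);	/* Add *(*THIS + offset).  */
    }
  return p;	/* Control then tail-calls FUNCTION.  */
}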
27256 /* Output code to add DELTA to the first argument, and then jump
27257 to FUNCTION. Used for C++ multiple inheritance. */
27260 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
27261 HOST_WIDE_INT vcall_offset, tree function)
27263 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
27265 assemble_start_function (thunk, fnname);
27267 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
27269 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
27270 assemble_end_function (thunk, fnname);
27274 arm_emit_vector_const (FILE *file, rtx x)
27277 const char * pattern;
27279 gcc_assert (GET_CODE (x) == CONST_VECTOR);
27281 switch (GET_MODE (x))
27283 case E_V2SImode: pattern = "%08x"; break;
27284 case E_V4HImode: pattern = "%04x"; break;
27285 case E_V8QImode: pattern = "%02x"; break;
27286 default: gcc_unreachable ();
27289 fprintf (file, "0x");
27290 for (i = CONST_VECTOR_NUNITS (x); i--;)
27294 element = CONST_VECTOR_ELT (x, i);
27295 fprintf (file, pattern, INTVAL (element));
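/* A worked example of the loop above: a V4HImode constant {1, 2, 3, 4}
   prints as the single literal 0x0004000300020001, since the loop walks
   from the highest-numbered element down using the "%04x" pattern.  An
   illustrative standalone equivalent:  */
static void
print_v4hi_sketch (FILE *stream, const unsigned short elt[4])
{
  fprintf (stream, "0x");
  for (int i = 4; i--;)
    fprintf (stream, "%04x", elt[i]);
}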
27301 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
27302 HFmode constant pool entries are actually loaded with ldr. */
27304 arm_emit_fp16_const (rtx c)
27308 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
27309 if (WORDS_BIG_ENDIAN)
27310 assemble_zeros (2);
27311 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
27312 if (!WORDS_BIG_ENDIAN)
27313 assemble_zeros (2);
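/* An illustrative sketch, not part of the compiler proper, of the byte
   layout produced above: the two significant bytes occupy the half of
   the 4-byte word that a word-sized ldr will pick up correctly, with
   the other half zero-padded.  */
static void
pack_fp16_word (unsigned short bits, int big_endian, unsigned char out[4])
{
  int pos = big_endian ? 2 : 0;	/* Zero padding comes first on big-endian.  */
  out[0] = out[1] = out[2] = out[3] = 0;
  if (big_endian)
    {
      out[pos] = bits >> 8;
      out[pos + 1] = bits & 0xff;
    }
  else
    {
      out[pos] = bits & 0xff;
      out[pos + 1] = bits >> 8;
    }
}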
27317 arm_output_load_gr (rtx *operands)
27324 if (!MEM_P (operands [1])
27325 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
27326 || !REG_P (reg = XEXP (sum, 0))
27327 || !CONST_INT_P (offset = XEXP (sum, 1))
27328 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
27329 return "wldrw%?\t%0, %1";
27331 /* Fix up an out-of-range load of a GR register. */
27332 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
27333 wcgr = operands[0];
27335 output_asm_insn ("ldr%?\t%0, %1", operands);
27337 operands[0] = wcgr;
27339 output_asm_insn ("tmcr%?\t%0, %1", operands);
27340 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
27345 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
27347 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
27348 named arg and all anonymous args onto the stack.
27349 XXX I know the prologue shouldn't be pushing registers, but it is faster
27350 that way. */
27353 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
27354 const function_arg_info &arg,
27356 int second_time ATTRIBUTE_UNUSED)
27358 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
27361 cfun->machine->uses_anonymous_args = 1;
27362 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
27364 nregs = pcum->aapcs_ncrn;
27367 int res = arm_needs_doubleword_align (arg.mode, arg.type);
27368 if (res < 0 && warn_psabi)
27369 inform (input_location, "parameter passing for argument of "
27370 "type %qT changed in GCC 7.1", arg.type);
27374 if (res > 1 && warn_psabi)
27375 inform (input_location,
27376 "parameter passing for argument of type "
27377 "%qT changed in GCC 9.1", arg.type);
27382 nregs = pcum->nregs;
27384 if (nregs < NUM_ARG_REGS)
27385 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
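/* A worked example of the rule above: for "int f (int a, ...)" under
   the AAPCS, the named argument consumes one core register (r0), so
   nregs is 1 and *pretend_size becomes (4 - 1) * UNITS_PER_WORD = 12;
   the prologue pushes r1-r3.  */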
27388 /* We can't rely on the caller doing the proper promotion when
27389 using APCS or ATPCS. */
27392 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
27394 return !TARGET_AAPCS_BASED;
27397 static machine_mode
27398 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
27400 int *punsignedp ATTRIBUTE_UNUSED,
27401 const_tree fntype ATTRIBUTE_UNUSED,
27402 int for_return ATTRIBUTE_UNUSED)
27404 if (GET_MODE_CLASS (mode) == MODE_INT
27405 && GET_MODE_SIZE (mode) < 4)
27413 arm_default_short_enums (void)
27415 return ARM_DEFAULT_SHORT_ENUMS;
27419 /* AAPCS requires that anonymous bitfields affect structure alignment. */
27422 arm_align_anon_bitfield (void)
27424 return TARGET_AAPCS_BASED;
27428 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
27431 arm_cxx_guard_type (void)
27433 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
27437 /* The EABI says test the least significant bit of a guard variable. */
27440 arm_cxx_guard_mask_bit (void)
27442 return TARGET_AAPCS_BASED;
27446 /* The EABI specifies that all array cookies are 8 bytes long. */
27449 arm_get_cookie_size (tree type)
27453 if (!TARGET_AAPCS_BASED)
27454 return default_cxx_get_cookie_size (type);
27456 size = build_int_cst (sizetype, 8);
27461 /* The EABI says that array cookies should also contain the element size. */
27464 arm_cookie_has_size (void)
27466 return TARGET_AAPCS_BASED;
27470 /* The EABI says constructors and destructors should return a pointer to
27471 the object constructed/destroyed. */
27474 arm_cxx_cdtor_returns_this (void)
27476 return TARGET_AAPCS_BASED;
27479 /* The EABI says that an inline function may never be the key
27480 method. */
27483 arm_cxx_key_method_may_be_inline (void)
27485 return !TARGET_AAPCS_BASED;
27489 arm_cxx_determine_class_data_visibility (tree decl)
27491 if (!TARGET_AAPCS_BASED
27492 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
27495 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27496 is exported. However, on systems without dynamic vague linkage,
27497 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
27498 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
27499 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
27501 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
27502 DECL_VISIBILITY_SPECIFIED (decl) = 1;
27506 arm_cxx_class_data_always_comdat (void)
27508 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27509 vague linkage if the class has no key function. */
27510 return !TARGET_AAPCS_BASED;
27514 /* The EABI says __aeabi_atexit should be used to register static
27518 arm_cxx_use_aeabi_atexit (void)
27520 return TARGET_AAPCS_BASED;
27525 arm_set_return_address (rtx source, rtx scratch)
27527 arm_stack_offsets *offsets;
27528 HOST_WIDE_INT delta;
27530 unsigned long saved_regs;
27532 offsets = arm_get_frame_offsets ();
27533 saved_regs = offsets->saved_regs_mask;
27535 if ((saved_regs & (1 << LR_REGNUM)) == 0)
27536 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27539 if (frame_pointer_needed)
27540 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
27543 /* LR will be the first saved register. */
27544 delta = offsets->outgoing_args - (offsets->frame + 4);
27549 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
27550 GEN_INT (delta & ~4095)));
27555 addr = stack_pointer_rtx;
27557 addr = plus_constant (Pmode, addr, delta);
27560 /* The store needs to be marked to prevent DSE from deleting
27561 it as dead if it is based on fp. */
27562 mem = gen_frame_mem (Pmode, addr);
27563 MEM_VOLATILE_P (mem) = true;
27564 emit_move_insn (mem, source);
27570 thumb_set_return_address (rtx source, rtx scratch)
27572 arm_stack_offsets *offsets;
27573 HOST_WIDE_INT delta;
27574 HOST_WIDE_INT limit;
27577 unsigned long mask;
27581 offsets = arm_get_frame_offsets ();
27582 mask = offsets->saved_regs_mask;
27583 if (mask & (1 << LR_REGNUM))
27586 /* Find the saved regs. */
27587 if (frame_pointer_needed)
27589 delta = offsets->soft_frame - offsets->saved_args;
27590 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
27596 delta = offsets->outgoing_args - offsets->saved_args;
27599 /* Allow for the stack frame. */
27600 if (TARGET_THUMB1 && TARGET_BACKTRACE)
27602 /* The link register is always the first saved register. */
27605 /* Construct the address. */
27606 addr = gen_rtx_REG (SImode, reg);
27609 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
27610 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
27614 addr = plus_constant (Pmode, addr, delta);
27616 /* The store needs to be marked to prevent DSE from deleting
27617 it as dead if it is based on fp. */
27618 mem = gen_frame_mem (Pmode, addr);
27619 MEM_VOLATILE_P (mem) = true;
27620 emit_move_insn (mem, source);
27623 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27626 /* Implements target hook vector_mode_supported_p. */
27628 arm_vector_mode_supported_p (machine_mode mode)
27630 /* Neon also supports V2SImode, etc. listed in the clause below. */
27631 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
27632 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
27633 || mode == V2DImode || mode == V8HFmode))
27636 if ((TARGET_NEON || TARGET_IWMMXT)
27637 && ((mode == V2SImode)
27638 || (mode == V4HImode)
27639 || (mode == V8QImode)))
27642 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27643 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27644 || mode == V2HAmode))
27650 /* Implements target hook array_mode_supported_p. */
27653 arm_array_mode_supported_p (machine_mode mode,
27654 unsigned HOST_WIDE_INT nelems)
27656 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
27657 for now, as the lane-swapping logic needs to be extended in the expanders.
27658 See PR target/82518. */
27659 if (TARGET_NEON && !BYTES_BIG_ENDIAN
27660 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27661 && (nelems >= 2 && nelems <= 4))
27667 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27668 registers when autovectorizing for Neon, at least until multiple vector
27669 widths are supported properly by the middle-end. */
27671 static machine_mode
27672 arm_preferred_simd_mode (scalar_mode mode)
27678 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27680 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27682 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27684 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27686 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27693 if (TARGET_REALLY_IWMMXT)
27709 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27711 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27712 using r0-r4 for function arguments, r7 for the stack frame, and not have
27713 enough left over to do doubleword arithmetic. For Thumb-2 all the
27714 potentially problematic instructions accept high registers so this is not
27715 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27716 that require many low registers. */
27718 arm_class_likely_spilled_p (reg_class_t rclass)
27720 if ((TARGET_THUMB1 && rclass == LO_REGS)
27721 || rclass == CC_REG)
27727 /* Implements target hook small_register_classes_for_mode_p. */
27729 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27731 return TARGET_THUMB1;
27734 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27735 ARM insns and therefore guarantee that the shift count is modulo 256.
27736 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27737 guarantee no particular behavior for out-of-range counts. */
27739 static unsigned HOST_WIDE_INT
27740 arm_shift_truncation_mask (machine_mode mode)
27742 return mode == SImode ? 255 : 0;
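/* A consequence of the mask above: an SImode shift by a variable count
   of 257 is guaranteed to behave like a shift by 257 & 255 == 1, so the
   middle end may omit an explicit AND of the count; DImode counts carry
   no such promise.  */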
27746 /* Map internal gcc register numbers to DWARF2 register numbers. */
27749 arm_dbx_register_number (unsigned int regno)
27754 if (IS_VFP_REGNUM (regno))
27756 /* See comment in arm_dwarf_register_span. */
27757 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27758 return 64 + regno - FIRST_VFP_REGNUM;
27760 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27763 if (IS_IWMMXT_GR_REGNUM (regno))
27764 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27766 if (IS_IWMMXT_REGNUM (regno))
27767 return 112 + regno - FIRST_IWMMXT_REGNUM;
27769 return DWARF_FRAME_REGISTERS;
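/* Worked examples of the mapping above, assuming the usual numbering in
   which regno - FIRST_VFP_REGNUM counts single-precision slots: s5 maps
   to 64 + 5 = 69 in the legacy range, while d20 (slot 40, which has no
   single-precision alias) maps to 256 + 40 / 2 = 276.  */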
27772 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27773 GCC models them as 64 32-bit registers, so we need to describe this to
27774 the DWARF generation code. Other registers can use the default. */
27776 arm_dwarf_register_span (rtx rtl)
27784 regno = REGNO (rtl);
27785 if (!IS_VFP_REGNUM (regno))
27788 /* XXX FIXME: The EABI defines two VFP register ranges:
27789 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27790 256-287: D0-D31
27791 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27792 corresponding D register. Until GDB supports this, we shall use the
27793 legacy encodings. We also use these encodings for D0-D15 for
27794 compatibility with older debuggers. */
27795 mode = GET_MODE (rtl);
27796 if (GET_MODE_SIZE (mode) < 8)
27799 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27801 nregs = GET_MODE_SIZE (mode) / 4;
27802 for (i = 0; i < nregs; i += 2)
27803 if (TARGET_BIG_END)
27805 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27806 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27810 parts[i] = gen_rtx_REG (SImode, regno + i);
27811 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27816 nregs = GET_MODE_SIZE (mode) / 8;
27817 for (i = 0; i < nregs; i++)
27818 parts[i] = gen_rtx_REG (DImode, regno + i);
27821 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
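/* For example, a DFmode value in d5 (the s10/s11 pair) on a
   little-endian target yields (parallel [(reg:SI s10) (reg:SI s11)]),
   describing two legacy 32-bit pieces rather than one 64-bit
   register.  */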
27824 #if ARM_UNWIND_INFO
27825 /* Emit unwind directives for a store-multiple instruction or stack pointer
27826 push during alignment.
27827 These should only ever be generated by the function prologue code, so
27828 expect them to have a particular form.
27829 The store-multiple instruction sometimes pushes pc as the last register,
27830 although it should not be tracked into unwind information, or for -Os
27831 sometimes pushes some dummy registers before first register that needs
27832 to be tracked in unwind information; such dummy registers are there just
27833 to avoid separate stack adjustment, and will not be restored in the
27834 epilogue. */
27837 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27840 HOST_WIDE_INT offset;
27841 HOST_WIDE_INT nregs;
27845 unsigned padfirst = 0, padlast = 0;
27848 e = XVECEXP (p, 0, 0);
27849 gcc_assert (GET_CODE (e) == SET);
27851 /* First insn will adjust the stack pointer. */
27852 gcc_assert (GET_CODE (e) == SET
27853 && REG_P (SET_DEST (e))
27854 && REGNO (SET_DEST (e)) == SP_REGNUM
27855 && GET_CODE (SET_SRC (e)) == PLUS);
27857 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27858 nregs = XVECLEN (p, 0) - 1;
27859 gcc_assert (nregs);
27861 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27864 /* For -Os dummy registers can be pushed at the beginning to
27865 avoid separate stack pointer adjustment. */
27866 e = XVECEXP (p, 0, 1);
27867 e = XEXP (SET_DEST (e), 0);
27868 if (GET_CODE (e) == PLUS)
27869 padfirst = INTVAL (XEXP (e, 1));
27870 gcc_assert (padfirst == 0 || optimize_size);
27871 /* The function prologue may also push pc, but not annotate it as it is
27872 never restored. We turn this into a stack pointer adjustment. */
27873 e = XVECEXP (p, 0, nregs);
27874 e = XEXP (SET_DEST (e), 0);
27875 if (GET_CODE (e) == PLUS)
27876 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27878 padlast = offset - 4;
27879 gcc_assert (padlast == 0 || padlast == 4);
27881 fprintf (asm_out_file, "\t.pad #4\n");
27883 fprintf (asm_out_file, "\t.save {");
27885 else if (IS_VFP_REGNUM (reg))
27888 fprintf (asm_out_file, "\t.vsave {");
27891 /* Unknown register type. */
27892 gcc_unreachable ();
27894 /* If the stack increment doesn't match the size of the saved registers,
27895 something has gone horribly wrong. */
27896 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27900 /* The remaining insns will describe the stores. */
27901 for (i = 1; i <= nregs; i++)
27903 /* Expect (set (mem <addr>) (reg)).
27904 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27905 e = XVECEXP (p, 0, i);
27906 gcc_assert (GET_CODE (e) == SET
27907 && MEM_P (SET_DEST (e))
27908 && REG_P (SET_SRC (e)));
27910 reg = REGNO (SET_SRC (e));
27911 gcc_assert (reg >= lastreg);
27914 fprintf (asm_out_file, ", ");
27915 /* We can't use %r for vfp because we need to use the
27916 double precision register names. */
27917 if (IS_VFP_REGNUM (reg))
27918 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27920 asm_fprintf (asm_out_file, "%r", reg);
27924 /* Check that the addresses are consecutive. */
27925 e = XEXP (SET_DEST (e), 0);
27926 if (GET_CODE (e) == PLUS)
27927 gcc_assert (REG_P (XEXP (e, 0))
27928 && REGNO (XEXP (e, 0)) == SP_REGNUM
27929 && CONST_INT_P (XEXP (e, 1))
27930 && offset == INTVAL (XEXP (e, 1)));
27934 && REGNO (e) == SP_REGNUM);
27935 offset += reg_size;
27938 fprintf (asm_out_file, "}\n");
27940 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27943 /* Emit unwind directives for a SET. */
27946 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27954 switch (GET_CODE (e0))
27957 /* Pushing a single register. */
27958 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27959 || !REG_P (XEXP (XEXP (e0, 0), 0))
27960 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27963 asm_fprintf (asm_out_file, "\t.save ");
27964 if (IS_VFP_REGNUM (REGNO (e1)))
27965 asm_fprintf (asm_out_file, "{d%d}\n",
27966 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27968 asm_fprintf (asm_out_file, "{%r}\n", REGNO (e1));
27972 if (REGNO (e0) == SP_REGNUM)
27974 /* A stack increment. */
27975 if (GET_CODE (e1) != PLUS
27976 || !REG_P (XEXP (e1, 0))
27977 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27978 || !CONST_INT_P (XEXP (e1, 1)))
27981 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27982 -INTVAL (XEXP (e1, 1)));
27984 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27986 HOST_WIDE_INT offset;
27988 if (GET_CODE (e1) == PLUS)
27990 if (!REG_P (XEXP (e1, 0))
27991 || !CONST_INT_P (XEXP (e1, 1)))
27993 reg = REGNO (XEXP (e1, 0));
27994 offset = INTVAL (XEXP (e1, 1));
27995 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27996 HARD_FRAME_POINTER_REGNUM, reg,
27999 else if (REG_P (e1))
28002 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
28003 HARD_FRAME_POINTER_REGNUM, reg);
28008 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
28010 /* Move from sp to reg. */
28011 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
28013 else if (GET_CODE (e1) == PLUS
28014 && REG_P (XEXP (e1, 0))
28015 && REGNO (XEXP (e1, 0)) == SP_REGNUM
28016 && CONST_INT_P (XEXP (e1, 1)))
28018 /* Set reg to offset from sp. */
28019 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
28020 REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
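/* A sketch of the mapping from prologue SETs to directives:

	sp = sp - 16	=>	.pad #16
	fp = sp + 8	=>	.setfp fp, sp, #8
	r7 = sp		=>	.movsp r7
	r7 = sp + 12	=>	.movsp r7, #12  */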
28032 /* Emit unwind directives for the given insn. */
28035 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
28038 bool handled_one = false;
28040 if (arm_except_unwind_info (&global_options) != UI_TARGET)
28043 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
28044 && (TREE_NOTHROW (current_function_decl)
28045 || crtl->all_throwers_are_sibcalls))
28048 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
28051 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
28053 switch (REG_NOTE_KIND (note))
28055 case REG_FRAME_RELATED_EXPR:
28056 pat = XEXP (note, 0);
28059 case REG_CFA_REGISTER:
28060 pat = XEXP (note, 0);
28063 pat = PATTERN (insn);
28064 if (GET_CODE (pat) == PARALLEL)
28065 pat = XVECEXP (pat, 0, 0);
28068 /* Only emitted for IS_STACKALIGN re-alignment. */
28073 src = SET_SRC (pat);
28074 dest = SET_DEST (pat);
28076 gcc_assert (src == stack_pointer_rtx);
28077 reg = REGNO (dest);
28078 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
28081 handled_one = true;
28084 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
28085 to get correct dwarf information for shrink-wrapping. We should not
28086 emit unwind information for it because these notes are used either for
28087 pretend arguments or to adjust sp and restore registers from the stack.
28089 case REG_CFA_DEF_CFA:
28090 case REG_CFA_ADJUST_CFA:
28091 case REG_CFA_RESTORE:
28094 case REG_CFA_EXPRESSION:
28095 case REG_CFA_OFFSET:
28096 /* ??? Only handling here what we actually emit. */
28097 gcc_unreachable ();
28105 pat = PATTERN (insn);
28108 switch (GET_CODE (pat))
28111 arm_unwind_emit_set (asm_out_file, pat);
28115 /* Store multiple. */
28116 arm_unwind_emit_sequence (asm_out_file, pat);
28125 /* Output a reference from a function exception table to the type_info
28126 object X. The EABI specifies that the symbol should be relocated by
28127 an R_ARM_TARGET2 relocation. */
28130 arm_output_ttype (rtx x)
28132 fputs ("\t.word\t", asm_out_file);
28133 output_addr_const (asm_out_file, x);
28134 /* Use special relocations for symbol references. */
28135 if (!CONST_INT_P (x))
28136 fputs ("(TARGET2)", asm_out_file);
28137 fputc ('\n', asm_out_file);
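/* For instance (a sketch), a reference to the type_info object of a
   class Foo would be emitted as:

	.word	_ZTI3Foo(TARGET2)

   with the final value supplied by the R_ARM_TARGET2 relocation.  */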
28142 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
28145 arm_asm_emit_except_personality (rtx personality)
28147 fputs ("\t.personality\t", asm_out_file);
28148 output_addr_const (asm_out_file, personality);
28149 fputc ('\n', asm_out_file);
28151 #endif /* ARM_UNWIND_INFO */
28153 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
28156 arm_asm_init_sections (void)
28158 #if ARM_UNWIND_INFO
28159 exception_section = get_unnamed_section (0, output_section_asm_op,
28161 #endif /* ARM_UNWIND_INFO */
28163 #ifdef OBJECT_FORMAT_ELF
28164 if (target_pure_code)
28165 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
28169 /* Output unwind directives for the start/end of a function. */
28172 arm_output_fn_unwind (FILE * f, bool prologue)
28174 if (arm_except_unwind_info (&global_options) != UI_TARGET)
28178 fputs ("\t.fnstart\n", f);
28181 /* If this function will never be unwound, then mark it as such.
28182 The same condition is used in arm_unwind_emit to suppress
28183 the frame annotations. */
28184 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
28185 && (TREE_NOTHROW (current_function_decl)
28186 || crtl->all_throwers_are_sibcalls))
28187 fputs ("\t.cantunwind\n", f);
28189 fputs ("\t.fnend\n", f);
28194 arm_emit_tls_decoration (FILE *fp, rtx x)
28196 enum tls_reloc reloc;
28199 val = XVECEXP (x, 0, 0);
28200 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
28202 output_addr_const (fp, val);
28207 fputs ("(tlsgd)", fp);
28209 case TLS_GD32_FDPIC:
28210 fputs ("(tlsgd_fdpic)", fp);
28213 fputs ("(tlsldm)", fp);
28215 case TLS_LDM32_FDPIC:
28216 fputs ("(tlsldm_fdpic)", fp);
28219 fputs ("(tlsldo)", fp);
28222 fputs ("(gottpoff)", fp);
28224 case TLS_IE32_FDPIC:
28225 fputs ("(gottpoff_fdpic)", fp);
28228 fputs ("(tpoff)", fp);
28231 fputs ("(tlsdesc)", fp);
28234 gcc_unreachable ();
28243 fputs (" + (. - ", fp);
28244 output_addr_const (fp, XVECEXP (x, 0, 2));
28245 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
28246 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
28247 output_addr_const (fp, XVECEXP (x, 0, 3));
28257 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
28260 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
28262 gcc_assert (size == 4);
28263 fputs ("\t.word\t", file);
28264 output_addr_const (file, x);
28265 fputs ("(tlsldo)", file);
28268 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
28271 arm_output_addr_const_extra (FILE *fp, rtx x)
28273 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
28274 return arm_emit_tls_decoration (fp, x);
28275 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
28278 int labelno = INTVAL (XVECEXP (x, 0, 0));
28280 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
28281 assemble_name_raw (fp, label);
28285 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
28287 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
28291 output_addr_const (fp, XVECEXP (x, 0, 0));
28295 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
28297 output_addr_const (fp, XVECEXP (x, 0, 0));
28301 output_addr_const (fp, XVECEXP (x, 0, 1));
28305 else if (GET_CODE (x) == CONST_VECTOR)
28306 return arm_emit_vector_const (fp, x);
28311 /* Output assembly for a shift instruction.
28312 SET_FLAGS determines how the instruction modifies the condition codes.
28313 0 - Do not set condition codes.
28314 1 - Set condition codes.
28315 2 - Use smallest instruction. */
28317 arm_output_shift (rtx *operands, int set_flags)
28320 static const char flag_chars[3] = {'?', '.', '!'};
28325 c = flag_chars[set_flags];
28326 shift = shift_op (operands[3], &val);
28330 operands[2] = GEN_INT (val);
28331 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
28334 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
28336 output_asm_insn (pattern, operands);
28340 /* Output assembly for a WMMX immediate shift instruction. */
28342 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
28344 int shift = INTVAL (operands[2]);
28346 machine_mode opmode = GET_MODE (operands[0]);
28348 gcc_assert (shift >= 0);
28350 /* Handle shift values that exceed what the register versions can encode:
28351 > 63 (for D qualifier), > 31 (for W qualifier) or > 15 (for H qualifier). */
28352 if (((opmode == V4HImode) && (shift > 15))
28353 || ((opmode == V2SImode) && (shift > 31))
28354 || ((opmode == DImode) && (shift > 63)))
28358 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
28359 output_asm_insn (templ, operands);
28360 if (opmode == DImode)
28362 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
28363 output_asm_insn (templ, operands);
28368 /* The destination register will contain all zeros. */
28369 sprintf (templ, "wzero\t%%0");
28370 output_asm_insn (templ, operands);
28375 if ((opmode == DImode) && (shift > 32))
28377 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
28378 output_asm_insn (templ, operands);
28379 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
28380 output_asm_insn (templ, operands);
28384 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
28385 output_asm_insn (templ, operands);
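/* A sketch of the cases above, using a hypothetical "<insn>" mnemonic,
   for V2SImode (W qualifier, maximum encodable shift 31):

	shift = 40, WSRA/WROR	->	"<insn> %0, %1, #32" (saturated)
	shift = 40, otherwise	->	"wzero %0"           (result is zero)

   and for an in-range DImode shift of 40:

	"<insn> %0, %1, #32"  followed by  "<insn> %0, %0, #8".  */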
28390 /* Output assembly for a WMMX tinsr instruction. */
28392 arm_output_iwmmxt_tinsr (rtx *operands)
28394 int mask = INTVAL (operands[3]);
28397 int units = mode_nunits[GET_MODE (operands[0])];
28398 gcc_assert ((mask & (mask - 1)) == 0);
28399 for (i = 0; i < units; ++i)
28401 if ((mask & 0x01) == 1)
28407 gcc_assert (i < units);
28409 switch (GET_MODE (operands[0]))
28412 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
28415 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
28418 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
28421 gcc_unreachable ();
28424 output_asm_insn (templ, operands);
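/* Sketch: for a V4HImode insert with OPERANDS[3] == 4 (bit 2 set, so
   lane 2), the instruction emitted would be "tinsrh%?\t%0, %2, #2".  */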
28429 /* Output a Thumb-1 casesi dispatch sequence. */
28431 thumb1_output_casesi (rtx *operands)
28433 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
28435 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28437 switch (GET_MODE(diff_vec))
28440 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28441 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
28443 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28444 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
28446 return "bl\t%___gnu_thumb1_case_si";
28448 gcc_unreachable ();
28452 /* Output a Thumb-2 casesi instruction. */
28454 thumb2_output_casesi (rtx *operands)
28456 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
28458 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28460 output_asm_insn ("cmp\t%0, %1", operands);
28461 output_asm_insn ("bhi\t%l3", operands);
28462 switch (GET_MODE(diff_vec))
28465 return "tbb\t[%|pc, %0]";
28467 return "tbh\t[%|pc, %0, lsl #1]";
28471 output_asm_insn ("adr\t%4, %l2", operands);
28472 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
28473 output_asm_insn ("add\t%4, %4, %5", operands);
28478 output_asm_insn ("adr\t%4, %l2", operands);
28479 return "ldr\t%|pc, [%4, %0, lsl #2]";
28482 gcc_unreachable ();
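/* A sketch of the emitted Thumb-2 dispatch sequence for a QImode
   difference vector:

	cmp	%0, %1		@ index against bounds
	bhi	%l3		@ out of range: take the default label
	tbb	[pc, %0]	@ byte table branch

   HImode tables use "tbh [pc, %0, lsl #1]"; SImode tables fall back to
   an adr/ldr sequence that loads the target address.  */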
28486 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
28487 per-core tuning structs. */
28489 arm_issue_rate (void)
28491 return current_tune->issue_rate;
28494 /* Return how many instructions the scheduler should look ahead to choose the best one. */
28497 arm_first_cycle_multipass_dfa_lookahead (void)
28499 int issue_rate = arm_issue_rate ();
28501 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
28504 /* Enable modeling of L2 auto-prefetcher. */
28506 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
28508 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
28512 arm_mangle_type (const_tree type)
28514 /* The ARM ABI documents (10th October 2008) say that "__va_list"
28515 has to be mangled as if it is in the "std" namespace. */
28516 if (TARGET_AAPCS_BASED
28517 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
28518 return "St9__va_list";
28520 /* Half-precision float. */
28521 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
28524 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a builtin type. */
28526 if (TYPE_NAME (type) != NULL)
28527 return arm_mangle_builtin_type (type);
28529 /* Use the default mangling. */
28533 /* Order of allocation of core registers for Thumb: this allocation is
28534 written over the corresponding initial entries of the array
28535 initialized with REG_ALLOC_ORDER. We allocate all low registers
28536 first. Saving and restoring a low register is usually cheaper than
28537 using a call-clobbered high register. */
28539 static const int thumb_core_reg_alloc_order[] =
28541 3, 2, 1, 0, 4, 5, 6, 7,
28542 12, 14, 8, 9, 10, 11
28545 /* Adjust register allocation order when compiling for Thumb. */
28548 arm_order_regs_for_local_alloc (void)
28550 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
28551 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
28553 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
28554 sizeof (thumb_core_reg_alloc_order));
28557 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
28560 arm_frame_pointer_required (void)
28562 if (SUBTARGET_FRAME_POINTER_REQUIRED)
28565 /* If the function receives nonlocal gotos, it needs to save the frame
28566 pointer in the nonlocal_goto_save_area object. */
28567 if (cfun->has_nonlocal_label)
28570 /* The frame pointer is required for non-leaf APCS frames. */
28571 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
28574 /* If we are probing the stack in the prologue, we will have a faulting
28575 instruction prior to the stack adjustment and this requires a frame
28576 pointer if we want to catch the exception using the EABI unwinder. */
28577 if (!IS_INTERRUPT (arm_current_func_type ())
28578 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
28579 || flag_stack_clash_protection)
28580 && arm_except_unwind_info (&global_options) == UI_TARGET
28581 && cfun->can_throw_non_call_exceptions)
28583 HOST_WIDE_INT size = get_frame_size ();
28585 /* That's irrelevant if there is no stack adjustment. */
28589 /* That's relevant only if there is a stack probe. */
28590 if (crtl->is_leaf && !cfun->calls_alloca)
28592 /* We don't have the final size of the frame so adjust. */
28593 size += 32 * UNITS_PER_WORD;
28594 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
28604 /* Only thumb1 can't support conditional execution, so return true if
28605 the target is not thumb1. */
28607 arm_have_conditional_execution (void)
28609 return !TARGET_THUMB1;
28612 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
28613 static HOST_WIDE_INT
28614 arm_vector_alignment (const_tree type)
28616 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
28618 if (TARGET_AAPCS_BASED)
28619 align = MIN (align, 64);
28625 arm_autovectorize_vector_sizes (vector_sizes *sizes, bool)
28627 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28629 sizes->safe_push (16);
28630 sizes->safe_push (8);
28635 arm_vector_alignment_reachable (const_tree type, bool is_packed)
28637 /* Vectors which aren't in packed structures will not be less aligned than
28638 the natural alignment of their element type, so this is safe. */
28639 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28642 return default_builtin_vector_alignment_reachable (type, is_packed);
28646 arm_builtin_support_vector_misalignment (machine_mode mode,
28647 const_tree type, int misalignment,
28650 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28652 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
28657 /* If the misalignment is unknown, we should be able to handle the access
28658 so long as it is not to a member of a packed data structure. */
28659 if (misalignment == -1)
28662 /* Return true if the misalignment is a multiple of the natural alignment
28663 of the vector's element type. This is probably always going to be
28664 true in practice, since we've already established that this isn't a
28666 return ((misalignment % align) == 0);
28669 return default_builtin_support_vector_misalignment (mode, type, misalignment,
28674 arm_conditional_register_usage (void)
28678 if (TARGET_THUMB1 && optimize_size)
28680 /* When optimizing for size on Thumb-1, it's better not
28681 to use the HI regs, because of the overhead of stacking them. */
28683 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28684 fixed_regs[regno] = call_used_regs[regno] = 1;
28687 /* The link register can be clobbered by any branch insn,
28688 but we have no way to track that at present, so mark
28689 it as unavailable. */
28691 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28693 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28695 /* VFPv3 registers are disabled when earlier VFP
28696 versions are selected due to the definition of
28697 LAST_VFP_REGNUM. */
28698 for (regno = FIRST_VFP_REGNUM;
28699 regno <= LAST_VFP_REGNUM; ++ regno)
28701 fixed_regs[regno] = 0;
28702 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28703 || regno >= FIRST_VFP_REGNUM + 32;
28707 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
28709 regno = FIRST_IWMMXT_GR_REGNUM;
28710 /* The 2002/10/09 revision of the XScale ABI has wCG0
28711 and wCG1 as call-preserved registers. The 2002/11/21
28712 revision changed this so that all wCG registers are
28713 scratch registers. */
28714 for (regno = FIRST_IWMMXT_GR_REGNUM;
28715 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28716 fixed_regs[regno] = 0;
28717 /* The XScale ABI has wR0 - wR9 as scratch registers,
28718 the rest as call-preserved registers. */
28719 for (regno = FIRST_IWMMXT_REGNUM;
28720 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28722 fixed_regs[regno] = 0;
28723 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28727 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28729 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28730 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28732 else if (TARGET_APCS_STACK)
28734 fixed_regs[10] = 1;
28735 call_used_regs[10] = 1;
28737 /* -mcaller-super-interworking reserves r11 for calls to
28738 _interwork_r11_call_via_rN(). Making the register global
28739 is an easy way of ensuring that it remains valid for all calls. */
28741 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28742 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28744 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28745 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28746 if (TARGET_CALLER_INTERWORKING)
28747 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28749 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28753 arm_preferred_rename_class (reg_class_t rclass)
28755 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28756 using GENERAL_REGS. During the register rename pass we therefore prefer
28757 LO_REGS, so that code size can be reduced. */
28758 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28764 /* Compute the attribute "length" of insn "*push_multi".
28765 So this function MUST be kept in sync with that insn pattern. */
28767 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
28769 int i, regno, hi_reg;
28770 int num_saves = XVECLEN (parallel_op, 0);
28780 regno = REGNO (first_op);
28781 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
28782 list is 8-bit. Normally this means all registers in the list must be
28783 LO_REGS, that is (R0-R7). If any HI_REGS are used, then we must use 32-bit
28784 encodings. There is one exception: for PUSH, LR (a HI_REG) can still be used
28785 with the 16-bit encoding. */
28786 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28787 for (i = 1; i < num_saves && !hi_reg; i++)
28789 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28790 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
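/* Sketch: under Thumb-2, "push {r0-r3, lr}" can use the 16-bit encoding
   (length 2), whereas "push {r4, r8}" needs the 32-bit encoding
   (length 4), since r8 is a HI_REG other than LR.  */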
28798 /* Compute the attribute "length" of insn. Currently, this function is used
28799 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28800 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28801 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
28802 true if OPERANDS contains an insn which explicitly updates the base register. */
28805 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28814 rtx parallel_op = operands[0];
28815 /* Initialize to the number of elements in the PARALLEL. */
28816 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28817 /* Initialize to the number of the base register. */
28818 unsigned regno = REGNO (operands[1]);
28819 /* Skip return and write back pattern.
28820 We only need register pop pattern for later analysis. */
28821 unsigned first_indx = 0;
28822 first_indx += return_pc ? 1 : 0;
28823 first_indx += write_back_p ? 1 : 0;
28825 /* A pop operation can be done through LDM or POP. If the base register is SP
28826 and write-back is used, then LDM is an alias of POP. */
28827 bool pop_p = (regno == SP_REGNUM && write_back_p);
28828 bool ldm_p = !pop_p;
28830 /* Check base register for LDM. */
28831 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28834 /* Check each register in the list. */
28835 for (; indx >= first_indx; indx--)
28837 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28838 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28839 comment in arm_attr_length_push_multi. */
28840 if (REGNO_REG_CLASS (regno) == HI_REGS
28841 && (regno != PC_REGNUM || ldm_p))
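/* Sketch: "pop {r4, pc}" keeps the 16-bit encoding, while an LDM whose
   base register is a HI_REG, e.g. "ldm r8!, {r0, r1}", or a pop list
   containing a HI_REG other than PC, requires the 32-bit encoding.  */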
28848 /* Compute the number of instructions emitted by output_move_double. */
28850 arm_count_output_move_double_insns (rtx *operands)
28854 /* output_move_double may modify the operands array, so call it
28855 here on a copy of the array. */
28856 ops[0] = operands[0];
28857 ops[1] = operands[1];
28858 output_move_double (ops, false, &count);
28862 /* Same as above, but operands are a register/memory pair in SImode.
28863 Assumes OPERANDS has the base register in position 0 and the memory in position
28864 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
28866 arm_count_ldrdstrd_insns (rtx *operands, bool load)
28870 int regnum, memnum;
28872 regnum = 0, memnum = 1;
28874 regnum = 1, memnum = 0;
28875 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
28876 ops[memnum] = adjust_address (operands[2], DImode, 0);
28877 output_move_double (ops, false, &count);
28883 vfp3_const_double_for_fract_bits (rtx operand)
28885 REAL_VALUE_TYPE r0;
28887 if (!CONST_DOUBLE_P (operand))
28890 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28891 if (exact_real_inverse (DFmode, &r0)
28892 && !REAL_VALUE_NEGATIVE (r0))
28894 if (exact_real_truncate (DFmode, &r0))
28896 HOST_WIDE_INT value = real_to_integer (&r0);
28897 value = value & 0xffffffff;
28898 if ((value != 0) && ((value & (value - 1)) == 0))
28900 int ret = exact_log2 (value);
28901 gcc_assert (IN_RANGE (ret, 0, 31));
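/* Example: 0.125 has the exact inverse 8.0, a power of 2, so this
   returns 3 -- the number of fraction bits the fixed-point conversion
   instructions would use (e.g. a vcvt with #3).  */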
28909 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28910 log2 is in [1, 32], return that log2. Otherwise return -1.
28911 This is used in the patterns for vcvt.s32.f32 floating-point to
28912 fixed-point conversions. */
28915 vfp3_const_double_for_bits (rtx x)
28917 const REAL_VALUE_TYPE *r;
28919 if (!CONST_DOUBLE_P (x))
28922 r = CONST_DOUBLE_REAL_VALUE (x);
28924 if (REAL_VALUE_NEGATIVE (*r)
28925 || REAL_VALUE_ISNAN (*r)
28926 || REAL_VALUE_ISINF (*r)
28927 || !real_isinteger (r, SFmode))
28930 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28932 /* The exact_log2 above will have returned -1 if this is
28933 not an exact log2. */
28934 if (!IN_RANGE (hwint, 1, 32))
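/* Example: 16.0 is a positive integer power of 2, so 4 is returned;
   3.0 is not a power of 2, so -1 is returned instead.  */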
28941 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28944 arm_pre_atomic_barrier (enum memmodel model)
28946 if (need_atomic_barrier_p (model, true))
28947 emit_insn (gen_memory_barrier ());
28951 arm_post_atomic_barrier (enum memmodel model)
28953 if (need_atomic_barrier_p (model, false))
28954 emit_insn (gen_memory_barrier ());
28957 /* Emit the load-exclusive and store-exclusive instructions.
28958 Use acquire and release versions if necessary. */
28961 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28963 rtx (*gen) (rtx, rtx);
28969 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28970 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28971 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28972 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28974 gcc_unreachable ();
28981 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28982 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28983 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28984 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28986 gcc_unreachable ();
28990 emit_insn (gen (rval, mem));
28994 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28997 rtx (*gen) (rtx, rtx, rtx);
29003 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
29004 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
29005 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
29006 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
29008 gcc_unreachable ();
29015 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
29016 case E_HImode: gen = gen_arm_store_exclusivehi; break;
29017 case E_SImode: gen = gen_arm_store_exclusivesi; break;
29018 case E_DImode: gen = gen_arm_store_exclusivedi; break;
29020 gcc_unreachable ();
29024 emit_insn (gen (bval, rval, mem));
29027 /* Mark the previous jump instruction as unlikely. */
29030 emit_unlikely_jump (rtx insn)
29032 rtx_insn *jump = emit_jump_insn (insn);
29033 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
29036 /* Expand a compare and swap pattern. */
29039 arm_expand_compare_and_swap (rtx operands[])
29041 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
29042 machine_mode mode, cmp_mode;
29044 bval = operands[0];
29045 rval = operands[1];
29047 oldval = operands[3];
29048 newval = operands[4];
29049 is_weak = operands[5];
29050 mod_s = operands[6];
29051 mod_f = operands[7];
29052 mode = GET_MODE (mem);
29054 /* Normally the succ memory model must be stronger than fail, but in the
29055 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
29056 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
29058 if (TARGET_HAVE_LDACQ
29059 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
29060 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
29061 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
29067 /* For narrow modes, we're going to perform the comparison in SImode,
29068 so do the zero-extension now. */
29069 rval = gen_reg_rtx (SImode);
29070 oldval = convert_modes (SImode, mode, oldval, true);
29074 /* Force the value into a register if needed. We waited until after
29075 the zero-extension above to do this properly. */
29076 if (!arm_add_operand (oldval, SImode))
29077 oldval = force_reg (SImode, oldval);
29081 if (!cmpdi_operand (oldval, mode))
29082 oldval = force_reg (mode, oldval);
29086 gcc_unreachable ();
29090 cmp_mode = E_SImode;
29092 cmp_mode = CC_Zmode;
29094 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
29095 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
29096 oldval, newval, is_weak, mod_s, mod_f));
29098 if (mode == QImode || mode == HImode)
29099 emit_move_insn (operands[1], gen_lowpart (mode, rval));
29101 /* In all cases, we arrange for success to be signaled by Z set.
29102 This arrangement allows for the boolean result to be used directly
29103 in a subsequent branch, post optimization. For Thumb-1 targets, the
29104 boolean negation of the result is also stored in bval because the Thumb-1
29105 backend lacks dependency tracking for the CC flag, as flag-setting is not
29106 represented at the RTL level. */
29108 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
29111 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
29112 emit_insn (gen_rtx_SET (bval, x));
29116 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
29117 another memory store between the load-exclusive and store-exclusive can
29118 reset the monitor from Exclusive to Open state. This means we must wait
29119 until after reload to split the pattern, lest we get a register spill in
29120 the middle of the atomic sequence. Success of the compare and swap is
29121 indicated by the Z flag set for 32bit targets and by neg_bval being zero
29122 for Thumb-1 targets (ie. negation of the boolean value returned by
29123 atomic_compare_and_swapmode standard pattern in operand 0). */
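/* The split sequence is roughly (a sketch, strong CAS on SImode):

     .Lretry:
	ldrex	rval, [mem]
	cmp	rval, oldval
	bne	.Ldone
	strex	neg_bval, newval, [mem]
	cmp	neg_bval, #0
	bne	.Lretry
     .Ldone:

   For a weak CAS the loop-back branch is omitted.  */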
29126 arm_split_compare_and_swap (rtx operands[])
29128 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
29130 enum memmodel mod_s, mod_f;
29132 rtx_code_label *label1, *label2;
29135 rval = operands[1];
29137 oldval = operands[3];
29138 newval = operands[4];
29139 is_weak = (operands[5] != const0_rtx);
29140 mod_s_rtx = operands[6];
29141 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
29142 mod_f = memmodel_from_int (INTVAL (operands[7]));
29143 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
29144 mode = GET_MODE (mem);
29146 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
29148 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
29149 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
29151 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
29152 a full barrier is emitted after the store-release. */
29154 use_acquire = false;
29156 /* Checks whether a barrier is needed and emits one accordingly. */
29157 if (!(use_acquire || use_release))
29158 arm_pre_atomic_barrier (mod_s);
29163 label1 = gen_label_rtx ();
29164 emit_label (label1);
29166 label2 = gen_label_rtx ();
29168 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
29170 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
29171 as required to communicate with arm_expand_compare_and_swap. */
29174 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
29175 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
29176 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
29177 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
29178 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
29182 emit_move_insn (neg_bval, const1_rtx);
29183 cond = gen_rtx_NE (VOIDmode, rval, oldval);
29184 if (thumb1_cmpneg_operand (oldval, SImode))
29185 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
29188 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
29191 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
29193 /* Weak or strong, we want EQ to be true for success, so that we
29194 match the flags that we got from the compare above. */
29197 cond = gen_rtx_REG (CCmode, CC_REGNUM);
29198 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
29199 emit_insn (gen_rtx_SET (cond, x));
29204 /* Z is set to boolean value of !neg_bval, as required to communicate
29205 with arm_expand_compare_and_swap. */
29206 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
29207 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
29210 if (!is_mm_relaxed (mod_f))
29211 emit_label (label2);
29213 /* Checks whether a barrier is needed and emits one accordingly. */
29215 || !(use_acquire || use_release))
29216 arm_post_atomic_barrier (mod_s);
29218 if (is_mm_relaxed (mod_f))
29219 emit_label (label2);
29222 /* Split an atomic operation pattern. Operation is given by CODE and is one
29223 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
29224 operation). Operation is performed on the content at MEM and on VALUE
29225 following the memory model MODEL_RTX. The content at MEM before and after
29226 the operation is returned in OLD_OUT and NEW_OUT respectively while the
29227 success of the operation is returned in COND. Using a scratch register or
29228 an operand register for these determines what result is returned for that pattern. */
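/* The emitted loop is roughly (a sketch, PLUS on SImode):

     .Lretry:
	ldrex	old_out, [mem]
	add	new_out, old_out, value
	strex	cond, new_out, [mem]
	cmp	cond, #0
	bne	.Lretry  */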
29232 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
29233 rtx value, rtx model_rtx, rtx cond)
29235 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
29236 machine_mode mode = GET_MODE (mem);
29237 machine_mode wmode = (mode == DImode ? DImode : SImode);
29238 rtx_code_label *label;
29239 bool all_low_regs, bind_old_new;
29242 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
29244 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
29245 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
29247 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
29248 a full barrier is emitted after the store-release. */
29250 use_acquire = false;
29252 /* Checks whether a barrier is needed and emits one accordingly. */
29253 if (!(use_acquire || use_release))
29254 arm_pre_atomic_barrier (model);
29256 label = gen_label_rtx ();
29257 emit_label (label);
29260 new_out = gen_lowpart (wmode, new_out);
29262 old_out = gen_lowpart (wmode, old_out);
29265 value = simplify_gen_subreg (wmode, value, mode, 0);
29267 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
29269 /* Does the operation require destination and first operand to use the same
29270 register? This is decided by register constraints of relevant insn
29271 patterns in thumb1.md. */
29272 gcc_assert (!new_out || REG_P (new_out));
29273 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
29274 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
29275 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
29280 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
29282 /* We want to return the old value while putting the result of the operation
29283 in the same register as the old value, so copy the old value over to the
29284 destination register and use that register for the operation. */
29285 if (old_out && bind_old_new)
29287 emit_move_insn (new_out, old_out);
29298 x = gen_rtx_AND (wmode, old_out, value);
29299 emit_insn (gen_rtx_SET (new_out, x));
29300 x = gen_rtx_NOT (wmode, new_out);
29301 emit_insn (gen_rtx_SET (new_out, x));
29305 if (CONST_INT_P (value))
29307 value = GEN_INT (-INTVAL (value));
29313 if (mode == DImode)
29315 /* DImode plus/minus need to clobber flags. */
29316 /* The adddi3 and subdi3 patterns are incorrectly written so that
29317 they require matching operands, even when we could easily support
29318 three operands. Thankfully, this can be fixed up post-splitting,
29319 as the individual add+adc patterns do accept three operands and
29320 post-reload cprop can make these moves go away. */
29321 emit_move_insn (new_out, old_out);
29323 x = gen_adddi3 (new_out, new_out, value);
29325 x = gen_subdi3 (new_out, new_out, value);
29332 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
29333 emit_insn (gen_rtx_SET (new_out, x));
29337 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
29340 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
29341 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
29343 /* Checks whether a barrier is needed and emits one accordingly. */
29345 || !(use_acquire || use_release))
29346 arm_post_atomic_barrier (model);
29349 #define MAX_VECT_LEN 16
29351 struct expand_vec_perm_d
29353 rtx target, op0, op1;
29354 vec_perm_indices perm;
29355 machine_mode vmode;
29360 /* Generate a variable permutation. */
29363 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
29365 machine_mode vmode = GET_MODE (target);
29366 bool one_vector_p = rtx_equal_p (op0, op1);
29368 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
29369 gcc_checking_assert (GET_MODE (op0) == vmode);
29370 gcc_checking_assert (GET_MODE (op1) == vmode);
29371 gcc_checking_assert (GET_MODE (sel) == vmode);
29372 gcc_checking_assert (TARGET_NEON);
29376 if (vmode == V8QImode)
29377 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
29379 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
29385 if (vmode == V8QImode)
29387 pair = gen_reg_rtx (V16QImode);
29388 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
29389 pair = gen_lowpart (TImode, pair);
29390 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
29394 pair = gen_reg_rtx (OImode);
29395 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
29396 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
29402 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
29404 machine_mode vmode = GET_MODE (target);
29405 unsigned int nelt = GET_MODE_NUNITS (vmode);
29406 bool one_vector_p = rtx_equal_p (op0, op1);
29409 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29410 numbering of elements for big-endian, we must reverse the order. */
29411 gcc_checking_assert (!BYTES_BIG_ENDIAN);
29413 /* The VTBL instruction does not use a modulo index, so we must take care
29414 of that ourselves. */
29415 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
29416 mask = gen_const_vec_duplicate (vmode, mask);
29417 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
29419 arm_expand_vec_perm_1 (target, op0, op1, sel);
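/* Sketch: for a V8QImode permutation of a single vector, each selector
   byte is masked with AND #7 before the vtbl, so an index of 9 selects
   lane 1, matching VEC_PERM_EXPR's modulo semantics.  */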
29422 /* Map lane ordering between architectural lane order, and GCC lane order,
29423 taking into account ABI. See comment above output_move_neon for details. */
29426 neon_endian_lane_map (machine_mode mode, int lane)
29428 if (BYTES_BIG_ENDIAN)
29430 int nelems = GET_MODE_NUNITS (mode);
29431 /* Reverse lane order. */
29432 lane = (nelems - 1 - lane);
29433 /* Reverse D register order, to match ABI. */
29434 if (GET_MODE_SIZE (mode) == 16)
29435 lane = lane ^ (nelems / 2);
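/* Example: for V4SImode on big-endian, lane 0 maps to 3 by the lane
   reversal, then to 1 after the D-register swap; the mapping is its
   own inverse (0 <-> 1, 2 <-> 3).  */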
29440 /* Some permutations index into pairs of vectors; this is a helper function
29441 to map indexes into those pairs of vectors. */
29444 neon_pair_endian_lane_map (machine_mode mode, int lane)
29446 int nelem = GET_MODE_NUNITS (mode);
29447 if (BYTES_BIG_ENDIAN)
29449 return neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
29453 /* Generate or test for an insn that supports a constant permutation. */
29455 /* Recognize patterns for the VUZP insns. */
29458 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
29460 unsigned int i, odd, mask, nelt = d->perm.length ();
29461 rtx out0, out1, in0, in1;
29465 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29468 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
29469 big endian pattern on 64 bit vectors, so we correct for that. */
29470 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
29471 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
29473 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
29475 if (first_elem == neon_endian_lane_map (d->vmode, 0))
29477 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
29481 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29483 for (i = 0; i < nelt; i++)
29486 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
29487 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
29497 if (swap_nelt != 0)
29498 std::swap (in0, in1);
29501 out1 = gen_reg_rtx (d->vmode);
29503 std::swap (out0, out1);
29505 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
29509 /* Recognize patterns for the VZIP insns. */
29512 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
29514 unsigned int i, high, mask, nelt = d->perm.length ();
29515 rtx out0, out1, in0, in1;
29519 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29522 is_swapped = BYTES_BIG_ENDIAN;
29524 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
29527 if (first_elem == neon_endian_lane_map (d->vmode, high))
29529 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
29533 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29535 for (i = 0; i < nelt / 2; i++)
29538 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
29539 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
29543 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
29544 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
29556 std::swap (in0, in1);
29559 out1 = gen_reg_rtx (d->vmode);
29561 std::swap (out0, out1);
29563 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
29567 /* Recognize patterns for the VREV insns. */
29569 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
29571 unsigned int i, j, diff, nelt = d->perm.length ();
29572 rtx (*gen) (machine_mode, rtx, rtx);
29574 if (!d->one_vector_p)
29585 gen = gen_neon_vrev64;
29596 gen = gen_neon_vrev32;
29602 gen = gen_neon_vrev64;
29613 gen = gen_neon_vrev16;
29617 gen = gen_neon_vrev32;
29623 gen = gen_neon_vrev64;
29633 for (i = 0; i < nelt ; i += diff + 1)
29634 for (j = 0; j <= diff; j += 1)
29636 /* This is guaranteed to be true as the value of diff
29637 is 7, 3, 1 and we should have enough elements in the
29638 queue to generate this. Getting a vector mask with a
29639 value of diff other than these values implies that
29640 something is wrong by the time we get here. */
29641 gcc_assert (i + j < nelt);
29642 if (d->perm[i + j] != i + diff - j)
29650 emit_insn (gen (d->vmode, d->target, d->op0));
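/* Example: the V8QImode permutation {1,0,3,2,5,4,7,6} has diff == 1
   and is matched to vrev16; {3,2,1,0,7,6,5,4} has diff == 3 and is
   matched to vrev32.  */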
29654 /* Recognize patterns for the VTRN insns. */
29657 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29659 unsigned int i, odd, mask, nelt = d->perm.length ();
29660 rtx out0, out1, in0, in1;
29662 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29665 /* Note that these are little-endian tests. Adjust for big-endian later. */
29666 if (d->perm[0] == 0)
29668 else if (d->perm[0] == 1)
29672 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29674 for (i = 0; i < nelt; i += 2)
29676 if (d->perm[i] != i + odd)
29678 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29688 if (BYTES_BIG_ENDIAN)
29690 std::swap (in0, in1);
29695 out1 = gen_reg_rtx (d->vmode);
29697 std::swap (out0, out1);
29699 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
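/* Example: on V4SImode, the permutation {0, 4, 2, 6} (odd == 0) or
   {1, 5, 3, 7} (odd == 1) interleaves alternating lanes of the two
   inputs and is matched to vtrn.  */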
29703 /* Recognize patterns for the VEXT insns. */
29706 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29708 unsigned int i, nelt = d->perm.length ();
29711 unsigned int location;
29713 unsigned int next = d->perm[0] + 1;
29715 /* TODO: Handle GCC's numbering of elements for big-endian. */
29716 if (BYTES_BIG_ENDIAN)
29719 /* Check if the extracted indexes are increasing by one. */
29720 for (i = 1; i < nelt; next++, i++)
29722 /* If we hit the most significant element of the 2nd vector in
29723 the previous iteration, no need to test further. */
29724 if (next == 2 * nelt)
29727 /* If we are operating on only one vector, it could be a
29728 rotation. If there are only two elements of size < 64, let
29729 arm_evpc_neon_vrev catch it. */
29730 if (d->one_vector_p && (next == nelt))
29732 if ((nelt == 2) && (d->vmode != V2DImode))
29738 if (d->perm[i] != next)
29742 location = d->perm[0];
29748 offset = GEN_INT (location);
29750 if (d->vmode == E_DImode)
29753 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
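/* Example: on V4SImode, the permutation {1, 2, 3, 4} extracts a run of
   lanes starting at index 1 across the two inputs, and is matched to a
   vext with the location operand #1.  */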
29757 /* The NEON VTBL instruction is a fully variable permutation that's even
29758 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29759 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29760 can do slightly better by expanding this as a constant where we don't
29761 have to apply a mask. */
29764 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29766 rtx rperm[MAX_VECT_LEN], sel;
29767 machine_mode vmode = d->vmode;
29768 unsigned int i, nelt = d->perm.length ();
29770 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29771 numbering of elements for big-endian, we must reverse the order. */
29772 if (BYTES_BIG_ENDIAN)
29778 /* Generic code will try constant permutation twice. Once with the
29779 original mode and again with the elements lowered to QImode.
29780 So wait and don't do the selector expansion ourselves. */
29781 if (vmode != V8QImode && vmode != V16QImode)
29784 for (i = 0; i < nelt; ++i)
29785 rperm[i] = GEN_INT (d->perm[i]);
29786 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29787 sel = force_reg (vmode, sel);
29789 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29794 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29796 /* Check if the input mask matches vext before reordering the
29799 if (arm_evpc_neon_vext (d))
29802 /* The pattern matching functions above are written to look for a small
29803 number to begin the sequence (0, 1, N/2). If we begin with an index
29804 from the second operand, we can swap the operands. */
29805 unsigned int nelt = d->perm.length ();
29806 if (d->perm[0] >= nelt)
29808 d->perm.rotate_inputs (1);
29809 std::swap (d->op0, d->op1);
29814 if (arm_evpc_neon_vuzp (d))
29816 if (arm_evpc_neon_vzip (d))
29818 if (arm_evpc_neon_vrev (d))
29820 if (arm_evpc_neon_vtrn (d))
29822 return arm_evpc_neon_vtbl (d);
29827 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
29830 arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
29831 const vec_perm_indices &sel)
29833 struct expand_vec_perm_d d;
29834 int i, nelt, which;
29836 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
29844 gcc_assert (VECTOR_MODE_P (d.vmode));
29845 d.testing_p = !target;
29847 nelt = GET_MODE_NUNITS (d.vmode);
29848 for (i = which = 0; i < nelt; ++i)
29850 int ei = sel[i] & (2 * nelt - 1);
29851 which |= (ei < nelt ? 1 : 2);
29860 d.one_vector_p = false;
29861 if (d.testing_p || !rtx_equal_p (op0, op1))
29864 /* The elements of PERM do not suggest that only the first operand
29865 is used, but both operands are identical. Allow easier matching
29866 of the permutation by folding the permutation into the single operand itself. */
29871 d.one_vector_p = true;
29876 d.one_vector_p = true;
29880 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
29883 return arm_expand_vec_perm_const_1 (&d);
29885 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29886 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29887 if (!d.one_vector_p)
29888 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29891 bool ret = arm_expand_vec_perm_const_1 (&d);
29898 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29900 /* If we are soft float and we have ldrd, or the access fits in a
29901 single word, then all auto increment forms are ok. */
29902 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29907 /* Post increment and Pre Decrement are supported for all
29908 instruction forms except for vector forms. */
29911 if (VECTOR_MODE_P (mode))
29913 if (code != ARM_PRE_DEC)
29923 /* Without LDRD and mode size greater than
29924 word size, there is no point in auto-incrementing
29925 because ldm and stm will not have these forms. */
29926 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29929 /* Vector and floating point modes do not support
29930 these auto increment forms. */
29931 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29944 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29945 on ARM, since we know that shifts by negative amounts are no-ops.
29946 Additionally, the default expansion code is not available or suitable
29947 for post-reload insn splits (this can occur when the register allocator
29948 chooses not to do a shift in NEON).
29950 This function is used in both initial expand and post-reload splits, and
29951 handles all kinds of 64-bit shifts.
29953 Input requirements:
29954 - It is safe for the input and output to be the same register, but
29955 early-clobber rules apply for the shift amount and scratch registers.
29956 - Shift by register requires both scratch registers. In all other cases
29957 the scratch registers may be NULL.
29958 - Ashiftrt by a register also clobbers the CC register. */
29960 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29961 rtx amount, rtx scratch1, rtx scratch2)
29963 rtx out_high = gen_highpart (SImode, out);
29964 rtx out_low = gen_lowpart (SImode, out);
29965 rtx in_high = gen_highpart (SImode, in);
29966 rtx in_low = gen_lowpart (SImode, in);
29969 in = the register pair containing the input value.
29970 out = the destination register pair.
29971 up = the high- or low-part of each pair.
29972 down = the opposite part to "up".
29973 In a shift, we can consider bits to shift from "up"-stream to
29974 "down"-stream, so in a left-shift "up" is the low-part and "down"
29975 is the high-part of each register pair. */
29977 rtx out_up = code == ASHIFT ? out_low : out_high;
29978 rtx out_down = code == ASHIFT ? out_high : out_low;
29979 rtx in_up = code == ASHIFT ? in_low : in_high;
29980 rtx in_down = code == ASHIFT ? in_high : in_low;
29982 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29984 && (REG_P (out) || GET_CODE (out) == SUBREG)
29985 && GET_MODE (out) == DImode);
29987 && (REG_P (in) || GET_CODE (in) == SUBREG)
29988 && GET_MODE (in) == DImode);
29990 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29991 && GET_MODE (amount) == SImode)
29992 || CONST_INT_P (amount)));
29993 gcc_assert (scratch1 == NULL
29994 || (GET_CODE (scratch1) == SCRATCH)
29995 || (GET_MODE (scratch1) == SImode
29996 && REG_P (scratch1)));
29997 gcc_assert (scratch2 == NULL
29998 || (GET_CODE (scratch2) == SCRATCH)
29999 || (GET_MODE (scratch2) == SImode
30000 && REG_P (scratch2)));
30001 gcc_assert (!REG_P (out) || !REG_P (amount)
30002 || !HARD_REGISTER_P (out)
30003 || (REGNO (out) != REGNO (amount)
30004 && REGNO (out) + 1 != REGNO (amount)));
30006 /* Macros to make following code more readable. */
30007 #define SUB_32(DEST,SRC) \
30008 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
30009 #define RSB_32(DEST,SRC) \
30010 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
30011 #define SUB_S_32(DEST,SRC) \
30012 gen_addsi3_compare0 ((DEST), (SRC), \
30014 #define SET(DEST,SRC) \
30015 gen_rtx_SET ((DEST), (SRC))
30016 #define SHIFT(CODE,SRC,AMOUNT) \
30017 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
30018 #define LSHIFT(CODE,SRC,AMOUNT) \
30019 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
30020 SImode, (SRC), (AMOUNT))
30021 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
30022 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
30023 SImode, (SRC), (AMOUNT))
30025 gen_rtx_IOR (SImode, (A), (B))
30026 #define BRANCH(COND,LABEL) \
30027 gen_arm_cond_branch ((LABEL), \
30028 gen_rtx_ ## COND (CCmode, cc_reg, \
30032 /* Shifts by register and shifts by constant are handled separately. */
30033 if (CONST_INT_P (amount))
30035 /* We have a shift-by-constant. */
30037 /* First, handle out-of-range shift amounts.
30038 In both cases we try to match the result an ARM instruction in a
30039 shift-by-register would give. This helps reduce execution
30040 differences between optimization levels, but it won't stop other
30041 parts of the compiler doing different things. This is "undefined
30042 behavior", in any case. */
30043 if (INTVAL (amount) <= 0)
30044 emit_insn (gen_movdi (out, in));
30045 else if (INTVAL (amount) >= 64)
30047 if (code == ASHIFTRT)
30049 rtx const31_rtx = GEN_INT (31);
30050 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
30051 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
30054 emit_insn (gen_movdi (out, const0_rtx));
30057 /* Now handle valid shifts. */
30058 else if (INTVAL (amount) < 32)
30060 /* Shifts by a constant less than 32. */
30061 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
30063 /* Clearing the out register in DImode first avoids lots
30064 of spilling and results in less stack usage.
30065 Later this redundant insn is completely removed.
30066 Do that only if "in" and "out" are different registers. */
30067 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
30068 emit_insn (SET (out, const0_rtx));
30069 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
30070 emit_insn (SET (out_down,
30071 ORR (REV_LSHIFT (code, in_up, reverse_amount),
30073 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
30077 /* Shifts by a constant greater than 31. */
30078 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
30080 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
30081 emit_insn (SET (out, const0_rtx));
30082 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
30083 if (code == ASHIFTRT)
30084 emit_insn (gen_ashrsi3 (out_up, in_up,
30087 emit_insn (SET (out_up, const0_rtx));
30092 /* We have a shift-by-register. */
30093 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
30095 /* This alternative requires the scratch registers. */
30096 gcc_assert (scratch1 && REG_P (scratch1));
30097 gcc_assert (scratch2 && REG_P (scratch2));
30099 /* We will need the values "amount-32" and "32-amount" later.
30100 Swapping them around now allows the later code to be more general. */
30104 emit_insn (SUB_32 (scratch1, amount));
30105 emit_insn (RSB_32 (scratch2, amount));
30108 emit_insn (RSB_32 (scratch1, amount));
30109 /* Also set CC = amount > 32. */
30110 emit_insn (SUB_S_32 (scratch2, amount));
30113 emit_insn (RSB_32 (scratch1, amount));
30114 emit_insn (SUB_32 (scratch2, amount));
30117 gcc_unreachable ();
30120 /* Emit code like this:
30123 out_down = in_down << amount;
30124 out_down = (in_up << (amount - 32)) | out_down;
30125 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
30126 out_up = in_up << amount;
30129 out_down = in_down >> amount;
30130 out_down = (in_up << (32 - amount)) | out_down;
30132 out_down = ((signed)in_up >> (amount - 32)) | out_down;
30133 out_up = in_up >> amount;
30136 out_down = in_down >> amount;
30137 out_down = (in_up << (32 - amount)) | out_down;
30139 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
30140 out_up = in_up >> amount;
30142 The ARM and Thumb2 variants are the same but implemented slightly
30143 differently. If this were only called during expand we could just
30144 use the Thumb2 case and let combine do the right thing, but this
30145 can also be called from post-reload splitters. */
30147 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
30149 if (!TARGET_THUMB2)
30151 /* Emit code for ARM mode. */
30152 emit_insn (SET (out_down,
30153 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
30154 if (code == ASHIFTRT)
30156 rtx_code_label *done_label = gen_label_rtx ();
30157 emit_jump_insn (BRANCH (LT, done_label));
30158 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
30160 emit_label (done_label);
30163 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
30168 /* Emit code for Thumb2 mode.
30169 Thumb2 can't do shift and or in one insn. */
30170 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
30171 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
30173 if (code == ASHIFTRT)
30175 rtx_code_label *done_label = gen_label_rtx ();
30176 emit_jump_insn (BRANCH (LT, done_label));
30177 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
30178 emit_insn (SET (out_down, ORR (out_down, scratch2)));
30179 emit_label (done_label);
30183 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
30184 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
30188 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
30202 /* Returns true if the pattern is a valid symbolic address, which is either a
30203 symbol_ref or (symbol_ref + addend).
30205 According to the ARM ELF ABI, the initial addend of REL-type relocations
30206 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
30207 literal field of the instruction as a 16-bit signed value in the range
30208 -32768 <= A < 32768. */
30211 arm_valid_symbolic_address_p (rtx addr)
30213 rtx xop0, xop1 = NULL_RTX;
30216 if (target_word_relocations)
30219 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
30222 /* (const (plus: symbol_ref const_int)) */
30223 if (GET_CODE (addr) == CONST)
30224 tmp = XEXP (addr, 0);
30226 if (GET_CODE (tmp) == PLUS)
30228 xop0 = XEXP (tmp, 0);
30229 xop1 = XEXP (tmp, 1);
30231 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
30232 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
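/* Example: (symbol_ref "x") and (const (plus (symbol_ref "x")
   (const_int 0x100))) are valid; an addend of 0x12345 is rejected
   because it does not fit the signed 16-bit REL addend.  */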
30238 /* Returns true if *COMPARISON is a valid comparison operation, and puts
30239 the operands into a form that is valid. */
30241 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
30243 enum rtx_code code = GET_CODE (*comparison);
30245 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
30246 ? GET_MODE (*op2) : GET_MODE (*op1);
30248 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
30250 if (code == UNEQ || code == LTGT)
30253 code_int = (int)code;
30254 arm_canonicalize_comparison (&code_int, op1, op2, 0);
30255 PUT_CODE (*comparison, (enum rtx_code)code_int);
30260 if (!arm_add_operand (*op1, mode))
30261 *op1 = force_reg (mode, *op1);
30262 if (!arm_add_operand (*op2, mode))
30263 *op2 = force_reg (mode, *op2);
30267 if (!cmpdi_operand (*op1, mode))
30268 *op1 = force_reg (mode, *op1);
30269 if (!cmpdi_operand (*op2, mode))
30270 *op2 = force_reg (mode, *op2);
30274 if (!TARGET_VFP_FP16INST)
30276 /* FP16 comparisons are done in SF mode. */
30278 *op1 = convert_to_mode (mode, *op1, 1);
30279 *op2 = convert_to_mode (mode, *op2, 1);
30280 /* Fall through. */
30283 if (!vfp_compare_operand (*op1, mode))
30284 *op1 = force_reg (mode, *op1);
30285 if (!vfp_compare_operand (*op2, mode))
30286 *op2 = force_reg (mode, *op2);
/* Maximum number of instructions to use when setting a block of memory.  */
30298 arm_block_set_max_insns (void)
30300 if (optimize_function_for_size_p (cfun))
30303 return current_tune->max_insns_inline_memset;
/* Return TRUE if it's profitable to set a block of memory for the
   non-vectorized case.  VAL is the value to set the memory with.
   LENGTH is the number of bytes to set.  ALIGN is the alignment of
   the destination memory in bytes.  UNALIGNED_P is TRUE if we can
   only set the memory with instructions meeting alignment
   requirements.  USE_STRD_P is TRUE if we can use strd to set the
   memory.  */
30314 arm_block_set_non_vect_profit_p (rtx val,
30315 unsigned HOST_WIDE_INT length,
30316 unsigned HOST_WIDE_INT align,
30317 bool unaligned_p, bool use_strd_p)
/* For a leftover of 0-7 bytes, we can set the memory block using
     strb/strh/str with the minimum number of instructions, as the
     table below shows.  */
30322 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
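  /* For instance, a leftover of 5 bytes costs leftover[5] == 2
     instructions (one STR for four bytes plus one STRB), and a
     leftover of 7 bytes costs leftover[7] == 3 (STR + STRH + STRB).  */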
30326 num = arm_const_inline_cost (SET, val);
30327 num += length / align + length % align;
30329 else if (use_strd_p)
30331 num = arm_const_double_inline_cost (val);
30332 num += (length >> 3) + leftover[length & 7];
30336 num = arm_const_inline_cost (SET, val);
30337 num += (length >> 2) + leftover[length & 3];
/* We may be able to combine the last STRH/STRB pair into a single
     STR by shifting one byte back.  */
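  /* E.g. with LENGTH == 7 the tail would need an STRH plus an STRB;
     shifting back one byte replaces the pair with a single unaligned
     STR covering bytes 3-6, re-storing byte 3 with the same value.  */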
30342 if (unaligned_access && length > 3 && (length & 3) == 3)
30345 return (num <= arm_block_set_max_insns ());
/* Return TRUE if it's profitable to set a block of memory for the
   vectorized case.  LENGTH is the number of bytes to set.  ALIGN is
   the alignment of the destination memory in bytes.  MODE is the
   vector mode used to set the memory.  */
30353 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
30354 unsigned HOST_WIDE_INT align,
30358 bool unaligned_p = ((align & 3) != 0);
30359 unsigned int nelt = GET_MODE_NUNITS (mode);
30361 /* Instruction loading constant value. */
30363 /* Instructions storing the memory. */
30364 num += (length + nelt - 1) / nelt;
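  /* E.g. LENGTH == 19 in V16QImode (nelt == 16) needs
     (19 + 16 - 1) / 16 == 2 store instructions.  */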
/* Instructions adjusting the address expression.  We only need to
     adjust the address expression if it's 4-byte aligned and the
     leftover bytes can only be stored by a misaligned store
     instruction.  */
30368 if (!unaligned_p && (length & 3) != 0)
30371 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
30372 if (!unaligned_p && mode == V16QImode)
30375 return (num <= arm_block_set_max_insns ());
30378 /* Set a block of memory using vectorization instructions for the
30379 unaligned case. We fill the first LENGTH bytes of the memory
30380 area starting from DSTBASE with byte constant VALUE. ALIGN is
30381 the alignment requirement of memory. Return TRUE if succeeded. */
30383 arm_block_set_unaligned_vect (rtx dstbase,
30384 unsigned HOST_WIDE_INT length,
30385 unsigned HOST_WIDE_INT value,
30386 unsigned HOST_WIDE_INT align)
30388 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
30391 rtx (*gen_func) (rtx, rtx);
30393 unsigned HOST_WIDE_INT v = value;
30394 unsigned int offset = 0;
30395 gcc_assert ((align & 0x3) != 0);
30396 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30397 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30398 if (length >= nelt_v16)
30401 gen_func = gen_movmisalignv16qi;
30406 gen_func = gen_movmisalignv8qi;
30408 nelt_mode = GET_MODE_NUNITS (mode);
30409 gcc_assert (length >= nelt_mode);
30410 /* Skip if it isn't profitable. */
30411 if (!arm_block_set_vect_profit_p (length, align, mode))
30414 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30415 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30417 v = sext_hwi (v, BITS_PER_WORD);
30419 reg = gen_reg_rtx (mode);
30420 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
30421 /* Emit instruction loading the constant value. */
30422 emit_move_insn (reg, val_vec);
30424 /* Handle nelt_mode bytes in a vector. */
30425 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
30427 emit_insn ((*gen_func) (mem, reg));
30428 if (i + 2 * nelt_mode <= length)
30430 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
30431 offset += nelt_mode;
30432 mem = adjust_automodify_address (dstbase, mode, dst, offset);
/* If there are no fewer than nelt_v8 bytes leftover, we must be in
     V16QI mode.  */
30438 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
30440 /* Handle (8, 16) bytes leftover. */
30441 if (i + nelt_v8 < length)
30443 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
30444 offset += length - i;
30445 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30447 /* We are shifting bytes back, set the alignment accordingly. */
30448 if ((length & 1) != 0 && align >= 2)
30449 set_mem_align (mem, BITS_PER_UNIT);
30451 emit_insn (gen_movmisalignv16qi (mem, reg));
30453 /* Handle (0, 8] bytes leftover. */
30454 else if (i < length && i + nelt_v8 >= length)
30456 if (mode == V16QImode)
30457 reg = gen_lowpart (V8QImode, reg);
30459 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
30460 + (nelt_mode - nelt_v8))));
30461 offset += (length - i) + (nelt_mode - nelt_v8);
30462 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
30464 /* We are shifting bytes back, set the alignment accordingly. */
30465 if ((length & 1) != 0 && align >= 2)
30466 set_mem_align (mem, BITS_PER_UNIT);
30468 emit_insn (gen_movmisalignv8qi (mem, reg));
30474 /* Set a block of memory using vectorization instructions for the
30475 aligned case. We fill the first LENGTH bytes of the memory area
30476 starting from DSTBASE with byte constant VALUE. ALIGN is the
30477 alignment requirement of memory. Return TRUE if succeeded. */
30479 arm_block_set_aligned_vect (rtx dstbase,
30480 unsigned HOST_WIDE_INT length,
30481 unsigned HOST_WIDE_INT value,
30482 unsigned HOST_WIDE_INT align)
30484 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
30485 rtx dst, addr, mem;
30488 unsigned int offset = 0;
30490 gcc_assert ((align & 0x3) == 0);
30491 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30492 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30493 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
30498 nelt_mode = GET_MODE_NUNITS (mode);
30499 gcc_assert (length >= nelt_mode);
30500 /* Skip if it isn't profitable. */
30501 if (!arm_block_set_vect_profit_p (length, align, mode))
30504 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30506 reg = gen_reg_rtx (mode);
30507 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
30508 /* Emit instruction loading the constant value. */
30509 emit_move_insn (reg, val_vec);
30512 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
30513 if (mode == V16QImode)
30515 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30516 emit_insn (gen_movmisalignv16qi (mem, reg));
30518 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
30519 if (i + nelt_v8 < length && i + nelt_v16 > length)
30521 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30522 offset += length - nelt_mode;
30523 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30524 /* We are shifting bytes back, set the alignment accordingly. */
30525 if ((length & 0x3) == 0)
30526 set_mem_align (mem, BITS_PER_UNIT * 4);
30527 else if ((length & 0x1) == 0)
30528 set_mem_align (mem, BITS_PER_UNIT * 2);
30530 set_mem_align (mem, BITS_PER_UNIT);
30532 emit_insn (gen_movmisalignv16qi (mem, reg));
30535 /* Fall through for bytes leftover. */
30537 nelt_mode = GET_MODE_NUNITS (mode);
30538 reg = gen_lowpart (V8QImode, reg);
30541 /* Handle 8 bytes in a vector. */
30542 for (; (i + nelt_mode <= length); i += nelt_mode)
30544 addr = plus_constant (Pmode, dst, i);
30545 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
30546 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
30547 emit_move_insn (mem, reg);
30549 emit_insn (gen_unaligned_storev8qi (mem, reg));
30552 /* Handle single word leftover by shifting 4 bytes back. We can
30553 use aligned access for this case. */
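  /* E.g. if LENGTH == 12, the loop above stored bytes 0-7 and left
     i == 8; the final store is then issued at offset 4 and covers
     bytes 4-11, re-storing bytes 4-7 with the same value.  */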
30554 if (i + UNITS_PER_WORD == length)
30556 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30557 offset += i - UNITS_PER_WORD;
30558 mem = adjust_automodify_address (dstbase, mode, addr, offset);
30559 /* We are shifting 4 bytes back, set the alignment accordingly. */
30560 if (align > UNITS_PER_WORD)
30561 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30563 emit_insn (gen_unaligned_storev8qi (mem, reg));
30565 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30566 We have to use unaligned access for this case. */
30567 else if (i < length)
30569 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30570 offset += length - nelt_mode;
30571 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30572 /* We are shifting bytes back, set the alignment accordingly. */
30573 if ((length & 1) == 0)
30574 set_mem_align (mem, BITS_PER_UNIT * 2);
30576 set_mem_align (mem, BITS_PER_UNIT);
30578 emit_insn (gen_movmisalignv8qi (mem, reg));
/* Set a block of memory using plain strh/strb instructions, only
   using instructions allowed by ALIGN on the processor.  We fill the
   first LENGTH bytes of the memory area starting from DSTBASE
   with byte constant VALUE.  ALIGN is the alignment requirement
   of memory.  */
30590 arm_block_set_unaligned_non_vect (rtx dstbase,
30591 unsigned HOST_WIDE_INT length,
30592 unsigned HOST_WIDE_INT value,
30593 unsigned HOST_WIDE_INT align)
30596 rtx dst, addr, mem;
30597 rtx val_exp, val_reg, reg;
30599 HOST_WIDE_INT v = value;
30601 gcc_assert (align == 1 || align == 2);
30604 v |= (value << BITS_PER_UNIT);
30606 v = sext_hwi (v, BITS_PER_WORD);
30607 val_exp = GEN_INT (v);
30608 /* Skip if it isn't profitable. */
30609 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30610 align, true, false))
30613 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30614 mode = (align == 2 ? HImode : QImode);
30615 val_reg = force_reg (SImode, val_exp);
30616 reg = gen_lowpart (mode, val_reg);
30618 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30620 addr = plus_constant (Pmode, dst, i);
30621 mem = adjust_automodify_address (dstbase, mode, addr, i);
30622 emit_move_insn (mem, reg);
30625 /* Handle single byte leftover. */
30626 if (i + 1 == length)
30628 reg = gen_lowpart (QImode, val_reg);
30629 addr = plus_constant (Pmode, dst, i);
30630 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30631 emit_move_insn (mem, reg);
30635 gcc_assert (i == length);
30639 /* Set a block of memory using plain strd/str/strh/strb instructions,
to permit unaligned stores on processors which support unaligned
30641 semantics for those instructions. We fill the first LENGTH bytes
30642 of the memory area starting from DSTBASE with byte constant VALUE.
30643 ALIGN is the alignment requirement of memory. */
30645 arm_block_set_aligned_non_vect (rtx dstbase,
30646 unsigned HOST_WIDE_INT length,
30647 unsigned HOST_WIDE_INT value,
30648 unsigned HOST_WIDE_INT align)
30651 rtx dst, addr, mem;
30652 rtx val_exp, val_reg, reg;
30653 unsigned HOST_WIDE_INT v;
30656 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30657 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30659 v = (value | (value << 8) | (value << 16) | (value << 24));
30660 if (length < UNITS_PER_WORD)
30661 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
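  /* E.g. VALUE == 0xAB with LENGTH == 2 gives v == 0xABAB, so only the
     bytes that will actually be stored carry the replicated value.  */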
30664 v |= (v << BITS_PER_WORD);
30666 v = sext_hwi (v, BITS_PER_WORD);
30668 val_exp = GEN_INT (v);
30669 /* Skip if it isn't profitable. */
30670 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30671 align, false, use_strd_p))
30676 /* Try without strd. */
30677 v = (v >> BITS_PER_WORD);
30678 v = sext_hwi (v, BITS_PER_WORD);
30679 val_exp = GEN_INT (v);
30680 use_strd_p = false;
30681 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30682 align, false, use_strd_p))
30687 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30688 /* Handle double words using strd if possible. */
30691 val_reg = force_reg (DImode, val_exp);
30693 for (; (i + 8 <= length); i += 8)
30695 addr = plus_constant (Pmode, dst, i);
30696 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30697 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
30698 emit_move_insn (mem, reg);
30700 emit_insn (gen_unaligned_storedi (mem, reg));
30704 val_reg = force_reg (SImode, val_exp);
30706 /* Handle words. */
30707 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30708 for (; (i + 4 <= length); i += 4)
30710 addr = plus_constant (Pmode, dst, i);
30711 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30712 if ((align & 3) == 0)
30713 emit_move_insn (mem, reg);
30715 emit_insn (gen_unaligned_storesi (mem, reg));
30718 /* Merge last pair of STRH and STRB into a STR if possible. */
30719 if (unaligned_access && i > 0 && (i + 3) == length)
30721 addr = plus_constant (Pmode, dst, i - 1);
30722 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30723 /* We are shifting one byte back, set the alignment accordingly. */
30724 if ((align & 1) == 0)
30725 set_mem_align (mem, BITS_PER_UNIT);
30727 /* Most likely this is an unaligned access, and we can't tell at
30728 compilation time. */
30729 emit_insn (gen_unaligned_storesi (mem, reg));
30733 /* Handle half word leftover. */
30734 if (i + 2 <= length)
30736 reg = gen_lowpart (HImode, val_reg);
30737 addr = plus_constant (Pmode, dst, i);
30738 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30739 if ((align & 1) == 0)
30740 emit_move_insn (mem, reg);
30742 emit_insn (gen_unaligned_storehi (mem, reg));
30747 /* Handle single byte leftover. */
30748 if (i + 1 == length)
30750 reg = gen_lowpart (QImode, val_reg);
30751 addr = plus_constant (Pmode, dst, i);
30752 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30753 emit_move_insn (mem, reg);
30759 /* Set a block of memory using vectorization instructions for both
30760 aligned and unaligned cases. We fill the first LENGTH bytes of
30761 the memory area starting from DSTBASE with byte constant VALUE.
30762 ALIGN is the alignment requirement of memory. */
30764 arm_block_set_vect (rtx dstbase,
30765 unsigned HOST_WIDE_INT length,
30766 unsigned HOST_WIDE_INT value,
30767 unsigned HOST_WIDE_INT align)
30769 /* Check whether we need to use unaligned store instruction. */
30770 if (((align & 3) != 0 || (length & 3) != 0)
30771 /* Check whether unaligned store instruction is available. */
30772 && (!unaligned_access || BYTES_BIG_ENDIAN))
30775 if ((align & 3) == 0)
30776 return arm_block_set_aligned_vect (dstbase, length, value, align);
30778 return arm_block_set_unaligned_vect (dstbase, length, value, align);
/* Expand a string store operation.  First we try to do that using
   vectorization instructions, then try with ARM unaligned access and
   double-word stores if profitable.  OPERANDS[0] is the destination,
   OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
   initialize the memory with, OPERANDS[3] is the known alignment of
   the destination memory.  */
30788 arm_gen_setmem (rtx *operands)
30790 rtx dstbase = operands[0];
30791 unsigned HOST_WIDE_INT length;
30792 unsigned HOST_WIDE_INT value;
30793 unsigned HOST_WIDE_INT align;
30795 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30798 length = UINTVAL (operands[1]);
30802 value = (UINTVAL (operands[2]) & 0xFF);
30803 align = UINTVAL (operands[3]);
30804 if (TARGET_NEON && length >= 8
30805 && current_tune->string_ops_prefer_neon
30806 && arm_block_set_vect (dstbase, length, value, align))
30809 if (!unaligned_access && (align & 3) != 0)
30810 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30812 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30817 arm_macro_fusion_p (void)
30819 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30822 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30823 for MOVW / MOVT macro fusion. */
30826 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30828 /* We are trying to fuse
30829 movw imm / movt imm
30830 instructions as a group that gets scheduled together. */
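  /* At the assembly level such a pair typically looks like:
       movw r0, #:lower16:sym
       movt r0, #:upper16:sym
     which suitable cores can treat as a single fused operation.  */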
30832 rtx set_dest = SET_DEST (curr_set);
30834 if (GET_MODE (set_dest) != SImode)
30837 /* We are trying to match:
30838 prev (movw) == (set (reg r0) (const_int imm16))
30839 curr (movt) == (set (zero_extract (reg r0)
30842 (const_int imm16_1))
30844 prev (movw) == (set (reg r1)
30845 (high (symbol_ref ("SYM"))))
30846 curr (movt) == (set (reg r0)
30848 (symbol_ref ("SYM")))) */
30850 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30852 if (CONST_INT_P (SET_SRC (curr_set))
30853 && CONST_INT_P (SET_SRC (prev_set))
30854 && REG_P (XEXP (set_dest, 0))
30855 && REG_P (SET_DEST (prev_set))
30856 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30860 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30861 && REG_P (SET_DEST (curr_set))
30862 && REG_P (SET_DEST (prev_set))
30863 && GET_CODE (SET_SRC (prev_set)) == HIGH
30864 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30871 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30873 rtx prev_set = single_set (prev);
30874 rtx curr_set = single_set (curr);
30880 if (any_condjump_p (curr))
30883 if (!arm_macro_fusion_p ())
30886 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30887 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30893 /* Return true iff the instruction fusion described by OP is enabled. */
30895 arm_fusion_enabled_p (tune_params::fuse_ops op)
30897 return current_tune->fusible_ops & op;
30900 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30901 scheduled for speculative execution. Reject the long-running division
30902 and square-root instructions. */
30905 arm_sched_can_speculate_insn (rtx_insn *insn)
30907 switch (get_attr_type (insn))
30915 case TYPE_NEON_FP_SQRT_S:
30916 case TYPE_NEON_FP_SQRT_D:
30917 case TYPE_NEON_FP_SQRT_S_Q:
30918 case TYPE_NEON_FP_SQRT_D_Q:
30919 case TYPE_NEON_FP_DIV_S:
30920 case TYPE_NEON_FP_DIV_D:
30921 case TYPE_NEON_FP_DIV_S_Q:
30922 case TYPE_NEON_FP_DIV_D_Q:
30929 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
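/* Under ASan's usual 1:8 shadow mapping this places the shadow byte
   for address A at (A >> 3) + 0x20000000.  */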
30931 static unsigned HOST_WIDE_INT
30932 arm_asan_shadow_offset (void)
30934 return HOST_WIDE_INT_1U << 29;
30938 /* This is a temporary fix for PR60655. Ideally we need
30939 to handle most of these cases in the generic part but
30940 currently we reject minus (..) (sym_ref). We try to
30941 ameliorate the case with minus (sym_ref1) (sym_ref2)
30942 where they are in the same section. */
30945 arm_const_not_ok_for_debug_p (rtx p)
30947 tree decl_op0 = NULL;
30948 tree decl_op1 = NULL;
30950 if (GET_CODE (p) == UNSPEC)
30952 if (GET_CODE (p) == MINUS)
30954 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30956 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30958 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30959 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30961 if ((VAR_P (decl_op1)
30962 || TREE_CODE (decl_op1) == CONST_DECL)
30963 && (VAR_P (decl_op0)
30964 || TREE_CODE (decl_op0) == CONST_DECL))
30965 return (get_variable_section (decl_op1, false)
30966 != get_variable_section (decl_op0, false));
30968 if (TREE_CODE (decl_op1) == LABEL_DECL
30969 && TREE_CODE (decl_op0) == LABEL_DECL)
30970 return (DECL_CONTEXT (decl_op1)
30971 != DECL_CONTEXT (decl_op0));
/* Return TRUE if X is a reference to a value in a constant pool.  */
30983 arm_is_constant_pool_ref (rtx x)
30986 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30987 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30990 /* Remember the last target of arm_set_current_function. */
30991 static GTY(()) tree arm_previous_fndecl;
30993 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30996 save_restore_target_globals (tree new_tree)
30998 /* If we have a previous state, use it. */
30999 if (TREE_TARGET_GLOBALS (new_tree))
31000 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
31001 else if (new_tree == target_option_default_node)
31002 restore_target_globals (&default_target_globals);
31005 /* Call target_reinit and save the state for TARGET_GLOBALS. */
31006 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
31009 arm_option_params_internal ();
31012 /* Invalidate arm_previous_fndecl. */
31015 arm_reset_previous_fndecl (void)
31017 arm_previous_fndecl = NULL_TREE;
31020 /* Establish appropriate back-end context for processing the function
31021 FNDECL. The argument might be NULL to indicate processing at top
31022 level, outside of any function scope. */
31025 arm_set_current_function (tree fndecl)
31027 if (!fndecl || fndecl == arm_previous_fndecl)
31030 tree old_tree = (arm_previous_fndecl
31031 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
31034 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
31036 /* If current function has no attributes but previous one did,
31037 use the default node. */
31038 if (! new_tree && old_tree)
31039 new_tree = target_option_default_node;
31041 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
31042 the default have been handled by save_restore_target_globals from
31043 arm_pragma_target_parse. */
31044 if (old_tree == new_tree)
31047 arm_previous_fndecl = fndecl;
31049 /* First set the target options. */
31050 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
31052 save_restore_target_globals (new_tree);
31055 /* Implement TARGET_OPTION_PRINT. */
31058 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
31060 int flags = ptr->x_target_flags;
31061 const char *fpu_name;
31063 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
31064 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
31066 fprintf (file, "%*sselected isa %s\n", indent, "",
31067 TARGET_THUMB2_P (flags) ? "thumb2" :
31068 TARGET_THUMB_P (flags) ? "thumb1" :
31071 if (ptr->x_arm_arch_string)
31072 fprintf (file, "%*sselected architecture %s\n", indent, "",
31073 ptr->x_arm_arch_string);
31075 if (ptr->x_arm_cpu_string)
31076 fprintf (file, "%*sselected CPU %s\n", indent, "",
31077 ptr->x_arm_cpu_string);
31079 if (ptr->x_arm_tune_string)
31080 fprintf (file, "%*sselected tune %s\n", indent, "",
31081 ptr->x_arm_tune_string);
31083 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
31086 /* Hook to determine if one function can safely inline another. */
31089 arm_can_inline_p (tree caller, tree callee)
31091 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
31092 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
31093 bool can_inline = true;
31095 struct cl_target_option *caller_opts
31096 = TREE_TARGET_OPTION (caller_tree ? caller_tree
31097 : target_option_default_node);
31099 struct cl_target_option *callee_opts
31100 = TREE_TARGET_OPTION (callee_tree ? callee_tree
31101 : target_option_default_node);
31103 if (callee_opts == caller_opts)
31106 /* Callee's ISA features should be a subset of the caller's. */
31107 struct arm_build_target caller_target;
31108 struct arm_build_target callee_target;
31109 caller_target.isa = sbitmap_alloc (isa_num_bits);
31110 callee_target.isa = sbitmap_alloc (isa_num_bits);
31112 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
31114 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
31116 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
31117 can_inline = false;
31119 sbitmap_free (caller_target.isa);
31120 sbitmap_free (callee_target.isa);
/* OK to inline between different modes.
     A function with mode-specific instructions, e.g. using asm,
     must be explicitly protected with noinline.  */
/* Hook to fix a function's alignment affected by the target attribute.  */
31131 arm_relayout_function (tree fndecl)
31133 if (DECL_USER_ALIGN (fndecl))
31136 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
31139 callee_tree = target_option_default_node;
31141 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
31144 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
/* Inner function to process the attribute ((target (...))): take an
   argument and set the current options from that argument.  If we
   have a list, recursively go over the list.  */
31152 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
31154 if (TREE_CODE (args) == TREE_LIST)
31158 for (; args; args = TREE_CHAIN (args))
31159 if (TREE_VALUE (args)
31160 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
31165 else if (TREE_CODE (args) != STRING_CST)
31167 error ("attribute %<target%> argument not a string");
31171 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
31174 while ((q = strtok (argstr, ",")) != NULL)
31177 if (!strcmp (q, "thumb"))
31178 opts->x_target_flags |= MASK_THUMB;
31180 else if (!strcmp (q, "arm"))
31181 opts->x_target_flags &= ~MASK_THUMB;
31183 else if (!strcmp (q, "general-regs-only"))
31184 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
31186 else if (!strncmp (q, "fpu=", 4))
31189 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
31190 &fpu_index, CL_TARGET))
31192 error ("invalid fpu for target attribute or pragma %qs", q);
31195 if (fpu_index == TARGET_FPU_auto)
31197 /* This doesn't really make sense until we support
general dynamic selection of the architecture and all
		     sub-features.  */
31200 sorry ("auto fpu selection not currently permitted here");
31203 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
31205 else if (!strncmp (q, "arch=", 5))
31207 char *arch = q + 5;
31208 const arch_option *arm_selected_arch
31209 = arm_parse_arch_option_name (all_architectures, "arch", arch);
31211 if (!arm_selected_arch)
31213 error ("invalid architecture for target attribute or pragma %qs",
31218 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
31220 else if (q[0] == '+')
31222 opts->x_arm_arch_string
31223 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
31227 error ("unknown target attribute or pragma %qs", q);
31235 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
31238 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
31239 struct gcc_options *opts_set)
31241 struct cl_target_option cl_opts;
31243 if (!arm_valid_target_attribute_rec (args, opts))
31246 cl_target_option_save (&cl_opts, opts);
31247 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
31248 arm_option_check_internal (opts);
31249 /* Do any overrides, such as global options arch=xxx.
31250 We do this since arm_active_target was overridden. */
31251 arm_option_reconfigure_globals ();
31252 arm_options_perform_arch_sanity_checks ();
31253 arm_option_override_internal (opts, opts_set);
31255 return build_target_option_node (opts);
31259 add_attribute (const char * mode, tree *attributes)
31261 size_t len = strlen (mode);
31262 tree value = build_string (len, mode);
31264 TREE_TYPE (value) = build_array_type (char_type_node,
31265 build_index_type (size_int (len)));
31267 *attributes = tree_cons (get_identifier ("target"),
31268 build_tree_list (NULL_TREE, value),
/* For testing.  Insert thumb or arm modes alternately on functions.  */
31275 arm_insert_attributes (tree fndecl, tree * attributes)
31279 if (! TARGET_FLIP_THUMB)
31282 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
31283 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
31286 /* Nested definitions must inherit mode. */
31287 if (current_function_decl)
31289 mode = TARGET_THUMB ? "thumb" : "arm";
31290 add_attribute (mode, attributes);
31294 /* If there is already a setting don't change it. */
31295 if (lookup_attribute ("target", *attributes) != NULL)
31298 mode = thumb_flipper ? "thumb" : "arm";
31299 add_attribute (mode, attributes);
31301 thumb_flipper = !thumb_flipper;
31304 /* Hook to validate attribute((target("string"))). */
31307 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
31308 tree args, int ARG_UNUSED (flags))
31311 struct gcc_options func_options;
31312 tree cur_tree, new_optimize;
31313 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
31315 /* Get the optimization options of the current function. */
31316 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
31318 /* If the function changed the optimization levels as well as setting target
31319 options, start with the optimizations specified. */
31320 if (!func_optimize)
31321 func_optimize = optimization_default_node;
31323 /* Init func_options. */
31324 memset (&func_options, 0, sizeof (func_options));
31325 init_options_struct (&func_options, NULL);
31326 lang_hooks.init_options_struct (&func_options);
31328 /* Initialize func_options to the defaults. */
31329 cl_optimization_restore (&func_options,
31330 TREE_OPTIMIZATION (func_optimize));
31332 cl_target_option_restore (&func_options,
31333 TREE_TARGET_OPTION (target_option_default_node));
31335 /* Set func_options flags with new target mode. */
31336 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
31337 &global_options_set);
31339 if (cur_tree == NULL_TREE)
31342 new_optimize = build_optimization_node (&func_options);
31344 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
31346 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
31348 finalize_options_struct (&func_options);
31353 /* Match an ISA feature bitmap to a named FPU. We always use the
31354 first entry that exactly matches the feature set, so that we
31355 effectively canonicalize the FPU name for the assembler. */
31357 arm_identify_fpu_from_isa (sbitmap isa)
31359 auto_sbitmap fpubits (isa_num_bits);
31360 auto_sbitmap cand_fpubits (isa_num_bits);
31362 bitmap_and (fpubits, isa, isa_all_fpubits);
31364 /* If there are no ISA feature bits relating to the FPU, we must be
31365 doing soft-float. */
31366 if (bitmap_empty_p (fpubits))
31369 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31371 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
31372 if (bitmap_equal_p (fpubits, cand_fpubits))
31373 return all_fpus[i].name;
31375 /* We must find an entry, or things have gone wrong. */
31376 gcc_unreachable ();
31379 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
31380 by the function fndecl. */
31382 arm_declare_function_name (FILE *stream, const char *name, tree decl)
31384 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
31386 struct cl_target_option *targ_options;
31388 targ_options = TREE_TARGET_OPTION (target_parts);
31390 targ_options = TREE_TARGET_OPTION (target_option_current_node);
31391 gcc_assert (targ_options);
31393 /* Only update the assembler .arch string if it is distinct from the last
31394 such string we printed. arch_to_print is set conditionally in case
targ_options->x_arm_arch_string is NULL, which can be the case
   when cc1 is invoked directly without passing the -march option.  */
31397 std::string arch_to_print;
31398 if (targ_options->x_arm_arch_string)
31399 arch_to_print = targ_options->x_arm_arch_string;
31401 if (arch_to_print != arm_last_printed_arch_string)
31403 std::string arch_name
31404 = arch_to_print.substr (0, arch_to_print.find ("+"));
31405 asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
31406 const arch_option *arch
31407 = arm_parse_arch_option_name (all_architectures, "-march",
31408 targ_options->x_arm_arch_string);
31409 auto_sbitmap opt_bits (isa_num_bits);
31412 if (arch->common.extensions)
31414 for (const struct cpu_arch_extension *opt = arch->common.extensions;
31420 arm_initialize_isa (opt_bits, opt->isa_bits);
31421 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
31422 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
31423 asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
31429 arm_last_printed_arch_string = arch_to_print;
31432 fprintf (stream, "\t.syntax unified\n");
31436 if (is_called_in_ARM_mode (decl)
31437 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
31438 && cfun->is_thunk))
31439 fprintf (stream, "\t.code 32\n");
31440 else if (TARGET_THUMB1)
31441 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
31443 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
31446 fprintf (stream, "\t.arm\n");
31448 std::string fpu_to_print
31449 = TARGET_SOFT_FLOAT
31450 ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);
if (fpu_to_print != arm_last_printed_fpu_string)
31454 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
31455 arm_last_printed_fpu_string = fpu_to_print;
31458 if (TARGET_POKE_FUNCTION_NAME)
31459 arm_poke_function_name (stream, (const char *) name);
/* If MEM is in the form of [base+offset], extract the two parts
   of the address and store them in BASE and OFFSET; otherwise return
   FALSE after clearing BASE and OFFSET.  */
31467 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
31471 gcc_assert (MEM_P (mem));
31473 addr = XEXP (mem, 0);
31475 /* Strip off const from addresses like (const (addr)). */
31476 if (GET_CODE (addr) == CONST)
31477 addr = XEXP (addr, 0);
31479 if (GET_CODE (addr) == REG)
31482 *offset = const0_rtx;
31486 if (GET_CODE (addr) == PLUS
31487 && GET_CODE (XEXP (addr, 0)) == REG
31488 && CONST_INT_P (XEXP (addr, 1)))
31490 *base = XEXP (addr, 0);
31491 *offset = XEXP (addr, 1);
31496 *offset = NULL_RTX;
/* If INSN is a load or store of an address in the form of
   [base+offset], extract the two parts and store them in BASE and
   OFFSET.  Set IS_LOAD to TRUE if it's a load.  Return TRUE if INSN
   is such an instruction, otherwise return FALSE.  */
31507 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
31511 gcc_assert (INSN_P (insn));
31512 x = PATTERN (insn);
31513 if (GET_CODE (x) != SET)
31517 dest = SET_DEST (x);
31518 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
31521 extract_base_offset_in_addr (dest, base, offset);
31523 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
31526 extract_base_offset_in_addr (src, base, offset);
31531 return (*base != NULL_RTX && *offset != NULL_RTX);
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr and str instructions, so
   FUSION_PRI and PRI are only calculated for those instructions.  For
   other instructions, FUSION_PRI and PRI are simply set to MAX_PRI.
   In the future, other kinds of instruction fusion can be supported
   by returning different priorities.

   It's important that irrelevant instructions get the largest
   FUSION_PRI.  */
31544 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
31545 int *fusion_pri, int *pri)
31551 gcc_assert (INSN_P (insn));
31554 if (!fusion_load_store (insn, &base, &offset, &is_load))
31561 /* Load goes first. */
31563 *fusion_pri = tmp - 1;
31565 *fusion_pri = tmp - 2;
31569 /* INSN with smaller base register goes first. */
31570 tmp -= ((REGNO (base) & 0xff) << 20);
31572 /* INSN with smaller offset goes first. */
31573 off_val = (int)(INTVAL (offset));
31575 tmp -= (off_val & 0xfffff);
31577 tmp += ((- off_val) & 0xfffff);
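  /* E.g. two loads from [r2, #4] and [r2, #8] differ only in this
     offset term, so the #4 access sorts first and the pair stays
     adjacent, making it a candidate for forming an LDRD.  */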
31584 /* Construct and return a PARALLEL RTX vector with elements numbering the
31585 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
31586 the vector - from the perspective of the architecture. This does not
31587 line up with GCC's perspective on lane numbers, so we end up with
31588 different masks depending on our target endian-ness. The diagram
31589 below may help. We must draw the distinction when building masks
31590 which select one half of the vector. An instruction selecting
31591 architectural low-lanes for a big-endian target, must be described using
31592 a mask selecting GCC high-lanes.
31594 Big-Endian Little-Endian
31596 GCC 0 1 2 3 3 2 1 0
31597 | x | x | x | x | | x | x | x | x |
31598 Architecture 3 2 1 0 3 2 1 0
31600 Low Mask: { 2, 3 } { 0, 1 }
31601 High Mask: { 0, 1 } { 2, 3 }
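   For example, for V4SImode on a big-endian target the architectural
   low half is selected by (parallel [(const_int 2) (const_int 3)]).  */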
31605 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
31607 int nunits = GET_MODE_NUNITS (mode);
31608 rtvec v = rtvec_alloc (nunits / 2);
31609 int high_base = nunits / 2;
31615 if (BYTES_BIG_ENDIAN)
31616 base = high ? low_base : high_base;
31618 base = high ? high_base : low_base;
31620 for (i = 0; i < nunits / 2; i++)
31621 RTVEC_ELT (v, i) = GEN_INT (base + i);
31623 t1 = gen_rtx_PARALLEL (mode, v);
31627 /* Check OP for validity as a PARALLEL RTX vector with elements
31628 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
31629 from the perspective of the architecture. See the diagram above
31630 arm_simd_vect_par_cnst_half_p for more details. */
31633 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
31636 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
31637 HOST_WIDE_INT count_op = XVECLEN (op, 0);
31638 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
31641 if (!VECTOR_MODE_P (mode))
31644 if (count_op != count_ideal)
31647 for (i = 0; i < count_ideal; i++)
31649 rtx elt_op = XVECEXP (op, 0, i);
31650 rtx elt_ideal = XVECEXP (ideal, 0, i);
31652 if (!CONST_INT_P (elt_op)
31653 || INTVAL (elt_ideal) != INTVAL (elt_op))
/* Can output mi_thunk for all cases except for a non-zero vcall_offset
   in Thumb1.  */
31662 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
/* For now, we punt and do not handle this for TARGET_THUMB1.  */
31666 if (vcall_offset && TARGET_THUMB1)
31669 /* Otherwise ok. */
31673 /* Generate RTL for a conditional branch with rtx comparison CODE in
mode CC_MODE.  The destination of the unlikely conditional branch
   is LABEL_REF.  */
31678 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31682 x = gen_rtx_fmt_ee (code, VOIDmode,
31683 gen_rtx_REG (cc_mode, CC_REGNUM),
31686 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31687 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31689 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31692 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31694 For pure-code sections there is no letter code for this attribute, so
31695 output all the section flags numerically when this is needed. */
31698 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31701 if (flags & SECTION_ARM_PURECODE)
31705 if (!(flags & SECTION_DEBUG))
31707 if (flags & SECTION_EXCLUDE)
31708 *num |= 0x80000000;
31709 if (flags & SECTION_WRITE)
31711 if (flags & SECTION_CODE)
31713 if (flags & SECTION_MERGE)
31715 if (flags & SECTION_STRINGS)
31717 if (flags & SECTION_TLS)
31719 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31728 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31730 If pure-code is passed as an option, make sure all functions are in
31731 sections that have the SHF_ARM_PURECODE attribute. */
31734 arm_function_section (tree decl, enum node_frequency freq,
31735 bool startup, bool exit)
31737 const char * section_name;
31740 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31741 return default_function_section (decl, freq, startup, exit);
31743 if (!target_pure_code)
31744 return default_function_section (decl, freq, startup, exit);
31747 section_name = DECL_SECTION_NAME (decl);
/* If a function is not in a named section then it falls under the 'default'
     text section, also known as '.text'.  We can preserve previous behavior as
     the default text section already has the SHF_ARM_PURECODE section
     attribute.  */
31755 section *default_sec = default_function_section (decl, freq, startup,
31758 /* If default_sec is not null, then it must be a special section like for
31759 example .text.startup. We set the pure-code attribute and return the
31760 same section to preserve existing behavior. */
31762 default_sec->common.flags |= SECTION_ARM_PURECODE;
31763 return default_sec;
/* Otherwise look whether a section has already been created with
     the function's name.  */
31768 sec = get_named_section (decl, section_name, 0);
/* If that is not the case, passing NULL as the section's name to
     'get_named_section' will create a section with the declaration's
     name.  */
31773 sec = get_named_section (decl, NULL, 0);
31775 /* Set the SHF_ARM_PURECODE attribute. */
31776 sec->common.flags |= SECTION_ARM_PURECODE;
31781 /* Implements the TARGET_SECTION_FLAGS hook.
31783 If DECL is a function declaration and pure-code is passed as an option
then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
   section's name and RELOC indicates whether the declaration's initializer may
31786 contain runtime relocations. */
31788 static unsigned int
31789 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31791 unsigned int flags = default_section_type_flags (decl, name, reloc);
31793 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31794 flags |= SECTION_ARM_PURECODE;
31799 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31802 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31804 rtx *quot_p, rtx *rem_p)
31806 if (mode == SImode)
31807 gcc_assert (!TARGET_IDIV);
31809 scalar_int_mode libval_mode
31810 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31812 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31814 op0, GET_MODE (op0),
31815 op1, GET_MODE (op1));
31817 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31818 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31819 GET_MODE_SIZE (mode));
31821 gcc_assert (quotient);
31822 gcc_assert (remainder);
31824 *quot_p = quotient;
31825 *rem_p = remainder;
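  /* For SImode this expands to a call such as __aeabi_idivmod, which
     per the AEABI returns the quotient in r0 and the remainder in r1;
     the two subregs above split those halves out of the DImode libcall
     value.  */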
/* This function checks for the availability of the coprocessor builtin passed
   in BUILTIN for the current target.  Returns true if it is available and
   false otherwise.  If a BUILTIN is passed for which this function has not
   been implemented, it will cause an internal compiler error.  */
31834 arm_coproc_builtin_available (enum unspecv builtin)
31836 /* None of these builtins are available in Thumb mode if the target only
31837 supports Thumb-1. */
31855 case VUNSPEC_LDC2L:
31857 case VUNSPEC_STC2L:
31860 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31867 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31869 if (arm_arch6 || arm_arch5te)
31872 case VUNSPEC_MCRR2:
31873 case VUNSPEC_MRRC2:
31878 gcc_unreachable ();
31883 /* This function returns true if OP is a valid memory operand for the ldc and
31884 stc coprocessor instructions and false otherwise. */
31887 arm_coproc_ldc_stc_legitimate_address (rtx op)
31889 HOST_WIDE_INT range;
31890 /* Has to be a memory operand. */
31896 /* We accept registers. */
switch (GET_CODE (op))
31904 /* Or registers with an offset. */
31905 if (!REG_P (XEXP (op, 0)))
31910 /* The offset must be an immediate though. */
31911 if (!CONST_INT_P (op))
31914 range = INTVAL (op);
31916 /* Within the range of [-1020,1020]. */
31917 if (!IN_RANGE (range, -1020, 1020))
31920 /* And a multiple of 4. */
31921 return (range % 4) == 0;
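	/* So, for example, an offset of 1020 is accepted, while 1022
	   (not a multiple of 4) and 1024 (outside [-1020, 1020]) are
	   rejected.  */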
31927 return REG_P (XEXP (op, 0));
31929 gcc_unreachable ();
31934 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31936 In VFPv1, VFP registers could only be accessed in the mode they were
31937 set, so subregs would be invalid there. However, we don't support
31938 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31940 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31941 VFP registers in little-endian order. We can't describe that accurately to
31942 GCC, so avoid taking subregs of such values.
31944 The only exception is going from a 128-bit to a 64-bit type. In that
31945 case the data layout happens to be consistent for big-endian, so we
31946 explicitly allow that case. */
31949 arm_can_change_mode_class (machine_mode from, machine_mode to,
31950 reg_class_t rclass)
31953 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31954 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31955 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31956 && reg_classes_intersect_p (VFP_REGS, rclass))
31961 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31962 strcpy from constants will be faster. */
31964 static HOST_WIDE_INT
31965 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31967 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31968 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31969 return MAX (align, BITS_PER_WORD * factor);
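  /* E.g. a string constant that would otherwise be byte-aligned is
     placed on a 32-bit boundary, or a 64-bit one when tuning for
     XScale in ARM mode (factor == 2).  */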
31973 /* Emit a speculation barrier on target architectures that do not have
31974 DSB/ISB directly. Such systems probably don't need a barrier
31975 themselves, but if the code is ever run on a later architecture, it
31976 might become a problem. */
31978 arm_emit_speculation_barrier_function ()
31980 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
31984 namespace selftest {
31986 /* Scan the static data tables generated by parsecpu.awk looking for
31987 potential issues with the data. We primarily check for
31988 inconsistencies in the option extensions at present (extensions
31989 that duplicate others but aren't marked as aliases). Furthermore,
31990 for correct canonicalization later options must never be a subset
31991 of an earlier option. Any extension should also only specify other
31992 feature bits and never an architecture bit. The architecture is inferred
31993 from the declaration of the extension. */
31995 arm_test_cpu_arch_data (void)
31997 const arch_option *arch;
31998 const cpu_option *cpu;
31999 auto_sbitmap target_isa (isa_num_bits);
32000 auto_sbitmap isa1 (isa_num_bits);
32001 auto_sbitmap isa2 (isa_num_bits);
32003 for (arch = all_architectures; arch->common.name != NULL; ++arch)
32005 const cpu_arch_extension *ext1, *ext2;
32007 if (arch->common.extensions == NULL)
32010 arm_initialize_isa (target_isa, arch->common.isa_bits);
32012 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
32017 arm_initialize_isa (isa1, ext1->isa_bits);
32018 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
32020 if (ext2->alias || ext1->remove != ext2->remove)
32023 arm_initialize_isa (isa2, ext2->isa_bits);
32024 /* If the option is a subset of the parent option, it doesn't
32025 add anything and so isn't useful. */
32026 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
32028 /* If the extension specifies any architectural bits then
32029 disallow it. Extensions should only specify feature bits. */
32030 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
32035 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
32037 const cpu_arch_extension *ext1, *ext2;
32039 if (cpu->common.extensions == NULL)
arm_initialize_isa (target_isa, cpu->common.isa_bits);
32044 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
32049 arm_initialize_isa (isa1, ext1->isa_bits);
32050 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
32052 if (ext2->alias || ext1->remove != ext2->remove)
32055 arm_initialize_isa (isa2, ext2->isa_bits);
32056 /* If the option is a subset of the parent option, it doesn't
32057 add anything and so isn't useful. */
32058 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
32060 /* If the extension specifies any architectural bits then
32061 disallow it. Extensions should only specify feature bits. */
32062 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
32068 /* Scan the static data tables generated by parsecpu.awk looking for
32069 potential issues with the data. Here we check for consistency between the
32070 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
32071 a feature bit that is not defined by any FPU flag. */
32073 arm_test_fpu_data (void)
32075 auto_sbitmap isa_all_fpubits (isa_num_bits);
32076 auto_sbitmap fpubits (isa_num_bits);
32077 auto_sbitmap tmpset (isa_num_bits);
32079 static const enum isa_feature fpu_bitlist[]
32080 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
32081 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
32083 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
32085 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
32086 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
32087 bitmap_clear (isa_all_fpubits);
32088 bitmap_copy (isa_all_fpubits, tmpset);
32091 if (!bitmap_empty_p (isa_all_fpubits))
fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
32094 " group that are not defined by any FPU.\n"
32095 " Check your arm-cpus.in.\n");
32096 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
32101 arm_run_selftests (void)
32103 arm_test_cpu_arch_data ();
32104 arm_test_fpu_data ();
32106 } /* Namespace selftest. */
32108 #undef TARGET_RUN_TARGET_SELFTESTS
32109 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
32110 #endif /* CHECKING_P */
32112 struct gcc_target targetm = TARGET_INITIALIZER;
32114 #include "gt-arm.h"