/* Output routines for GCC for ARM.
   Copyright (C) 1991-2019 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#define INCLUDE_STRING
#include "system.h"
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "insn-attr.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "target-globals.h"
#include "tm-constrs.h"
#include "optabs-libfuncs.h"

/* This file should be included last.  */
#include "target-def.h"
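/* Editor's note (a sketch of the mechanism, not original commentary):
   the TARGET_* macros #define'd below override the defaults supplied by
   target-def.h, and the resulting hook table is instantiated later in
   the file as the targetm structure, e.g.

     struct gcc_target targetm = TARGET_INITIALIZER;

   which is why target-def.h has to come after every other header.  */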
/* Forward definitions of types.  */
typedef struct minipool_node Mnode;
typedef struct minipool_fixup Mfix;

/* The last .arch and .fpu assembly strings that we printed.  */
static std::string arm_last_printed_arch_string;
static std::string arm_last_printed_fpu_string;

void (*arm_lang_output_object_attributes_hook)(void);

/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static int arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_compute_frame_layout (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
			     unsigned HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static unsigned bitmap_popcount (const sbitmap);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, machine_mode, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx_insn *);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
			       machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_core_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
static void arm_output_function_epilogue (FILE *);
static void arm_output_function_prologue (FILE *);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
					       machine_mode, int *,
					       const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int arm_insn_cost (rtx_insn *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static int arm_arg_partial_bytes (cumulative_args_t,
				  const function_arg_info &);
static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
static void arm_function_arg_advance (cumulative_args_t,
				      const function_arg_info &);
static pad_direction arm_function_arg_padding (machine_mode, const_tree);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif
static void arm_file_end (void);
static void arm_file_start (void);
static void arm_insert_attributes (tree, tree *);

static void arm_setup_incoming_varargs (cumulative_args_t,
					const function_arg_info &, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   const function_arg_info &);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (const function_arg_info &);
static bool arm_return_in_memory (const_tree, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
#endif
static void arm_asm_init_sections (void);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static void arm_option_save (struct cl_target_option *, struct gcc_options *);
static void arm_option_restore (struct gcc_options *,
				struct cl_target_option *);
static void arm_override_options_after_change (void);
static void arm_option_print (FILE *, int, struct cl_target_option *);
static void arm_set_current_function (tree);
static bool arm_can_inline_p (tree, tree);
static void arm_relayout_function (tree);
static bool arm_valid_target_attribute_p (tree, tree, tree, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_sched_can_speculate_insn (rtx_insn *);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static tree arm_promoted_type (const_tree t);
static bool arm_scalar_mode_supported_p (scalar_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
					unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (scalar_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
					  const vec_perm_indices &);

static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);

static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
				   enum vect_cost_for_stmt kind,
				   struct _stmt_vec_info *stmt_info,
				   int misalign,
				   enum vect_cost_model_location where);

static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
				     const_tree);
static section *arm_function_section (tree, enum node_frequency, bool, bool);
static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
						int reloc);
static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
static opt_scalar_float_mode arm_floatn_mode (int, bool);
static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
static bool arm_modes_tieable_p (machine_mode, machine_mode);
static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
static rtx_insn * thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
					vec<const char *> &, vec<rtx> &,
					HARD_REG_SET &);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  false, NULL, NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  false, NULL, NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  false, arm_handle_pcs_attribute,
    NULL },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "interrupt",    0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "naked",        0, 0, true,  false, false, false,
    arm_handle_fndecl_attribute, NULL },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, false, NULL, NULL },
  { "dllexport",    0, 0, true,  false, false, false, NULL, NULL },
  { "interfacearm", 0, 0, true,  false, false, false,
    arm_handle_fndecl_attribute, NULL },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "dllexport",    0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "notshared",    0, 0, false, true,  false, false,
    arm_handle_notshared_attribute, NULL },
#endif
  /* ARMv8-M Security Extensions support.  */
  { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
    arm_handle_cmse_nonsecure_entry, NULL },
  { "cmse_nonsecure_call", 0, 0, true, false, false, true,
    arm_handle_cmse_nonsecure_call, NULL },
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
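/* Editor's usage sketch (not part of the original file): a function can
   request the special ISR prologue/epilogue handled by
   arm_handle_isr_attribute above with, e.g.,

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   where the hypothetical uart_handler and the "IRQ" argument are for
   illustration only.  */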
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_CHECK_BUILTIN_CALL
#define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P arm_can_inline_p

#undef TARGET_RELAYOUT_FUNCTION
#define TARGET_RELAYOUT_FUNCTION arm_relayout_function

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE arm_option_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE arm_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT arm_option_print

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif

#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label
#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE arm_floatn_mode

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost
#undef TARGET_INSN_COST
#define TARGET_INSN_COST arm_insn_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
  arm_autovectorize_vector_modes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#endif /* ARM_UNWIND_INFO */

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
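/* Editor's note, working out the arithmetic above: an anchor can reach
   offsets -4088 .. +4095, so the addressable block spans
   4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023, a whole number of
   doublewords.  */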
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset

#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
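/* Editor's illustration: in Thumb-2, one IT instruction can predicate up
   to four following instructions, e.g. "ITTEE EQ" covers two EQ and two
   NE instructions; with -mrestrict-it (arm_restrict_it) only a single
   instruction may follow, hence the 1 above.  */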
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION arm_function_section

#undef TARGET_ASM_ELF_FLAGS_NUMERIC
#define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION arm_excess_precision

/* Although the architecture reserves bits 0 and 1, only the former is
   used for ARM/Thumb ISA selection in v7 and earlier versions.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
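/* Editor's note (an assumption about the rationale): with bit 0 taken by
   ARM/Thumb selection, bit 1 is the lowest bit still free to tag a custom
   function descriptor, which is why the value above is 2.  */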
#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P arm_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char * minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;
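/* Editor's illustration: with the default of 5, a conditional branch that
   jumps over a block of at most five instructions may instead have the
   block executed conditionally.  */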
extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = TARGET_CPU_arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned int tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;

/* Active target architecture and tuning.  */

struct arm_build_target arm_active_target;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5T extensions.  */
int arm_arch5t = 0;

/* Nonzero if this chip supports the ARM Architecture 5TE extensions.  */
int arm_arch5te = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 6KZ extensions.  */
int arm_arch6kz = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if this chip supports the Large Physical Address Extension.  */
int arm_arch_lpae = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip supports the ARMv8.1 extensions.  */
int arm_arch8_1 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
int arm_arch8_2 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.3 extensions.  */
int arm_arch8_3 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.4 extensions.  */
int arm_arch8_4 = 0;

/* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2.  */
int arm_fp16_inst = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 1.  */
int arm_arch_thumb1;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if chip supports the AdvSIMD Dot Product instructions.  */
int arm_arch_dotprod = 0;

/* Nonzero if chip supports the ARMv8-M security extensions.  */
int arm_arch_cmse = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME "lsl"
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
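/* Editor's illustration (assuming the backend's usual numbering, where
   THUMB_HARD_FRAME_POINTER_REGNUM is r7): 0xff selects the low registers
   r0-r7, and the mask then clears the Thumb frame pointer plus,
   defensively, SP, PC and the PIC register, whose numbers normally fall
   outside the low byte anyway.  */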
/* Initialization code.  */

struct cpu_tune
{
  enum processor_type scheduler;
  unsigned int tune_flags;
  const struct tune_params *tune;
};

#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  {						\
    num_slots,					\
    l1_size,					\
    l1_line_size				\
  }
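/* Editor's usage sketch (values hypothetical): a core that benefits from
   prefetching with four slots, unknown L1 size and 64-byte cache lines
   would use ARM_PREFETCH_BENEFICIAL (4, -1, 64), which expands to
   { 4, -1, 64 }.  */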
/* arm generic vectorizer costs.  */
static const
struct cpu_vec_costs arm_default_vec_cost = {
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  1,  /* vec_unalign_load_cost.  */
  1,  /* vec_unalign_store_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h.  */
#include "aarch-cost-tables.h"
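/* Editor's note: COSTS_N_INSNS (N) is the standard rtl.h cost scale
   (N * 4), so the entries in the tables below are expressed in whole
   instruction equivalents.  */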
const struct cpu_cost_table cortexa9_extra_costs =
  COSTS_N_INSNS (1),  /* shift_reg.  */
  COSTS_N_INSNS (1),  /* arith_shift.  */
  COSTS_N_INSNS (2),  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift_reg.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (2),  /* extend_arith.  */
  COSTS_N_INSNS (1),  /* bfi.  */
  COSTS_N_INSNS (1),  /* bfx.  */
  true                /* non_exec_costs_exec.  */
  COSTS_N_INSNS (3),  /* simple.  */
  COSTS_N_INSNS (3),  /* flag_setting.  */
  COSTS_N_INSNS (2),  /* extend.  */
  COSTS_N_INSNS (3),  /* add.  */
  COSTS_N_INSNS (2),  /* extend_add.  */
  COSTS_N_INSNS (30)  /* idiv.  No HW div on Cortex A9.  */
  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (4),  /* extend.  */
  COSTS_N_INSNS (4),  /* extend_add.  */
  COSTS_N_INSNS (2),  /* load.  */
  COSTS_N_INSNS (2),  /* load_sign_extend.  */
  COSTS_N_INSNS (2),  /* ldrd.  */
  COSTS_N_INSNS (2),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (5),  /* loadf.  */
  COSTS_N_INSNS (5),  /* loadd.  */
  COSTS_N_INSNS (1),  /* load_unaligned.  */
  COSTS_N_INSNS (2),  /* store.  */
  COSTS_N_INSNS (2),  /* strd.  */
  COSTS_N_INSNS (2),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),  /* storef.  */
  COSTS_N_INSNS (1),  /* stored.  */
  COSTS_N_INSNS (1),  /* store_unaligned.  */
  COSTS_N_INSNS (1),  /* loadv.  */
  COSTS_N_INSNS (1)   /* storev.  */
  COSTS_N_INSNS (14), /* div.  */
  COSTS_N_INSNS (4),  /* mult.  */
  COSTS_N_INSNS (7),  /* mult_addsub.  */
  COSTS_N_INSNS (30), /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (1),  /* fpconst.  */
  COSTS_N_INSNS (1),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */
  COSTS_N_INSNS (24), /* div.  */
  COSTS_N_INSNS (5),  /* mult.  */
  COSTS_N_INSNS (8),  /* mult_addsub.  */
  COSTS_N_INSNS (30), /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (1),  /* fpconst.  */
  COSTS_N_INSNS (1),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */
  COSTS_N_INSNS (1)   /* alu.  */

const struct cpu_cost_table cortexa8_extra_costs =
  COSTS_N_INSNS (1),  /* shift.  */
  COSTS_N_INSNS (1),  /* arith_shift.  */
  0,                  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift.  */
  0,                  /* log_shift_reg.  */
  0,                  /* extend_arith.  */
  true                /* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),  /* simple.  */
  COSTS_N_INSNS (1),  /* flag_setting.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (1),  /* add.  */
  COSTS_N_INSNS (1),  /* extend_add.  */
  COSTS_N_INSNS (30)  /* idiv.  No HW div on Cortex A8.  */
  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (2),  /* extend.  */
  COSTS_N_INSNS (2),  /* extend_add.  */
  COSTS_N_INSNS (1),  /* load.  */
  COSTS_N_INSNS (1),  /* load_sign_extend.  */
  COSTS_N_INSNS (1),  /* ldrd.  */
  COSTS_N_INSNS (1),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),  /* loadf.  */
  COSTS_N_INSNS (1),  /* loadd.  */
  COSTS_N_INSNS (1),  /* load_unaligned.  */
  COSTS_N_INSNS (1),  /* store.  */
  COSTS_N_INSNS (1),  /* strd.  */
  COSTS_N_INSNS (1),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),  /* storef.  */
  COSTS_N_INSNS (1),  /* stored.  */
  COSTS_N_INSNS (1),  /* store_unaligned.  */
  COSTS_N_INSNS (1),  /* loadv.  */
  COSTS_N_INSNS (1)   /* storev.  */
  COSTS_N_INSNS (36), /* div.  */
  COSTS_N_INSNS (11), /* mult.  */
  COSTS_N_INSNS (20), /* mult_addsub.  */
  COSTS_N_INSNS (30), /* fma.  */
  COSTS_N_INSNS (9),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (6),  /* compare.  */
  COSTS_N_INSNS (4),  /* widen.  */
  COSTS_N_INSNS (4),  /* narrow.  */
  COSTS_N_INSNS (8),  /* toint.  */
  COSTS_N_INSNS (8),  /* fromint.  */
  COSTS_N_INSNS (8)   /* roundint.  */
  COSTS_N_INSNS (64), /* div.  */
  COSTS_N_INSNS (16), /* mult.  */
  COSTS_N_INSNS (25), /* mult_addsub.  */
  COSTS_N_INSNS (30), /* fma.  */
  COSTS_N_INSNS (9),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (6),  /* compare.  */
  COSTS_N_INSNS (6),  /* widen.  */
  COSTS_N_INSNS (6),  /* narrow.  */
  COSTS_N_INSNS (8),  /* toint.  */
  COSTS_N_INSNS (8),  /* fromint.  */
  COSTS_N_INSNS (8)   /* roundint.  */
  COSTS_N_INSNS (1)   /* alu.  */

const struct cpu_cost_table cortexa5_extra_costs =
  COSTS_N_INSNS (1),  /* shift.  */
  COSTS_N_INSNS (1),  /* shift_reg.  */
  COSTS_N_INSNS (1),  /* arith_shift.  */
  COSTS_N_INSNS (1),  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift.  */
  COSTS_N_INSNS (1),  /* log_shift_reg.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (1),  /* extend_arith.  */
  COSTS_N_INSNS (1),  /* bfi.  */
  COSTS_N_INSNS (1),  /* bfx.  */
  COSTS_N_INSNS (1),  /* clz.  */
  COSTS_N_INSNS (1),  /* rev.  */
  true                /* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),  /* flag_setting.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (1),  /* add.  */
  COSTS_N_INSNS (1),  /* extend_add.  */
  COSTS_N_INSNS (7)   /* idiv.  */
  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (2),  /* extend_add.  */
  COSTS_N_INSNS (1),  /* load.  */
  COSTS_N_INSNS (1),  /* load_sign_extend.  */
  COSTS_N_INSNS (6),  /* ldrd.  */
  COSTS_N_INSNS (1),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* loadf.  */
  COSTS_N_INSNS (4),  /* loadd.  */
  COSTS_N_INSNS (1),  /* load_unaligned.  */
  COSTS_N_INSNS (1),  /* store.  */
  COSTS_N_INSNS (3),  /* strd.  */
  COSTS_N_INSNS (1),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* storef.  */
  COSTS_N_INSNS (2),  /* stored.  */
  COSTS_N_INSNS (1),  /* store_unaligned.  */
  COSTS_N_INSNS (1),  /* loadv.  */
  COSTS_N_INSNS (1)   /* storev.  */
  COSTS_N_INSNS (15), /* div.  */
  COSTS_N_INSNS (3),  /* mult.  */
  COSTS_N_INSNS (7),  /* mult_addsub.  */
  COSTS_N_INSNS (7),  /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */
  COSTS_N_INSNS (30), /* div.  */
  COSTS_N_INSNS (6),  /* mult.  */
  COSTS_N_INSNS (10), /* mult_addsub.  */
  COSTS_N_INSNS (7),  /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */
  COSTS_N_INSNS (1)   /* alu.  */

const struct cpu_cost_table cortexa7_extra_costs =
  COSTS_N_INSNS (1),  /* shift.  */
  COSTS_N_INSNS (1),  /* shift_reg.  */
  COSTS_N_INSNS (1),  /* arith_shift.  */
  COSTS_N_INSNS (1),  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift.  */
  COSTS_N_INSNS (1),  /* log_shift_reg.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (1),  /* extend_arith.  */
  COSTS_N_INSNS (1),  /* bfi.  */
  COSTS_N_INSNS (1),  /* bfx.  */
  COSTS_N_INSNS (1),  /* clz.  */
  COSTS_N_INSNS (1),  /* rev.  */
  true                /* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),  /* flag_setting.  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (1),  /* add.  */
  COSTS_N_INSNS (1),  /* extend_add.  */
  COSTS_N_INSNS (7)   /* idiv.  */
  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (1),  /* extend.  */
  COSTS_N_INSNS (2),  /* extend_add.  */
  COSTS_N_INSNS (1),  /* load.  */
  COSTS_N_INSNS (1),  /* load_sign_extend.  */
  COSTS_N_INSNS (3),  /* ldrd.  */
  COSTS_N_INSNS (1),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* loadf.  */
  COSTS_N_INSNS (2),  /* loadd.  */
  COSTS_N_INSNS (1),  /* load_unaligned.  */
  COSTS_N_INSNS (1),  /* store.  */
  COSTS_N_INSNS (3),  /* strd.  */
  COSTS_N_INSNS (1),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* storef.  */
  COSTS_N_INSNS (2),  /* stored.  */
  COSTS_N_INSNS (1),  /* store_unaligned.  */
  COSTS_N_INSNS (1),  /* loadv.  */
  COSTS_N_INSNS (1)   /* storev.  */
  COSTS_N_INSNS (15), /* div.  */
  COSTS_N_INSNS (3),  /* mult.  */
  COSTS_N_INSNS (7),  /* mult_addsub.  */
  COSTS_N_INSNS (7),  /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */
  COSTS_N_INSNS (30), /* div.  */
  COSTS_N_INSNS (6),  /* mult.  */
  COSTS_N_INSNS (10), /* mult_addsub.  */
  COSTS_N_INSNS (7),  /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (3),  /* fpconst.  */
  COSTS_N_INSNS (3),  /* neg.  */
  COSTS_N_INSNS (3),  /* compare.  */
  COSTS_N_INSNS (3),  /* widen.  */
  COSTS_N_INSNS (3),  /* narrow.  */
  COSTS_N_INSNS (3),  /* toint.  */
  COSTS_N_INSNS (3),  /* fromint.  */
  COSTS_N_INSNS (3)   /* roundint.  */
  COSTS_N_INSNS (1)   /* alu.  */

const struct cpu_cost_table cortexa12_extra_costs =
  COSTS_N_INSNS (1),  /* shift_reg.  */
  COSTS_N_INSNS (1),  /* arith_shift.  */
  COSTS_N_INSNS (1),  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift.  */
  COSTS_N_INSNS (1),  /* log_shift_reg.  */
  COSTS_N_INSNS (1),  /* extend_arith.  */
  COSTS_N_INSNS (1),  /* bfx.  */
  COSTS_N_INSNS (1),  /* clz.  */
  COSTS_N_INSNS (1),  /* rev.  */
  true                /* non_exec_costs_exec.  */
  COSTS_N_INSNS (2),  /* simple.  */
  COSTS_N_INSNS (3),  /* flag_setting.  */
  COSTS_N_INSNS (2),  /* extend.  */
  COSTS_N_INSNS (3),  /* add.  */
  COSTS_N_INSNS (2),  /* extend_add.  */
  COSTS_N_INSNS (18)  /* idiv.  */
  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (3),  /* extend.  */
  COSTS_N_INSNS (3),  /* extend_add.  */
  COSTS_N_INSNS (3),  /* load.  */
  COSTS_N_INSNS (3),  /* load_sign_extend.  */
  COSTS_N_INSNS (3),  /* ldrd.  */
  COSTS_N_INSNS (3),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (3),  /* loadf.  */
  COSTS_N_INSNS (3),  /* loadd.  */
  0,                  /* load_unaligned.  */
  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* storef.  */
  COSTS_N_INSNS (2),  /* stored.  */
  0,                  /* store_unaligned.  */
  COSTS_N_INSNS (1),  /* loadv.  */
  COSTS_N_INSNS (1)   /* storev.  */
  COSTS_N_INSNS (17), /* div.  */
  COSTS_N_INSNS (4),  /* mult.  */
  COSTS_N_INSNS (8),  /* mult_addsub.  */
  COSTS_N_INSNS (8),  /* fma.  */
  COSTS_N_INSNS (4),  /* addsub.  */
  COSTS_N_INSNS (2),  /* fpconst.  */
  COSTS_N_INSNS (2),  /* neg.  */
  COSTS_N_INSNS (2),  /* compare.  */
  COSTS_N_INSNS (4),  /* widen.  */
  COSTS_N_INSNS (4),  /* narrow.  */
  COSTS_N_INSNS (4),  /* toint.  */
  COSTS_N_INSNS (4),  /* fromint.  */
  COSTS_N_INSNS (4)   /* roundint.  */
  COSTS_N_INSNS (31), /* div.  */
  COSTS_N_INSNS (4),  /* mult.  */
  COSTS_N_INSNS (8),  /* mult_addsub.  */
  COSTS_N_INSNS (8),  /* fma.  */
  COSTS_N_INSNS (4),  /* addsub.  */
  COSTS_N_INSNS (2),  /* fpconst.  */
  COSTS_N_INSNS (2),  /* neg.  */
  COSTS_N_INSNS (2),  /* compare.  */
  COSTS_N_INSNS (4),  /* widen.  */
  COSTS_N_INSNS (4),  /* narrow.  */
  COSTS_N_INSNS (4),  /* toint.  */
  COSTS_N_INSNS (4),  /* fromint.  */
  COSTS_N_INSNS (4)   /* roundint.  */
  COSTS_N_INSNS (1)   /* alu.  */

const struct cpu_cost_table cortexa15_extra_costs =
  COSTS_N_INSNS (1),  /* arith_shift.  */
  COSTS_N_INSNS (1),  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift.  */
  COSTS_N_INSNS (1),  /* log_shift_reg.  */
  COSTS_N_INSNS (1),  /* extend_arith.  */
  COSTS_N_INSNS (1),  /* bfi.  */
  true                /* non_exec_costs_exec.  */
  COSTS_N_INSNS (2),  /* simple.  */
  COSTS_N_INSNS (3),  /* flag_setting.  */
  COSTS_N_INSNS (2),  /* extend.  */
  COSTS_N_INSNS (2),  /* add.  */
  COSTS_N_INSNS (2),  /* extend_add.  */
  COSTS_N_INSNS (18)  /* idiv.  */
  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (3),  /* extend.  */
  COSTS_N_INSNS (3),  /* extend_add.  */
  COSTS_N_INSNS (3),  /* load.  */
  COSTS_N_INSNS (3),  /* load_sign_extend.  */
  COSTS_N_INSNS (3),  /* ldrd.  */
  COSTS_N_INSNS (4),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  2,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (4),  /* loadf.  */
  COSTS_N_INSNS (4),  /* loadd.  */
  0,                  /* load_unaligned.  */
  COSTS_N_INSNS (1),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  2,                  /* stm_regs_per_insn_subsequent.  */
  0,                  /* store_unaligned.  */
  COSTS_N_INSNS (1),  /* loadv.  */
  COSTS_N_INSNS (1)   /* storev.  */
  COSTS_N_INSNS (17), /* div.  */
  COSTS_N_INSNS (4),  /* mult.  */
  COSTS_N_INSNS (8),  /* mult_addsub.  */
  COSTS_N_INSNS (8),  /* fma.  */
  COSTS_N_INSNS (4),  /* addsub.  */
  COSTS_N_INSNS (2),  /* fpconst.  */
  COSTS_N_INSNS (2),  /* neg.  */
  COSTS_N_INSNS (5),  /* compare.  */
  COSTS_N_INSNS (4),  /* widen.  */
  COSTS_N_INSNS (4),  /* narrow.  */
  COSTS_N_INSNS (4),  /* toint.  */
  COSTS_N_INSNS (4),  /* fromint.  */
  COSTS_N_INSNS (4)   /* roundint.  */
  COSTS_N_INSNS (31), /* div.  */
  COSTS_N_INSNS (4),  /* mult.  */
  COSTS_N_INSNS (8),  /* mult_addsub.  */
  COSTS_N_INSNS (8),  /* fma.  */
  COSTS_N_INSNS (4),  /* addsub.  */
  COSTS_N_INSNS (2),  /* fpconst.  */
  COSTS_N_INSNS (2),  /* neg.  */
  COSTS_N_INSNS (2),  /* compare.  */
  COSTS_N_INSNS (4),  /* widen.  */
  COSTS_N_INSNS (4),  /* narrow.  */
  COSTS_N_INSNS (4),  /* toint.  */
  COSTS_N_INSNS (4),  /* fromint.  */
  COSTS_N_INSNS (4)   /* roundint.  */
  COSTS_N_INSNS (1)   /* alu.  */

const struct cpu_cost_table v7m_extra_costs =
  0,                  /* arith_shift.  */
  COSTS_N_INSNS (1),  /* arith_shift_reg.  */
  COSTS_N_INSNS (1),  /* log_shift_reg.  */
  COSTS_N_INSNS (1),  /* extend_arith.  */
  COSTS_N_INSNS (1),  /* non_exec.  */
  false               /* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),  /* simple.  */
  COSTS_N_INSNS (1),  /* flag_setting.  */
  COSTS_N_INSNS (2),  /* extend.  */
  COSTS_N_INSNS (1),  /* add.  */
  COSTS_N_INSNS (3),  /* extend_add.  */
  COSTS_N_INSNS (8)   /* idiv.  */
  0,                  /* simple (N/A).  */
  0,                  /* flag_setting (N/A).  */
  COSTS_N_INSNS (2),  /* extend.  */
  COSTS_N_INSNS (3),  /* extend_add.  */
  COSTS_N_INSNS (2),  /* load.  */
  0,                  /* load_sign_extend.  */
  COSTS_N_INSNS (3),  /* ldrd.  */
  COSTS_N_INSNS (2),  /* ldm_1st.  */
  1,                  /* ldm_regs_per_insn_1st.  */
  1,                  /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* loadf.  */
  COSTS_N_INSNS (3),  /* loadd.  */
  COSTS_N_INSNS (1),  /* load_unaligned.  */
  COSTS_N_INSNS (2),  /* store.  */
  COSTS_N_INSNS (3),  /* strd.  */
  COSTS_N_INSNS (2),  /* stm_1st.  */
  1,                  /* stm_regs_per_insn_1st.  */
  1,                  /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),  /* storef.  */
  COSTS_N_INSNS (3),  /* stored.  */
  COSTS_N_INSNS (1),  /* store_unaligned.  */
  COSTS_N_INSNS (1),  /* loadv.  */
  COSTS_N_INSNS (1)   /* storev.  */
  COSTS_N_INSNS (7),  /* div.  */
  COSTS_N_INSNS (2),  /* mult.  */
  COSTS_N_INSNS (5),  /* mult_addsub.  */
  COSTS_N_INSNS (3),  /* fma.  */
  COSTS_N_INSNS (1),  /* addsub.  */
  COSTS_N_INSNS (15), /* div.  */
  COSTS_N_INSNS (5),  /* mult.  */
  COSTS_N_INSNS (7),  /* mult_addsub.  */
  COSTS_N_INSNS (7),  /* fma.  */
  COSTS_N_INSNS (3),  /* addsub.  */
  COSTS_N_INSNS (1)   /* alu.  */
const struct addr_mode_cost_table generic_addr_mode_costs =
{
  /* int.  */
  {
    COSTS_N_INSNS (0),  /* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),  /* AMO_NO_WB.  */
    COSTS_N_INSNS (0)   /* AMO_WB.  */
  },
  /* float.  */
  {
    COSTS_N_INSNS (0),  /* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),  /* AMO_NO_WB.  */
    COSTS_N_INSNS (0)   /* AMO_WB.  */
  },
  /* vector.  */
  {
    COSTS_N_INSNS (0),  /* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),  /* AMO_NO_WB.  */
    COSTS_N_INSNS (0)   /* AMO_WB.  */
  }
};
const struct tune_params arm_slowmul_tune =
{
  &generic_extra_costs,  /* Insn extra costs.  */
  &generic_addr_mode_costs,  /* Addressing mode costs.  */
  NULL,  /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  3,  /* Constant limit.  */
  5,  /* Max cond insns.  */
  8,  /* Memset max inline.  */
  1,  /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_fastmul_tune =
{
  &generic_extra_costs,  /* Insn extra costs.  */
  &generic_addr_mode_costs,  /* Addressing mode costs.  */
  NULL,  /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,  /* Constant limit.  */
  5,  /* Max cond insns.  */
  8,  /* Memset max inline.  */
  1,  /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,  /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value. */

const struct tune_params arm_strongarm_tune =
{
  &generic_extra_costs,			/* Insn extra costs. */
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  3,					/* Max cond insns. */
  8,					/* Memset max inline. */
  1,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xscale_tune =
{
  &generic_extra_costs,			/* Insn extra costs. */
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  xscale_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  2,					/* Constant limit. */
  3,					/* Max cond insns. */
  8,					/* Memset max inline. */
  1,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_9e_tune =
{
  &generic_extra_costs,			/* Insn extra costs. */
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  1,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_marvell_pj4_tune =
{
  &generic_extra_costs,			/* Insn extra costs. */
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  2,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_v6t2_tune =
{
  &generic_extra_costs,			/* Insn extra costs. */
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  1,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate. */
const struct tune_params arm_cortex_tune =
{
  &generic_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  2,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a8_tune =
{
  &cortexa8_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  2,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a7_tune =
{
  &cortexa7_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  2,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a15_tune =
{
  &cortexa15_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  2,					/* Max cond insns. */
  8,					/* Memset max inline. */
  3,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_cortex_a35_tune =
{
  &cortexa53_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  1,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a53_tune =
{
  &cortexa53_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  2,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a57_tune =
{
  &cortexa57_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  2,					/* Max cond insns. */
  8,					/* Memset max inline. */
  3,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_FULL
};
const struct tune_params arm_exynosm1_tune =
{
  &exynosm1_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  2,					/* Max cond insns. */
  8,					/* Memset max inline. */
  3,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xgene1_tune =
{
  &xgene1_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  2,					/* Max cond insns. */
  32,					/* Memset max inline. */
  4,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value. */

const struct tune_params arm_cortex_a5_tune =
{
  &cortexa5_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_cortex_a5_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  1,					/* Max cond insns. */
  8,					/* Memset max inline. */
  2,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a9_tune =
{
  &cortexa9_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  cortex_a9_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  2,					/* Issue rate. */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a12_tune =
{
  &cortexa12_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs. */
  1,					/* Constant limit. */
  2,					/* Max cond insns. */
  8,					/* Memset max inline. */
  2,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a73_tune =
{
  &cortexa57_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs. */
  1,					/* Constant limit. */
  2,					/* Max cond insns. */
  8,					/* Memset max inline. */
  2,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_FULL
};
/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two cycles
   to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors. */

const struct tune_params arm_v7m_tune =
{
  &v7m_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_cortex_m_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  2,					/* Max cond insns. */
  8,					/* Memset max inline. */
  1,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Cortex-M7 tuning. */

const struct tune_params arm_cortex_m7_tune =
{
  &v7m_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_cortex_m7_branch_cost,
  &arm_default_vec_cost,
  0,					/* Constant limit. */
  1,					/* Max cond insns. */
  8,					/* Memset max inline. */
  2,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1, cortex-m0plus and
   cortex-m23. */
const struct tune_params arm_v6m_tune =
{
  &generic_extra_costs,			/* Insn extra costs. */
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  NULL,					/* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs. */
  1,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  1,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_fa726te_tune =
{
  &generic_extra_costs,			/* Insn extra costs. */
  &generic_addr_mode_costs,		/* Addressing mode costs. */
  fa726te_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit. */
  5,					/* Max cond insns. */
  8,					/* Memset max inline. */
  2,					/* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Auto-generated CPU, FPU and architecture tables. */
#include "arm-cpu-data.h"

/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (eg. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name. */

char arm_arch_name[] = "__ARM_ARCH_PROFILE__";

/* Supported TLS relocations. */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant. */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}

/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid. */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}
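/* For instance, emit_set_insn (reg, const0_rtx) emits the single insn
   "(set reg (const_int 0))"; it is purely a convenience wrapper and does no
   validation of its own.  */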
/* Return the number of bits set in VALUE. */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;	/* Clear the least-significant set bit. */
    }

  return count;
}
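/* Each iteration of the loop above clears exactly one set bit (Kernighan's
   method), so it runs once per bit: e.g. 0b1011 -> 0b1010 -> 0b1000 -> 0
   gives a count of 3.  */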
/* Return the number of bits set in BMAP. */
static unsigned
bitmap_popcount (const sbitmap bmap)
{
  unsigned int count = 0;
  unsigned int n = 0;
  sbitmap_iterator sbi;

  EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
    count++;

  return count;
}

typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library libfuncs. */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
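/* For example, arm_set_fixed_optab_libfunc (add_optab, E_SQmode, "add",
   "sq", 3) builds the name "__gnu_addsq3" and registers it as the SQmode
   addition libcall.  */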
static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
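/* For example, converting SQmode to DQmode (both signed fract modes) takes
   the "2" suffix and yields "__gnu_fractsqdq2", whereas a fract-to-float
   conversion such as SQmode to SFmode does not, giving "__gnu_fractsqsf".  */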
static GTY(()) rtx speculation_barrier_libfunc;

/* Set up library functions unique to ARM. */
static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations. */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);

  /* There are no special library functions unless we are using the
     ARM BPABI. */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0. */
  /* Double-precision floating-point arithmetic.  Table 2. */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3. */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
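  /* __aeabi_dcmpeq returns a nonzero value iff its operands compare equal,
     so no separate NE libcall is registered: leaving ne_optab NULL makes
     the compiler use the logical negation of the EQ result instead.  The
     SFmode comparisons below follow the same pattern.  */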
  /* Single-precision floating-point arithmetic.  Table 4. */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5. */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
  /* Floating-point to integer conversions.  Table 6. */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7. */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8. */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
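  /* With software floating point, a cast such as "(double) x" for a signed
     64-bit x therefore becomes a call to __aeabi_l2d.  */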
  /* Long long.  Table 9. */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1. */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines. */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster. */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
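  /* So a plain "a / b" on SImode values calls __aeabi_idiv, while "a % b"
     goes through __aeabi_idivmod, whose remainder comes back in r1.  */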
  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead. */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode. */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions. */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      set_conv_libfunc (trunc_optab, HFmode, DFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_d2h_ieee"
			 : "__gnu_d2h_alternative"));

      /* Arithmetic. */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons. */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
  /* Use names prefixed with __gnu_ for fixed-point helper functions. */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
	{ E_QQmode, "qq" },
	{ E_UQQmode, "uqq" },
	{ E_HQmode, "hq" },
	{ E_UHQmode, "uhq" },
	{ E_SQmode, "sq" },
	{ E_USQmode, "usq" },
	{ E_DQmode, "dq" },
	{ E_UDQmode, "udq" },
	{ E_TQmode, "tq" },
	{ E_UTQmode, "utq" },
	{ E_HAmode, "ha" },
	{ E_UHAmode, "uha" },
	{ E_SAmode, "sa" },
	{ E_USAmode, "usa" },
	{ E_DAmode, "da" },
	{ E_UDAmode, "uda" },
	{ E_TAmode, "ta" },
	{ E_UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
	{ E_QQmode, "qq" },
	{ E_UQQmode, "uqq" },
	{ E_HQmode, "hq" },
	{ E_UHQmode, "uhq" },
	{ E_SQmode, "sq" },
	{ E_USQmode, "usq" },
	{ E_DQmode, "dq" },
	{ E_UDQmode, "udq" },
	{ E_TQmode, "tq" },
	{ E_UTQmode, "utq" },
	{ E_HAmode, "ha" },
	{ E_UHAmode, "uha" },
	{ E_SAmode, "sa" },
	{ E_USAmode, "usa" },
	{ E_DAmode, "da" },
	{ E_UDAmode, "uda" },
	{ E_TAmode, "ta" },
	{ E_UTAmode, "uta" },
	{ E_SFmode, "sf" },
	{ E_DFmode, "df" }
      };
    unsigned int i, j;
    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				     "add", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				     "ssadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				     "usadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				     "sub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				     "sssub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				     "ussub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				     "mul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				     "ssmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				     "usmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				     "div", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				     "udiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				     "ssdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				     "usdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				     "neg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				     "ssneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				     "usneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				     "ashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				     "ashr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				     "lshr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				     "ssashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				     "usashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				     "cmp", fixed_arith_modes[i].name, 2);
      }
    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
	{
	  if (i == j
	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	    continue;

	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfract_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (fractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	}
  }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");

  speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
}
/* On AAPCS systems, this is the "struct __va_list". */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list. */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>. */
  /* Create the type. */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name. */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field. */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout. */
  layout_type (va_list_type);

  return va_list_type;
}
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list. */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list". */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Check any incompatible options that the user has specified. */
static void
arm_option_check_internal (struct gcc_options *opts)
{
  int flags = opts->x_target_flags;

  /* iWMMXt and NEON are incompatible. */
  if (TARGET_IWMMXT
      && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
    error ("iWMMXt and NEON are incompatible");

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices. */
  if (TARGET_ARM_P (flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
    error ("target CPU does not support ARM mode");

  /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on. */
  if (TARGET_ARM_P (flags)
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
	     "debugging");

  /* iWMMXt unsupported under Thumb mode. */
  if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
    error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");

  if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }
  if (target_pure_code || target_slow_flash_data)
    {
      const char *flag = (target_pure_code ? "-mpure-code" :
			  "-mslow-flash-data");

      /* We only support -mpure-code and -mslow-flash-data on M-profile targets
	 with MOVT. */
      if (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON)
	error ("%s only supports non-pic code on M-profile targets with the "
	       "MOVT instruction", flag);

      /* Cannot load addresses: -mslow-flash-data forbids literal pool and
	 -mword-relocations forbids relocation of MOVT/MOVW. */
      if (target_word_relocations)
	error ("%s incompatible with %<-mword-relocations%>", flag);
    }
}
/* Recompute the global settings depending on target attribute options. */

static void
arm_option_params_internal (void)
{
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now. */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size. */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
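      /* A Thumb-1 word load encodes a 5-bit immediate scaled by 4, so byte
	 offsets 0..124 are directly addressable; 127 is the 7-bit limit
	 referred to above.  */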
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors. */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
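      /* Check the arithmetic: 248 + 1 + 4095 == 4344 == 8 * 543.  */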
    }
  else
    {
      targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
      targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
    }

  /* Increase the number of conditional instructions with -Os. */
  max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;

  /* For THUMB2, we limit the conditional sequence to one IT block. */
  if (TARGET_THUMB2)
    max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
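  /* A single Thumb-2 IT block can predicate at most four following
     instructions, which is the value MAX_INSN_PER_IT_BLOCK caps the
     skipped sequence at.  */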
  if (TARGET_THUMB1)
    targetm.md_asm_adjust = thumb1_md_asm_adjust;
  else
    targetm.md_asm_adjust = arm_md_asm_adjust;
}
/* True if -mflip-thumb should next add an attribute for the default
   mode, false if it should next add an attribute for the opposite mode. */
static GTY(()) bool thumb_flipper;

/* Options after initial target override. */
static GTY(()) tree init_optimize;

static void
arm_override_options_after_change_1 (struct gcc_options *opts)
{
  /* -falign-functions without argument: supply one. */
  if (opts->x_flag_align_functions && !opts->x_str_align_functions)
    opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
      && opts->x_optimize_size ? "2" : "4";
}
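/* Thumb instructions are only 2-byte aligned, so a 2-byte function
   alignment loses nothing when optimizing Thumb code for size; every other
   configuration keeps the usual 4-byte alignment.  */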
/* Implement targetm.override_options_after_change. */

static void
arm_override_options_after_change (void)
{
  arm_configure_build_target (&arm_active_target,
			      TREE_TARGET_OPTION (target_option_default_node),
			      &global_options_set, false);

  arm_override_options_after_change_1 (&global_options);
}

/* Implement TARGET_OPTION_SAVE. */
static void
arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
{
  ptr->x_arm_arch_string = opts->x_arm_arch_string;
  ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
  ptr->x_arm_tune_string = opts->x_arm_tune_string;
}

/* Implement TARGET_OPTION_RESTORE. */
static void
arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
{
  opts->x_arm_arch_string = ptr->x_arm_arch_string;
  opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
  opts->x_arm_tune_string = ptr->x_arm_tune_string;
  arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
			      false);
}
/* Reset options between modes that the user has specified. */
static void
arm_option_override_internal (struct gcc_options *opts,
			      struct gcc_options *opts_set)
{
  arm_override_options_after_change_1 (opts);

  if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      /* The default is to enable interworking, so this warning message would
	 be confusing to users who have just compiled with
	 eg, -march=armv4. */
      /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
      opts->x_target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB_P (opts->x_target_flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support THUMB instructions");
      opts->x_target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      opts->x_target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere. */
  if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
    opts->x_target_flags |= MASK_INTERWORK;
  /* Need to remember initial values so combinations of options like
     -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
  cl_optimization *to = TREE_OPTIMIZATION (init_optimize);

  if (! opts_set->x_arm_restrict_it)
    opts->x_arm_restrict_it = arm_arch8;

  /* ARM execution state and M profile don't have [restrict] IT. */
  if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
    opts->x_arm_restrict_it = 0;

  /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
  if (!opts_set->x_arm_restrict_it
      && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
    opts->x_arm_restrict_it = 0;
  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
       i.e. Thumb2 and ARM state only.
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors
     - ARMv8-M Baseline processors. */

  if (! opts_set->x_unaligned_access)
    {
      opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
				  && arm_arch6 && (arm_arch_notm || arm_arch7));
    }
  else if (opts->x_unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      opts->x_unaligned_access = 0;
    }
  /* Don't warn since it's on by default in -O2. */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_schedule_insns = 0;
  else
    opts->x_flag_schedule_insns = to->x_flag_schedule_insns;

  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns. */
  if (optimize_function_for_size_p (cfun)
      && TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_flag_shrink_wrap = false;
  else
    opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without fipa-ra
     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fipa-ra. */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_ipa_ra = 0;
  else
    opts->x_flag_ipa_ra = to->x_flag_ipa_ra;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually. */
  if (TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_inline_asm_unified = true;

#ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
  SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
#endif
}
static sbitmap isa_all_fpubits_internal;
static sbitmap isa_all_fpbits;
static sbitmap isa_quirkbits;

/* Configure a build target TARGET from the user-specified options OPTS and
   OPTS_SET.  If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
   architecture have been specified, but the two are not identical. */
void
arm_configure_build_target (struct arm_build_target *target,
			    struct cl_target_option *opts,
			    struct gcc_options *opts_set,
			    bool warn_compatible)
{
  const cpu_option *arm_selected_tune = NULL;
  const arch_option *arm_selected_arch = NULL;
  const cpu_option *arm_selected_cpu = NULL;
  const arm_fpu_desc *arm_selected_fpu = NULL;
  const char *tune_opts = NULL;
  const char *arch_opts = NULL;
  const char *cpu_opts = NULL;

  bitmap_clear (target->isa);
  target->core_name = NULL;
  target->arch_name = NULL;
  if (opts_set->x_arm_arch_string)
    {
      arm_selected_arch = arm_parse_arch_option_name (all_architectures,
						      "-march",
						      opts->x_arm_arch_string);
      arch_opts = strchr (opts->x_arm_arch_string, '+');
    }

  if (opts_set->x_arm_cpu_string)
    {
      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
						    opts->x_arm_cpu_string);
      cpu_opts = strchr (opts->x_arm_cpu_string, '+');
      arm_selected_tune = arm_selected_cpu;
      /* If taking the tuning from -mcpu, we don't need to rescan the
	 options for tuning. */
    }

  if (opts_set->x_arm_tune_string)
    {
      arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
						     opts->x_arm_tune_string);
      tune_opts = strchr (opts->x_arm_tune_string, '+');
    }
  if (arm_selected_arch)
    {
      arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_arch->common,
				 arch_opts);

      if (arm_selected_cpu)
	{
	  auto_sbitmap cpu_isa (isa_num_bits);
	  auto_sbitmap isa_delta (isa_num_bits);

	  arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
	  arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
				     cpu_opts);
	  bitmap_xor (isa_delta, cpu_isa, target->isa);
	  /* Ignore any bits that are quirk bits. */
	  bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
	  /* Ignore (for now) any bits that might be set by -mfpu. */
	  bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits_internal);

	  /* And if the target ISA lacks floating point, ignore any
	     extensions that depend on that. */
	  if (!bitmap_bit_p (target->isa, isa_bit_vfpv2))
	    bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
	  if (!bitmap_empty_p (isa_delta))
	    {
	      if (warn_compatible)
		warning (0, "switch %<-mcpu=%s%> conflicts "
			 "with %<-march=%s%> switch",
			 arm_selected_cpu->common.name,
			 arm_selected_arch->common.name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning. */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
	      target->arch_name = arm_selected_arch->common.name;
	    }
	  else
	    {
	      /* Architecture and CPU are essentially the same.
		 Prefer the CPU setting. */
	      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
	      target->core_name = arm_selected_cpu->common.name;
	      /* Copy the CPU's capabilities, so that we inherit the
		 appropriate extensions and quirks. */
	      bitmap_copy (target->isa, cpu_isa);
	    }
	}
      else
	{
	  /* Pick a CPU based on the architecture. */
	  arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
	  target->arch_name = arm_selected_arch->common.name;
	  /* Note: target->core_name is left unset in this path. */
	}
    }
  else if (arm_selected_cpu)
    {
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }
  /* If the user did not specify a processor or architecture, choose
     one for them. */
  else
    {
      const cpu_option *sel;
      auto_sbitmap sought_isa (isa_num_bits);
      bitmap_clear (sought_isa);
      auto_sbitmap default_isa (isa_num_bits);

      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
						    TARGET_CPU_DEFAULT);
      cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
      gcc_assert (arm_selected_cpu->common.name);
      /* RWE: All of the selection logic below (to the end of this
	 'if' clause) looks somewhat suspect.  It appears to be mostly
	 there to support forcing thumb support when the default CPU
	 does not have thumb (somewhat dubious in terms of what the
	 user might be expecting).  I think it should be removed once
	 support for the pre-thumb era cores is removed. */
      sel = arm_selected_cpu;
      arm_initialize_isa (default_isa, sel->common.isa_bits);
      arm_parse_option_features (default_isa, &arm_selected_cpu->common,
				 cpu_opts);

      /* Now check to see if the user has specified any command line
	 switches that require certain abilities from the cpu. */

      if (TARGET_INTERWORK || TARGET_THUMB)
	bitmap_set_bit (sought_isa, isa_bit_thumb);

      /* If there are such requirements and the default CPU does not
	 satisfy them, we need to run over the complete list of
	 cores looking for one that is satisfactory. */
      if (!bitmap_empty_p (sought_isa)
	  && !bitmap_subset_p (sought_isa, default_isa))
	{
	  auto_sbitmap candidate_isa (isa_num_bits);
	  /* We're only interested in a CPU with at least the
	     capabilities of the default CPU and the required
	     additional features. */
	  bitmap_ior (default_isa, default_isa, sought_isa);

	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user. */
	  for (sel = all_cores; sel->common.name != NULL; sel++)
	    {
	      arm_initialize_isa (candidate_isa, sel->common.isa_bits);
	      /* An exact match? */
	      if (bitmap_equal_p (default_isa, candidate_isa))
		break;
	    }

	  if (sel->common.name == NULL)
	    {
	      unsigned current_bit_count = isa_num_bits;
	      const cpu_option *best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a CPU that has exactly the
		 characteristics of the default CPU and the given
		 command line options we scan the array again looking
		 for a best match.  The best match must have at least
		 the capabilities of the perfect match. */
	      for (sel = all_cores; sel->common.name != NULL; sel++)
		{
		  arm_initialize_isa (candidate_isa, sel->common.isa_bits);

		  if (bitmap_subset_p (default_isa, candidate_isa))
		    {
		      unsigned count;

		      bitmap_and_compl (candidate_isa, candidate_isa,
					default_isa);
		      count = bitmap_popcount (candidate_isa);

		      if (count < current_bit_count)
			{
			  best_fit = sel;
			  current_bit_count = count;
			}
		    }
		}

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}

      /* Now we know the CPU, we can finally initialize the target
	 structure. */
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }
  gcc_assert (arm_selected_cpu);
  gcc_assert (arm_selected_arch);

  if (opts->x_arm_fpu_index != TARGET_FPU_auto)
    {
      arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
      auto_sbitmap fpu_bits (isa_num_bits);

      arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
      bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
      bitmap_ior (target->isa, target->isa, fpu_bits);
    }

  if (!arm_selected_tune)
    arm_selected_tune = arm_selected_cpu;
  else /* Validate the features passed to -mtune. */
    arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);

  const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];

  /* Finish initializing the target structure. */
  target->arch_pp_name = arm_selected_arch->arch;
  target->base_arch = arm_selected_arch->base_arch;
  target->profile = arm_selected_arch->profile;

  target->tune_flags = tune_data->tune_flags;
  target->tune = tune_data->tune;
  target->tune_core = tune_data->scheduler;
  arm_option_reconfigure_globals ();
}
/* Fix up any incompatible options that the user has specified. */
static void
arm_option_override (void)
{
  static const enum isa_feature fpu_bitlist_internal[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  static const enum isa_feature fp_bitlist[]
    = { ISA_ALL_FP, isa_nobit };
  static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
  cl_target_option opts;

  isa_quirkbits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_quirkbits, quirk_bitlist);

  isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
  isa_all_fpbits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
  arm_initialize_isa (isa_all_fpbits, fp_bitlist);

  arm_active_target.isa = sbitmap_alloc (isa_num_bits);

  if (!global_options_set.x_arm_fpu_index)
    {
      bool ok;
      int fpu_index;

      ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
				  CL_TARGET);
      gcc_assert (ok);
      arm_fpu_index = (enum fpu_type) fpu_index;
    }

  cl_target_option_save (&opts, &global_options);
  arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
			      true);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file and for enabling feature flags. */
  arm_option_reconfigure_globals ();

  arm_tune = arm_active_target.tune_core;
  tune_flags = arm_active_target.tune_flags;
  current_tune = arm_active_target.tune;

  /* TBD: Dwarf info for apcs frame is not handled yet. */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "%<-mapcs-stack-check%> incompatible with "
	       "%<-mno-apcs-frame%>");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* Set up some tuning parameters. */
  arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
  arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
  arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
  arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
  arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;

  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point. */
  if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;
  /* Override the default structure alignment for AAPCS ABI. */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      warning (0, "option %<-mstructure-size-boundary%> is deprecated");

      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }
  if (TARGET_VXWORKS_RTP)
    {
      if (!global_options_set.x_arm_pic_data_is_text_relative)
	arm_pic_data_is_text_relative = 0;
    }
  else if (flag_pic
	   && !arm_pic_data_is_text_relative
	   && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
    /* When text & data segments don't have a fixed displacement, the
       intended use is with a single, read only, pic base register.
       Unless the user explicitly requested not to do that, set
       it now. */
    target_flags |= MASK_SINGLE_PIC_BASE;

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register. */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  /* If in FDPIC mode then force arm_pic_register to be r9. */
  if (TARGET_FDPIC)
    {
      arm_pic_register = FDPIC_REGNUM;
      if (TARGET_THUMB1)
	sorry ("FDPIC mode is not supported in Thumb-1 mode");
    }
  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");

      /* Prevent the user from choosing an obviously stupid PIC register. */
      else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use %qs for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }

    target_word_relocations = 1;

  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
  if (fix_cm3_ldrd == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }
3536 /* Hot/Cold partitioning is not currently supported, since we can't
3537 handle literal pool placement in that case. */
3538 if (flag_reorder_blocks_and_partition)
3540 inform (input_location,
3541 "%<-freorder-blocks-and-partition%> not supported "
3542 "on this architecture");
3543 flag_reorder_blocks_and_partition = 0;
3544 flag_reorder_blocks = 1;
3548 /* Hoisting PIC address calculations more aggressively provides a small,
3549 but measurable, size reduction for PIC code. Therefore, we decrease
3550 the bar for unrestricted expression hoisting to the cost of PIC address
3551 calculation, which is 2 instructions. */
3552 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3553 param_gcse_unrestricted_cost, 2);
3555 /* ARM EABI defaults to strict volatile bitfields. */
3556 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3557 && abi_version_at_least(2))
3558 flag_strict_volatile_bitfields = 1;
3560 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3561 have deemed it beneficial (signified by setting
3562 prefetch.num_slots to 1 or more). */
3563 if (flag_prefetch_loop_arrays < 0
3566 && current_tune->prefetch.num_slots > 0)
3567 flag_prefetch_loop_arrays = 1;
3569 /* Set up parameters to be used in prefetching algorithm. Do not
3570 override the defaults unless we are tuning for a core we have
3571 researched values for. */
3572 if (current_tune->prefetch.num_slots > 0)
3573 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3574 param_simultaneous_prefetches,
3575 current_tune->prefetch.num_slots);
3576 if (current_tune->prefetch.l1_cache_line_size >= 0)
3577 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3578 param_l1_cache_line_size,
3579 current_tune->prefetch.l1_cache_line_size);
3580 if (current_tune->prefetch.l1_cache_size >= 0)
3581 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3582 param_l1_cache_size,
3583 current_tune->prefetch.l1_cache_size);
3585 /* Look through ready list and all of queue for instructions
3586 relevant for L2 auto-prefetcher. */
3587 int sched_autopref_queue_depth;
3589 switch (current_tune->sched_autopref)
3591 case tune_params::SCHED_AUTOPREF_OFF:
3592 sched_autopref_queue_depth = -1;
3595 case tune_params::SCHED_AUTOPREF_RANK:
3596 sched_autopref_queue_depth = 0;
3599 case tune_params::SCHED_AUTOPREF_FULL:
3600 sched_autopref_queue_depth = max_insn_queue_index + 1;
3607 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3608 param_sched_autopref_queue_depth,
3609 sched_autopref_queue_depth);
3611 /* Currently, for slow flash data, we just disable literal pools. We also
3612 disable them for pure-code. */
3613 if (target_slow_flash_data || target_pure_code)
3614 arm_disable_literal_pool = true;
3616 /* Disable scheduling fusion by default if the target is not an ARMv7
3617 processor or does not prefer ldrd/strd. */
3618 if (flag_schedule_fusion == 2
3619 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3620 flag_schedule_fusion = 0;
3622 /* Need to remember initial options before they are overridden. */
3623 init_optimize = build_optimization_node (&global_options);
3625 arm_options_perform_arch_sanity_checks ();
3626 arm_option_override_internal (&global_options, &global_options_set);
3627 arm_option_check_internal (&global_options);
3628 arm_option_params_internal ();
3630 /* Create the default target_options structure. */
3631 target_option_default_node = target_option_current_node
3632 = build_target_option_node (&global_options);
3634 /* Register global variables with the garbage collector. */
3635 arm_add_gc_roots ();
3637 /* Record the initial mode for testing. */
3638 thumb_flipper = TARGET_THUMB;
3642 /* Reconfigure global status flags from the active_target.isa. */
3644 arm_option_reconfigure_globals (void)
3646 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3647 arm_base_arch = arm_active_target.base_arch;
3649 /* Initialize boolean versions of the architectural flags, for use
3650 in the arm.md file. */
3651 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3652 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3653 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3654 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3655 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3656 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3657 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3658 arm_arch6m = arm_arch6 && !arm_arch_notm;
3659 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3660 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3661 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3662 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3663 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3664 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3665 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3666 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3667 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3668 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3669 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3670 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3671 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3672 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3673 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3674 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3675 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3676 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3679 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3680 error ("selected fp16 options are incompatible");
3681 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3684 /* And finally, set up some quirks. */
3685 arm_arch_no_volatile_ce
3686 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3687 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3688 isa_bit_quirk_armv6kz);
3690 /* Use the cp15 method if it is available. */
3691 if (target_thread_pointer == TP_AUTO)
3693 if (arm_arch6k && !TARGET_THUMB1)
3694 target_thread_pointer = TP_CP15;
3696 target_thread_pointer = TP_SOFT;
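/* For reference, a sketch (illustrative; the actual expansion lives in the
   machine description, not here) of what the two choices above mean at the
   instruction level:

     TP_CP15:  mrc p15, 0, rX, c13, c0, 3   @ read TPIDRURO directly
     TP_SOFT:  bl  __aeabi_read_tp          @ EABI helper call

   so the cp15 form is preferred whenever the core (ARMv6K or later, and not
   Thumb-1) can execute it. */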
3700 /* Perform some validation between the desired architecture and the rest of the options. */
3703 arm_options_perform_arch_sanity_checks (void)
3705 /* V5T code we generate is completely interworking capable, so we turn off
3706 TARGET_INTERWORK here to avoid many tests later on. */
3708 /* XXX However, we must pass the right pre-processor defines to CPP
3709 or GLD can get confused. This is a hack. */
3710 if (TARGET_INTERWORK)
3711 arm_cpp_interwork = 1;
3714 target_flags &= ~MASK_INTERWORK;
3716 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3717 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3719 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3720 error ("iwmmxt abi requires an iwmmxt capable cpu");
3722 /* BPABI targets use linker tricks to allow interworking on cores
3723 without thumb support. */
3724 if (TARGET_INTERWORK
3726 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3728 warning (0, "target CPU does not support interworking");
3729 target_flags &= ~MASK_INTERWORK;
3732 /* If soft-float is specified then don't use FPU. */
3733 if (TARGET_SOFT_FLOAT)
3734 arm_fpu_attr = FPU_NONE;
3736 arm_fpu_attr = FPU_VFP;
3738 if (TARGET_AAPCS_BASED)
3740 if (TARGET_CALLER_INTERWORKING)
3741 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3743 if (TARGET_CALLEE_INTERWORKING)
3744 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3747 /* __fp16 support currently assumes the core has ldrh. */
3748 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3749 sorry ("__fp16 and no ldrh");
3751 if (use_cmse && !arm_arch_cmse)
3752 error ("target CPU does not support ARMv8-M Security Extensions");
3754 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3755 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3756 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3757 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3760 if (TARGET_AAPCS_BASED)
3762 if (arm_abi == ARM_ABI_IWMMXT)
3763 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3764 else if (TARGET_HARD_FLOAT_ABI)
3766 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3767 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3768 error ("%<-mfloat-abi=hard%>: selected processor lacks an FPU");
3771 arm_pcs_default = ARM_PCS_AAPCS;
3775 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3776 sorry ("%<-mfloat-abi=hard%> and VFP");
3778 if (arm_abi == ARM_ABI_APCS)
3779 arm_pcs_default = ARM_PCS_APCS;
3781 arm_pcs_default = ARM_PCS_ATPCS;
3785 /* Test whether a local function descriptor is canonical, i.e.,
3786 whether we can use GOTOFFFUNCDESC to compute the address of the function. */
3789 arm_fdpic_local_funcdesc_p (rtx fnx)
3792 enum symbol_visibility vis;
3798 if (! SYMBOL_REF_LOCAL_P (fnx))
3801 fn = SYMBOL_REF_DECL (fnx);
3806 vis = DECL_VISIBILITY (fn);
3808 if (vis == VISIBILITY_PROTECTED)
3809 /* Private function descriptors for protected functions are not
3810 canonical. Temporarily change the visibility to global so that
3811 we can ensure uniqueness of funcdesc pointers. */
3812 DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
3814 ret = default_binds_local_p_1 (fn, flag_pic);
3816 DECL_VISIBILITY (fn) = vis;
3822 arm_add_gc_roots (void)
3824 gcc_obstack_init (&minipool_obstack);
3825 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3828 /* A table of known ARM exception types.
3829 For use with the interrupt function attribute. */
3833 const char *const arg;
3834 const unsigned long return_value;
3838 static const isr_attribute_arg isr_attribute_args [] =
3840 { "IRQ", ARM_FT_ISR },
3841 { "irq", ARM_FT_ISR },
3842 { "FIQ", ARM_FT_FIQ },
3843 { "fiq", ARM_FT_FIQ },
3844 { "ABORT", ARM_FT_ISR },
3845 { "abort", ARM_FT_ISR },
3846 { "ABORT", ARM_FT_ISR },
3847 { "abort", ARM_FT_ISR },
3848 { "UNDEF", ARM_FT_EXCEPTION },
3849 { "undef", ARM_FT_EXCEPTION },
3850 { "SWI", ARM_FT_EXCEPTION },
3851 { "swi", ARM_FT_EXCEPTION },
3852 { NULL, ARM_FT_NORMAL }
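/* Illustrative usage (user code, not part of this file) of the spellings
   accepted above:

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

   A string not in the table falls through to the NULL sentinel, and
   arm_isr_value below reports ARM_FT_UNKNOWN. */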
3855 /* Returns the (interrupt) function type of the current
3856 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3858 static unsigned long
3859 arm_isr_value (tree argument)
3861 const isr_attribute_arg * ptr;
3865 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3867 /* No argument - default to IRQ. */
3868 if (argument == NULL_TREE)
3871 /* Get the value of the argument. */
3872 if (TREE_VALUE (argument) == NULL_TREE
3873 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3874 return ARM_FT_UNKNOWN;
3876 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3878 /* Check it against the list of known arguments. */
3879 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3880 if (streq (arg, ptr->arg))
3881 return ptr->return_value;
3883 /* An unrecognized interrupt type. */
3884 return ARM_FT_UNKNOWN;
3887 /* Computes the type of the current function. */
3889 static unsigned long
3890 arm_compute_func_type (void)
3892 unsigned long type = ARM_FT_UNKNOWN;
3896 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3898 /* Decide if the current function is volatile. Such functions
3899 never return, and many memory cycles can be saved by not storing
3900 register values that will never be needed again. This optimization
3901 was added to speed up context switching in a kernel application. */
3903 && (TREE_NOTHROW (current_function_decl)
3904 || !(flag_unwind_tables
3906 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3907 && TREE_THIS_VOLATILE (current_function_decl))
3908 type |= ARM_FT_VOLATILE;
3910 if (cfun->static_chain_decl != NULL)
3911 type |= ARM_FT_NESTED;
3913 attr = DECL_ATTRIBUTES (current_function_decl);
3915 a = lookup_attribute ("naked", attr);
3917 type |= ARM_FT_NAKED;
3919 a = lookup_attribute ("isr", attr);
3921 a = lookup_attribute ("interrupt", attr);
3924 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3926 type |= arm_isr_value (TREE_VALUE (a));
3928 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3929 type |= ARM_FT_CMSE_ENTRY;
3934 /* Returns the type of the current function. */
3937 arm_current_func_type (void)
3939 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3940 cfun->machine->func_type = arm_compute_func_type ();
3942 return cfun->machine->func_type;
3946 arm_allocate_stack_slots_for_args (void)
3948 /* Naked functions should not allocate stack slots for arguments. */
3949 return !IS_NAKED (arm_current_func_type ());
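/* Illustrative usage (user code, not part of this file): a naked function
   supplies its whole body in inline assembly, which is why no argument
   stack slots are allocated above and the missing-return warning is
   suppressed below. The branch target here is hypothetical:

     __attribute__ ((naked)) void
     reset_handler (void)
     {
       __asm__ volatile ("b my_start");   @ hypothetical entry symbol
     }
*/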
3953 arm_warn_func_return (tree decl)
3955 /* Naked functions are implemented entirely in assembly, including the
3956 return sequence, so suppress warnings about this. */
3957 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3961 /* Output assembler code for a block containing the constant parts
3962 of a trampoline, leaving space for the variable parts.
3964 On the ARM, (if r8 is the static chain regnum, and remembering that
3965 referencing pc adds an offset of 8) the trampoline looks like:
ldr r8, [pc, #0]
ldr pc, [pc]
3968 .word static chain value
3969 .word function's address
3970 XXX FIXME: When the trampoline returns, r8 will be clobbered.
3972 In FDPIC mode, the trampoline looks like:
3973 .word trampoline address
3974 .word trampoline GOT address
3975 ldr r12, [pc, #8] ; #4 for Arm mode
3976 ldr r9, [pc, #8] ; #4 for Arm mode
3977 ldr pc, [pc, #8] ; #4 for Arm mode
3978 .word static chain value
.word GOT address
3980 .word function's address
3984 arm_asm_trampoline_template (FILE *f)
3986 fprintf (f, "\t.syntax unified\n");
3990 /* The first two words are a function descriptor pointing to the
3991 trampoline code just below. */
3993 fprintf (f, "\t.arm\n");
3994 else if (TARGET_THUMB2)
3995 fprintf (f, "\t.thumb\n");
3997 /* Only ARM and Thumb-2 are supported. */
4000 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4001 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4002 /* Trampoline code which sets the static chain register but also
4003 PIC register before jumping into real code. */
4004 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4005 STATIC_CHAIN_REGNUM, PC_REGNUM,
4006 TARGET_THUMB2 ? 8 : 4);
4007 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4008 PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4009 TARGET_THUMB2 ? 8 : 4);
4010 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4011 PC_REGNUM, PC_REGNUM,
4012 TARGET_THUMB2 ? 8 : 4);
4013 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4015 else if (TARGET_ARM)
4017 fprintf (f, "\t.arm\n");
4018 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4019 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4021 else if (TARGET_THUMB2)
4023 fprintf (f, "\t.thumb\n");
4024 /* The Thumb-2 trampoline is similar to the arm implementation.
4025 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4026 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4027 STATIC_CHAIN_REGNUM, PC_REGNUM);
4028 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4032 ASM_OUTPUT_ALIGN (f, 2);
4033 fprintf (f, "\t.code\t16\n");
4034 fprintf (f, ".Ltrampoline_start:\n");
4035 asm_fprintf (f, "\tpush\t{r0, r1}\n");
4036 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4037 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4038 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4039 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4040 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4042 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4043 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
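/* For orientation (derived from arm_trampoline_init below, FDPIC case):
   the word offsets within the initialized trampoline are

     0:  address of the trampoline code (the initial funcdesc)
     4:  value of the PIC register
     8-19: the three ldr instructions emitted above
     20: static chain value
     24: GOT address
     28: function's real entry point */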
4046 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4049 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4051 rtx fnaddr, mem, a_tramp;
4053 emit_block_move (m_tramp, assemble_trampoline_template (),
4054 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4058 rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4059 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4060 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4061 /* The function start address is at offset 8, but in Thumb mode
4062 we want bit 0 set to 1 to indicate Thumb-ness, hence 9 below. */
4064 rtx trampoline_code_start
4065 = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4067 /* Write initial funcdesc which points to the trampoline. */
4068 mem = adjust_address (m_tramp, SImode, 0);
4069 emit_move_insn (mem, trampoline_code_start);
4070 mem = adjust_address (m_tramp, SImode, 4);
4071 emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4072 /* Set up the static chain. */
4073 mem = adjust_address (m_tramp, SImode, 20);
4074 emit_move_insn (mem, chain_value);
4075 /* GOT + real function entry point. */
4076 mem = adjust_address (m_tramp, SImode, 24);
4077 emit_move_insn (mem, gotaddr);
4078 mem = adjust_address (m_tramp, SImode, 28);
4079 emit_move_insn (mem, fnaddr);
4083 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4084 emit_move_insn (mem, chain_value);
4086 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4087 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4088 emit_move_insn (mem, fnaddr);
4091 a_tramp = XEXP (m_tramp, 0);
4092 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4093 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
4094 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
4097 /* Thumb trampolines should be entered in thumb mode, so set
4098 the bottom bit of the address. */
4101 arm_trampoline_adjust_address (rtx addr)
4103 /* For FDPIC don't fix trampoline address since it's a function
4104 descriptor and not a function address. */
4105 if (TARGET_THUMB && !TARGET_FDPIC)
4106 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4107 NULL, 0, OPTAB_LIB_WIDEN);
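/* A minimal sketch (illustrative, assuming 32-bit addresses) of the same
   convention in plain C: interworking treats an odd target address as
   "enter in Thumb state", which is all the IOR with const1_rtx above
   achieves. */
static unsigned int ATTRIBUTE_UNUSED
thumb_entry_address (unsigned int addr)
{
  return addr | 1; /* bit 0 set: the destination is Thumb code */
}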
4111 /* Return 1 if it is possible to return using a single instruction.
4112 If SIBLING is non-null, this is a test for a return before a sibling
4113 call. SIBLING is the call insn, so we can examine its register usage. */
4116 use_return_insn (int iscond, rtx sibling)
4119 unsigned int func_type;
4120 unsigned long saved_int_regs;
4121 unsigned HOST_WIDE_INT stack_adjust;
4122 arm_stack_offsets *offsets;
4124 /* Never use a return instruction before reload has run. */
4125 if (!reload_completed)
4128 func_type = arm_current_func_type ();
4130 /* Naked, volatile and stack alignment functions need special consideration. */
4132 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4135 /* So do interrupt functions that use the frame pointer and Thumb
4136 interrupt functions. */
4137 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4140 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4141 && !optimize_function_for_size_p (cfun))
4144 offsets = arm_get_frame_offsets ();
4145 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4147 /* As do variadic functions. */
4148 if (crtl->args.pretend_args_size
4149 || cfun->machine->uses_anonymous_args
4150 /* Or if the function calls __builtin_eh_return () */
4151 || crtl->calls_eh_return
4152 /* Or if the function calls alloca */
4153 || cfun->calls_alloca
4154 /* Or if there is a stack adjustment. However, if the stack pointer
4155 is saved on the stack, we can use a pre-incrementing stack load. */
4156 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4157 && stack_adjust == 4))
4158 /* Or if the static chain register was saved above the frame, under the
4159 assumption that the stack pointer isn't saved on the stack. */
4160 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4161 && arm_compute_static_chain_stack_bytes() != 0))
4164 saved_int_regs = offsets->saved_regs_mask;
4166 /* Unfortunately, the insn
4168 ldmib sp, {..., sp, ...}
4170 triggers a bug on most SA-110 based devices, such that the stack
4171 pointer won't be correctly restored if the instruction takes a
4172 page fault. We work around this problem by popping r3 along with
4173 the other registers, since that is never slower than executing
4174 another instruction.
4176 We test for !arm_arch5t here, because code for any architecture
4177 less than this could potentially be run on one of the buggy chips. */
4179 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4181 /* Validate that r3 is a call-clobbered register (always true in
4182 the default ABI) ...
4183 if (!call_used_or_fixed_reg_p (3))
4186 /* ... that it isn't being used for a return value ... */
4187 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4190 /* ... or for a tail-call argument ... */
4193 gcc_assert (CALL_P (sibling));
4195 if (find_regno_fusage (sibling, USE, 3))
4199 /* ... and that there are no call-saved registers in r0-r2
4200 (always true in the default ABI). */
4201 if (saved_int_regs & 0x7)
4205 /* Can't be done if interworking with Thumb, and any registers have been stacked. */
4207 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4210 /* On StrongARM, conditional returns are expensive if they aren't
4211 taken and multiple registers have been stacked. */
4212 if (iscond && arm_tune_strongarm)
4214 /* Conditional return when just the LR is stored is a simple
4215 conditional-load instruction, that's not expensive. */
4216 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4220 && arm_pic_register != INVALID_REGNUM
4221 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4225 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4226 several instructions if anything needs to be popped. */
4227 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4230 /* If there are saved registers but the LR isn't saved, then we need
4231 two instructions for the return. */
4232 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4235 /* Can't be done if any of the VFP regs are pushed,
4236 since this also requires an insn. */
4237 if (TARGET_HARD_FLOAT)
4238 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4239 if (df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
4242 if (TARGET_REALLY_IWMMXT)
4243 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4244 if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
4250 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4251 shrink-wrapping if possible. This is the case if we need to emit a
4252 prologue, which we can test by looking at the offsets. */
4254 use_simple_return_p (void)
4256 arm_stack_offsets *offsets;
4258 /* Note this function can be called before or after reload. */
4259 if (!reload_completed)
4260 arm_compute_frame_layout ();
4262 offsets = arm_get_frame_offsets ();
4263 return offsets->outgoing_args != 0;
4266 /* Return TRUE if int I is a valid immediate ARM constant. */
4269 const_ok_for_arm (HOST_WIDE_INT i)
4273 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4274 be all zero, or all one. */
4275 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4276 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4277 != ((~(unsigned HOST_WIDE_INT) 0)
4278 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4281 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4283 /* Fast return for 0 and small values. We must do this for zero, since
4284 the code below can't handle that one case. */
4285 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4288 /* Get the number of trailing zeros. */
4289 lowbit = ffs((int) i) - 1;
4291 /* Only even shifts are allowed in ARM mode so round down to the
4292 nearest even number. */
4296 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4301 /* Allow rotated constants in ARM mode. */
4303 && ((i & ~0xc000003f) == 0
4304 || (i & ~0xf000000f) == 0
4305 || (i & ~0xfc000003) == 0))
4308 else if (TARGET_THUMB2)
4312 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4315 if (i == v || i == (v | (v << 8)))
4318 /* Allow repeated pattern 0xXY00XY00. */
4324 else if (TARGET_HAVE_MOVT)
4326 /* Thumb-1 Targets with MOVT. */
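/* A stand-alone restatement (illustrative sketch, assuming a 32-bit
   unsigned int; not used elsewhere in this file) of the ARM-mode rule
   tested above: a constant is encodable when it is an 8-bit value rotated
   right by an even amount. E.g. 0xff000000 (0xff ror 8) passes, while
   0x101 does not. */
static int ATTRIBUTE_UNUSED
arm_rotated_imm8_p (unsigned int v)
{
  int rot;
  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotating V left by ROT undoes a rotate-right encoding of ROT. */
      unsigned int left = (v << rot) | (rot ? v >> (32 - rot) : 0);
      if (left <= 0xff)
	return 1;
    }
  return 0;
}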
4336 /* Return true if I is a valid constant for the operation CODE. */
4338 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4340 if (const_ok_for_arm (i))
4346 /* See if we can use movw. */
4347 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4350 /* Otherwise, try mvn. */
4351 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4354 /* See if we can use addw or subw. */
4356 && ((i & 0xfffff000) == 0
4357 || ((-i) & 0xfffff000) == 0))
4378 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4380 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4386 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4390 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4397 /* Return true if I is a valid DImode constant for the operation CODE. */
4399 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4401 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4402 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4403 rtx hi = GEN_INT (hi_val);
4404 rtx lo = GEN_INT (lo_val);
4414 return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4415 || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4417 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
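/* A minimal sketch (illustrative, assuming a 64-bit HOST_WIDE_INT as the
   function above does) of the split performed above: the 64-bit constant
   is simply treated as two independent 32-bit halves. */
static void ATTRIBUTE_UNUSED
split_dimode_constant (unsigned HOST_WIDE_INT i,
		       unsigned int *hi, unsigned int *lo)
{
  *hi = (i >> 32) & 0xffffffff;	/* matches hi_val above */
  *lo = i & 0xffffffff;		/* matches lo_val above */
}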
4424 /* Emit a sequence of insns to handle a large constant.
4425 CODE is the code of the operation required, it can be any of SET, PLUS,
4426 IOR, AND, XOR, MINUS;
4427 MODE is the mode in which the operation is being performed;
4428 VAL is the integer to operate on;
4429 SOURCE is the other operand (a register, or a null-pointer for SET);
4430 SUBTARGETS means it is safe to create scratch registers if that will
4431 either produce a simpler sequence, or we will want to cse the values.
4432 Return value is the number of insns emitted. */
4434 /* ??? Tweak this for thumb2. */
4436 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4437 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4441 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4442 cond = COND_EXEC_TEST (PATTERN (insn));
4446 if (subtargets || code == SET
4447 || (REG_P (target) && REG_P (source)
4448 && REGNO (target) != REGNO (source)))
4450 /* After arm_reorg has been called, we can't fix up expensive
4451 constants by pushing them into memory so we must synthesize
4452 them in-line, regardless of the cost. This is only likely to
4453 be more costly on chips that have load delay slots and we are
4454 compiling without running the scheduler (so no splitting
4455 occurred before the final instruction emission).
4457 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4459 if (!cfun->machine->after_arm_reorg
4461 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4463 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4468 /* Currently SET is the only monadic value for CODE, all
4469 the rest are dyadic. */
4470 if (TARGET_USE_MOVT)
4471 arm_emit_movpair (target, GEN_INT (val));
4473 emit_set_insn (target, GEN_INT (val));
4479 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4481 if (TARGET_USE_MOVT)
4482 arm_emit_movpair (temp, GEN_INT (val));
4484 emit_set_insn (temp, GEN_INT (val));
4486 /* For MINUS, the value is subtracted from, since we never
4487 have subtraction of a constant. */
4489 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4491 emit_set_insn (target,
4492 gen_rtx_fmt_ee (code, mode, source, temp));
4498 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
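/* Illustration (not emitted verbatim by this function): when
   TARGET_USE_MOVT holds, the movpair expansion used above loads any
   32-bit constant in two halfword moves, e.g. for 0xdeadbeef:

     movw rD, #0xbeef   @ writes the low half, clears the high half
     movt rD, #0xdead   @ writes the high half, preserves the low half */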
4502 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4503 ARM/THUMB2 immediates and add up to VAL.
4504 The function return value gives the number of insns required. */
4506 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4507 struct four_ints *return_sequence)
4509 int best_consecutive_zeros = 0;
4513 struct four_ints tmp_sequence;
4515 /* If we aren't targeting ARM, the best place to start is always at
4516 the bottom; otherwise look more closely. */
4519 for (i = 0; i < 32; i += 2)
4521 int consecutive_zeros = 0;
4523 if (!(val & (3 << i)))
4525 while ((i < 32) && !(val & (3 << i)))
4527 consecutive_zeros += 2;
4530 if (consecutive_zeros > best_consecutive_zeros)
4532 best_consecutive_zeros = consecutive_zeros;
4533 best_start = i - consecutive_zeros;
4540 /* So long as it won't require any more insns to do so, it's
4541 desirable to emit a small constant (in bits 0...9) in the last
4542 insn. This way there is more chance that it can be combined with
4543 a later addressing insn to form a pre-indexed load or store
4544 operation. Consider:
4546 *((volatile int *)0xe0000100) = 1;
4547 *((volatile int *)0xe0000110) = 2;
4549 We want this to wind up as:
4553 str rB, [rA, #0x100]
4555 str rB, [rA, #0x110]
4557 rather than having to synthesize both large constants from scratch.
4559 Therefore, we calculate how many insns would be required to emit
4560 the constant starting from `best_start', and also starting from
4561 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4562 yield a shorter sequence, we may as well use zero. */
4563 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4565 && ((HOST_WIDE_INT_1U << best_start) < val))
4567 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4568 if (insns2 <= insns1)
4570 *return_sequence = tmp_sequence;
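/* Worked example (illustrative): VAL = 0x00ff0ff0 with CODE = SET needs
   two immediates, both valid 8-bit rotations:

     mov rD, #0x00ff0000        @ 0xff ror 16
     orr rD, rD, #0x00000ff0    @ 0xff ror 28 */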
4578 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4580 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4581 struct four_ints *return_sequence, int i)
4583 int remainder = val & 0xffffffff;
4586 /* Try to find a way of doing the job in either two or three instructions.
4589 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4590 location. We start at position I. This may be the MSB, or
4591 optimal_immediate_sequence may have positioned it at the largest block
4592 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4593 wrapping around to the top of the word when we drop off the bottom.
4594 In the worst case this code should produce no more than four insns.
4596 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4597 constants, shifted to any arbitrary location. We should always start
4602 unsigned int b1, b2, b3, b4;
4603 unsigned HOST_WIDE_INT result;
4606 gcc_assert (insns < 4);
4611 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4612 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4615 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4616 /* We can use addw/subw for the last 12 bits. */
4620 /* Use an 8-bit shifted/rotated immediate. */
4624 result = remainder & ((0x0ff << end)
4625 | ((i < end) ? (0xff >> (32 - end))
4632 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4633 arbitrary shifts. */
4634 i -= TARGET_ARM ? 2 : 1;
4638 /* Next, see if we can do a better job with a thumb2 replicated constant.
4641 We do it this way around to catch the cases like 0x01F001E0 where
4642 two 8-bit immediates would work, but a replicated constant would
4645 TODO: 16-bit constants that don't clear all the bits, but still win.
4646 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4649 b1 = (remainder & 0xff000000) >> 24;
4650 b2 = (remainder & 0x00ff0000) >> 16;
4651 b3 = (remainder & 0x0000ff00) >> 8;
4652 b4 = remainder & 0xff;
4656 /* The 8-bit immediate already found clears b1 (and maybe b2),
4657 but must leave b3 and b4 alone. */
4659 /* First try to find a 32-bit replicated constant that clears
4660 almost everything. We can assume that we can't do it in one,
4661 or else we wouldn't be here. */
4662 unsigned int tmp = b1 & b2 & b3 & b4;
4663 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4665 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4666 + (tmp == b3) + (tmp == b4);
4668 && (matching_bytes >= 3
4669 || (matching_bytes == 2
4670 && const_ok_for_op (remainder & ~tmp2, code))))
4672 /* At least 3 of the bytes match, and the fourth has at
4673 least as many bits set, or two of the bytes match
4674 and it will only require one more insn to finish. */
4682 /* Second, try to find a 16-bit replicated constant that can
4683 leave three of the bytes clear. If b2 or b4 is already
4684 zero, then we can. If the 8-bit from above would not
4685 clear b2 anyway, then we still win. */
4686 else if (b1 == b3 && (!b2 || !b4
4687 || (remainder & 0x00ff0000 & ~result)))
4689 result = remainder & 0xff00ff00;
4695 /* The 8-bit immediate already found clears b2 (and maybe b3)
4696 and we don't get here unless b1 is already clear, but it will
4697 leave b4 unchanged. */
4699 /* If we can clear b2 and b4 at once, then we win, since the
4700 8-bits couldn't possibly reach that far. */
4703 result = remainder & 0x00ff00ff;
4709 return_sequence->i[insns++] = result;
4710 remainder &= ~result;
4712 if (code == SET || code == MINUS)
4720 /* Emit an instruction with the indicated PATTERN. If COND is
4721 non-NULL, conditionalize the execution of the instruction on COND being true. */
4725 emit_constant_insn (rtx cond, rtx pattern)
4728 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4729 emit_insn (pattern);
4732 /* As above, but extra parameter GENERATE which, if clear, suppresses RTL generation. */
4736 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4737 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4738 int subtargets, int generate)
4742 int final_invert = 0;
4744 int set_sign_bit_copies = 0;
4745 int clear_sign_bit_copies = 0;
4746 int clear_zero_bit_copies = 0;
4747 int set_zero_bit_copies = 0;
4748 int insns = 0, neg_insns, inv_insns;
4749 unsigned HOST_WIDE_INT temp1, temp2;
4750 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4751 struct four_ints *immediates;
4752 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4754 /* Find out which operations are safe for a given CODE. Also do a quick
4755 check for degenerate cases; these can occur when DImode operations are split. */
4768 if (remainder == 0xffffffff)
4771 emit_constant_insn (cond,
4772 gen_rtx_SET (target,
4773 GEN_INT (ARM_SIGN_EXTEND (val))));
4779 if (reload_completed && rtx_equal_p (target, source))
4783 emit_constant_insn (cond, gen_rtx_SET (target, source));
4792 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4795 if (remainder == 0xffffffff)
4797 if (reload_completed && rtx_equal_p (target, source))
4800 emit_constant_insn (cond, gen_rtx_SET (target, source));
4809 if (reload_completed && rtx_equal_p (target, source))
4812 emit_constant_insn (cond, gen_rtx_SET (target, source));
4816 if (remainder == 0xffffffff)
4819 emit_constant_insn (cond,
4820 gen_rtx_SET (target,
4821 gen_rtx_NOT (mode, source)));
4828 /* We treat MINUS as (val - source), since (source - val) is always
4829 passed as (source + (-val)). */
4833 emit_constant_insn (cond,
4834 gen_rtx_SET (target,
4835 gen_rtx_NEG (mode, source)));
4838 if (const_ok_for_arm (val))
4841 emit_constant_insn (cond,
4842 gen_rtx_SET (target,
4843 gen_rtx_MINUS (mode, GEN_INT (val),
4854 /* If we can do it in one insn get out quickly. */
4855 if (const_ok_for_op (val, code))
4858 emit_constant_insn (cond,
4859 gen_rtx_SET (target,
4861 ? gen_rtx_fmt_ee (code, mode, source,
4867 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single insn. */
4869 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4870 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4874 if (mode == SImode && i == 16)
4875 /* Use UXTH in preference to UBFX, since on Thumb2 it's a 2-byte insn. */
4877 emit_constant_insn (cond,
4878 gen_zero_extendhisi2
4879 (target, gen_lowpart (HImode, source)));
4881 /* Extz only supports SImode, but we can coerce the operands into that mode. */
4883 emit_constant_insn (cond,
4884 gen_extzv_t2 (gen_lowpart (SImode, target),
4885 gen_lowpart (SImode, source),
4886 GEN_INT (i), const0_rtx));
4892 /* Calculate a few attributes that may be useful for specific optimizations. */
4894 /* Count number of leading zeros. */
4895 for (i = 31; i >= 0; i--)
4897 if ((remainder & (1 << i)) == 0)
4898 clear_sign_bit_copies++;
4903 /* Count number of leading 1's. */
4904 for (i = 31; i >= 0; i--)
4906 if ((remainder & (1 << i)) != 0)
4907 set_sign_bit_copies++;
4912 /* Count number of trailing zeros. */
4913 for (i = 0; i <= 31; i++)
4915 if ((remainder & (1 << i)) == 0)
4916 clear_zero_bit_copies++;
4921 /* Count number of trailing 1's. */
4922 for (i = 0; i <= 31; i++)
4924 if ((remainder & (1 << i)) != 0)
4925 set_zero_bit_copies++;
4933 /* See if we can do this by sign_extending a constant that is known
4934 to be negative. This is a good way of doing it, since the shift
4935 may well merge into a subsequent insn. */
4936 if (set_sign_bit_copies > 1)
4938 if (const_ok_for_arm
4939 (temp1 = ARM_SIGN_EXTEND (remainder
4940 << (set_sign_bit_copies - 1))))
4944 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4945 emit_constant_insn (cond,
4946 gen_rtx_SET (new_src, GEN_INT (temp1)));
4947 emit_constant_insn (cond,
4948 gen_ashrsi3 (target, new_src,
4949 GEN_INT (set_sign_bit_copies - 1)));
4953 /* For an inverted constant, we will need to set the low bits,
4954 these will be shifted out of harm's way. */
4955 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4956 if (const_ok_for_arm (~temp1))
4960 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4961 emit_constant_insn (cond,
4962 gen_rtx_SET (new_src, GEN_INT (temp1)));
4963 emit_constant_insn (cond,
4964 gen_ashrsi3 (target, new_src,
4965 GEN_INT (set_sign_bit_copies - 1)));
4971 /* See if we can calculate the value as the difference between two
4972 valid immediates. */
4973 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4975 int topshift = clear_sign_bit_copies & ~1;
4977 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4978 & (0xff000000 >> topshift));
4980 /* If temp1 is zero, then that means the 9 most significant
4981 bits of remainder were 1 and we've caused it to overflow.
4982 When topshift is 0 we don't need to do anything since we
4983 can borrow from 'bit 32'. */
4984 if (temp1 == 0 && topshift != 0)
4985 temp1 = 0x80000000 >> (topshift - 1);
4987 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4989 if (const_ok_for_arm (temp2))
4993 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4994 emit_constant_insn (cond,
4995 gen_rtx_SET (new_src, GEN_INT (temp1)));
4996 emit_constant_insn (cond,
4997 gen_addsi3 (target, new_src,
5005 /* See if we can generate this by setting the bottom (or the top)
5006 16 bits, and then shifting these into the other half of the
5007 word. We only look for the simplest cases, to do more would cost
5008 too much. Be careful, however, not to generate this when the
5009 alternative would take fewer insns. */
5010 if (val & 0xffff0000)
5012 temp1 = remainder & 0xffff0000;
5013 temp2 = remainder & 0x0000ffff;
5015 /* Overlaps outside this range are best done using other methods. */
5016 for (i = 9; i < 24; i++)
5018 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5019 && !const_ok_for_arm (temp2))
5021 rtx new_src = (subtargets
5022 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5024 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5025 source, subtargets, generate);
5033 gen_rtx_ASHIFT (mode, source,
5040 /* Don't duplicate cases already considered. */
5041 for (i = 17; i < 24; i++)
5043 if (((temp1 | (temp1 >> i)) == remainder)
5044 && !const_ok_for_arm (temp1))
5046 rtx new_src = (subtargets
5047 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5049 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5050 source, subtargets, generate);
5055 gen_rtx_SET (target,
5058 gen_rtx_LSHIFTRT (mode, source,
5069 /* If we have IOR or XOR, and the constant can be loaded in a
5070 single instruction, and we can find a temporary to put it in,
5071 then this can be done in two instructions instead of 3-4. */
5073 /* TARGET can't be NULL if SUBTARGETS is 0 */
5074 || (reload_completed && !reg_mentioned_p (target, source)))
5076 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5080 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5082 emit_constant_insn (cond,
5083 gen_rtx_SET (sub, GEN_INT (val)));
5084 emit_constant_insn (cond,
5085 gen_rtx_SET (target,
5086 gen_rtx_fmt_ee (code, mode,
5097 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
5098 and the remainder 0s for e.g. 0xfff00000)
5099 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
5101 This can be done in 2 instructions by using shifts with mov or mvn.
e.g. for
x = x | 0xfff00000;
we generate:
mvn r0, r0, asl #12
5106 mvn r0, r0, lsr #12 */
5107 if (set_sign_bit_copies > 8
5108 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5112 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5113 rtx shift = GEN_INT (set_sign_bit_copies);
5119 gen_rtx_ASHIFT (mode,
5124 gen_rtx_SET (target,
5126 gen_rtx_LSHIFTRT (mode, sub,
5133 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5135 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5137 E.g. for r0 = r0 | 0xfff
we generate:
mvn r0, r0, lsr #12
mvn r0, r0, asl #12
5142 if (set_zero_bit_copies > 8
5143 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5147 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5148 rtx shift = GEN_INT (set_zero_bit_copies);
5154 gen_rtx_LSHIFTRT (mode,
5159 gen_rtx_SET (target,
5161 gen_rtx_ASHIFT (mode, sub,
5167 /* This will never be reached for Thumb2 because orn is a valid
5168 instruction. This is for Thumb1 and the ARM 32 bit cases.
5170 x = y | constant (such that ~constant is a valid constant)
5172 x = ~(~y & ~constant).
5174 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5178 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5179 emit_constant_insn (cond,
5181 gen_rtx_NOT (mode, source)));
5184 sub = gen_reg_rtx (mode);
5185 emit_constant_insn (cond,
5187 gen_rtx_AND (mode, source,
5189 emit_constant_insn (cond,
5190 gen_rtx_SET (target,
5191 gen_rtx_NOT (mode, sub)));
5198 /* See if two shifts will do 2 or more insns' worth of work. */
5199 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5201 HOST_WIDE_INT shift_mask = ((0xffffffff
5202 << (32 - clear_sign_bit_copies))
5205 if ((remainder | shift_mask) != 0xffffffff)
5207 HOST_WIDE_INT new_val
5208 = ARM_SIGN_EXTEND (remainder | shift_mask);
5212 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5213 insns = arm_gen_constant (AND, SImode, cond, new_val,
5214 new_src, source, subtargets, 1);
5219 rtx targ = subtargets ? NULL_RTX : target;
5220 insns = arm_gen_constant (AND, mode, cond, new_val,
5221 targ, source, subtargets, 0);
5227 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5228 rtx shift = GEN_INT (clear_sign_bit_copies);
5230 emit_insn (gen_ashlsi3 (new_src, source, shift));
5231 emit_insn (gen_lshrsi3 (target, new_src, shift));
5237 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5239 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5241 if ((remainder | shift_mask) != 0xffffffff)
5243 HOST_WIDE_INT new_val
5244 = ARM_SIGN_EXTEND (remainder | shift_mask);
5247 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5249 insns = arm_gen_constant (AND, mode, cond, new_val,
5250 new_src, source, subtargets, 1);
5255 rtx targ = subtargets ? NULL_RTX : target;
5257 insns = arm_gen_constant (AND, mode, cond, new_val,
5258 targ, source, subtargets, 0);
5264 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5265 rtx shift = GEN_INT (clear_zero_bit_copies);
5267 emit_insn (gen_lshrsi3 (new_src, source, shift));
5268 emit_insn (gen_ashlsi3 (target, new_src, shift));
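/* Worked example (illustrative) of the two-shift idiom above: for
   x = y & 0x0000ffff (sixteen clear sign bits) the pair

     mov rD, rS, lsl #16
     mov rD, rD, lsr #16

   clears the top half without materializing the mask, and the mirrored
   lsr/lsl pair handles masks that clear the low bits instead. */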
5280 /* Calculate what the instruction sequences would be if we generated it
5281 normally, negated, or inverted. */
5283 /* AND cannot be split into multiple insns, so invert and use BIC. */
5286 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5289 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5294 if (can_invert || final_invert)
5295 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5300 immediates = &pos_immediates;
5302 /* Is the negated immediate sequence more efficient? */
5303 if (neg_insns < insns && neg_insns <= inv_insns)
5306 immediates = &neg_immediates;
5311 /* Is the inverted immediate sequence more efficient?
5312 We must allow for an extra NOT instruction for XOR operations, although
5313 there is some chance that the final 'mvn' will get optimized later. */
5314 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5317 immediates = &inv_immediates;
5325 /* Now output the chosen sequence as instructions. */
5328 for (i = 0; i < insns; i++)
5330 rtx new_src, temp1_rtx;
5332 temp1 = immediates->i[i];
5334 if (code == SET || code == MINUS)
5335 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5336 else if ((final_invert || i < (insns - 1)) && subtargets)
5337 new_src = gen_reg_rtx (mode);
5343 else if (can_negate)
5346 temp1 = trunc_int_for_mode (temp1, mode);
5347 temp1_rtx = GEN_INT (temp1);
5351 else if (code == MINUS)
5352 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5354 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5356 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5361 can_negate = can_invert;
5365 else if (code == MINUS)
5373 emit_constant_insn (cond, gen_rtx_SET (target,
5374 gen_rtx_NOT (mode, source)));
5381 /* Return TRUE if op is a constant where both the low and top words are
5382 suitable for RSB/RSC instructions. This is never true for Thumb, since
5383 we do not have RSC in that case. */
5385 arm_const_double_prefer_rsbs_rsc (rtx op)
5387 /* Thumb lacks RSC, so we never prefer that sequence. */
5388 if (TARGET_THUMB || !CONST_INT_P (op))
5390 HOST_WIDE_INT hi, lo;
5391 lo = UINTVAL (op) & 0xffffffffULL;
5392 hi = UINTVAL (op) >> 32;
5393 return const_ok_for_arm (lo) && const_ok_for_arm (hi);
5396 /* Canonicalize a comparison so that we are more likely to recognize it.
5397 This can be done for a few constant compares, where we can make the
5398 immediate value easier to load. */
5401 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5402 bool op0_preserve_value)
5405 unsigned HOST_WIDE_INT i, maxval;
5407 mode = GET_MODE (*op0);
5408 if (mode == VOIDmode)
5409 mode = GET_MODE (*op1);
5411 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5413 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5414 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5415 either reversed or (for constant OP1) adjusted to GE/LT.
5416 Similarly for GTU/LEU in Thumb mode. */
5420 if (*code == GT || *code == LE
5421 || *code == GTU || *code == LEU)
5423 /* Missing comparison. First try to use an available comparison. */
5425 if (CONST_INT_P (*op1))
5434 /* Try to convert to GE/LT, unless that would be more expensive. */
5436 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5437 && arm_const_double_prefer_rsbs_rsc (*op1))
5439 *op1 = GEN_INT (i + 1);
5440 *code = *code == GT ? GE : LT;
5447 if (i != ~((unsigned HOST_WIDE_INT) 0))
5449 /* Try to convert to GEU/LTU, unless that would
5450 be more expensive. */
5451 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5452 && arm_const_double_prefer_rsbs_rsc (*op1))
5454 *op1 = GEN_INT (i + 1);
5455 *code = *code == GTU ? GEU : LTU;
5465 if (!op0_preserve_value)
5467 std::swap (*op0, *op1);
5468 *code = (int)swap_condition ((enum rtx_code)*code);
5474 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5475 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5476 to facilitate possible combining with a cmp into 'ands'. */
5478 && GET_CODE (*op0) == ZERO_EXTEND
5479 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5480 && GET_MODE (XEXP (*op0, 0)) == QImode
5481 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5482 && subreg_lowpart_p (XEXP (*op0, 0))
5483 && *op1 == const0_rtx)
5484 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5487 /* Comparisons smaller than DImode. Only adjust comparisons against
5488 an out-of-range constant. */
5489 if (!CONST_INT_P (*op1)
5490 || const_ok_for_arm (INTVAL (*op1))
5491 || const_ok_for_arm (- INTVAL (*op1)))
5505 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5507 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5508 *code = *code == GT ? GE : LT;
5516 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5518 *op1 = GEN_INT (i - 1);
5519 *code = *code == GE ? GT : LE;
5526 if (i != ~((unsigned HOST_WIDE_INT) 0)
5527 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5529 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5530 *code = *code == GTU ? GEU : LTU;
5538 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5540 *op1 = GEN_INT (i - 1);
5541 *code = *code == GEU ? GTU : LEU;
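/* Worked example (illustrative): "x > 0xfff" cannot use 0xfff, which is
   not a valid rotated immediate, but rewriting GT as GE against i + 1
   yields "x >= 0x1000", and 0x1000 is encodable, so a single cmp
   suffices. */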
5552 /* Define how to find the value returned by a function. */
5555 arm_function_value (const_tree type, const_tree func,
5556 bool outgoing ATTRIBUTE_UNUSED)
5559 int unsignedp ATTRIBUTE_UNUSED;
5560 rtx r ATTRIBUTE_UNUSED;
5562 mode = TYPE_MODE (type);
5564 if (TARGET_AAPCS_BASED)
5565 return aapcs_allocate_return_reg (mode, type, func);
5567 /* Promote integer types. */
5568 if (INTEGRAL_TYPE_P (type))
5569 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5571 /* Promote small structs returned in a register to full-word size
5572 for big-endian AAPCS. */
5573 if (arm_return_in_msb (type))
5575 HOST_WIDE_INT size = int_size_in_bytes (type);
5576 if (size % UNITS_PER_WORD != 0)
5578 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5579 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5583 return arm_libcall_value_1 (mode);
5586 /* libcall hashtable helpers. */
5588 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5590 static inline hashval_t hash (const rtx_def *);
5591 static inline bool equal (const rtx_def *, const rtx_def *);
5592 static inline void remove (rtx_def *);
5596 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5598 return rtx_equal_p (p1, p2);
5602 libcall_hasher::hash (const rtx_def *p1)
5604 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5607 typedef hash_table<libcall_hasher> libcall_table_type;
5610 add_libcall (libcall_table_type *htab, rtx libcall)
5612 *htab->find_slot (libcall, INSERT) = libcall;
5616 arm_libcall_uses_aapcs_base (const_rtx libcall)
5618 static bool init_done = false;
5619 static libcall_table_type *libcall_htab = NULL;
5625 libcall_htab = new libcall_table_type (31);
5626 add_libcall (libcall_htab,
5627 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5628 add_libcall (libcall_htab,
5629 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5630 add_libcall (libcall_htab,
5631 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5632 add_libcall (libcall_htab,
5633 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5635 add_libcall (libcall_htab,
5636 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5637 add_libcall (libcall_htab,
5638 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5639 add_libcall (libcall_htab,
5640 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5641 add_libcall (libcall_htab,
5642 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5644 add_libcall (libcall_htab,
5645 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5646 add_libcall (libcall_htab,
5647 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5648 add_libcall (libcall_htab,
5649 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5650 add_libcall (libcall_htab,
5651 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5652 add_libcall (libcall_htab,
5653 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5654 add_libcall (libcall_htab,
5655 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5656 add_libcall (libcall_htab,
5657 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5658 add_libcall (libcall_htab,
5659 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5661 /* Values from double-precision helper functions are returned in core
5662 registers if the selected core only supports single-precision
5663 arithmetic, even if we are using the hard-float ABI. The same is
5664 true for single-precision helpers, but we will never be using the
5665 hard-float ABI on a CPU which doesn't support single-precision
5666 operations in hardware. */
5667 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5668 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5669 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5670 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5671 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5672 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5673 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5674 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5675 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5676 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5677 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5678 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5680 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5682 add_libcall (libcall_htab,
5683 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5686 return libcall && libcall_htab->find (libcall) != NULL;
5690 arm_libcall_value_1 (machine_mode mode)
5692 if (TARGET_AAPCS_BASED)
5693 return aapcs_libcall_value (mode);
5694 else if (TARGET_IWMMXT_ABI
5695 && arm_vector_mode_supported_p (mode))
5696 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5698 return gen_rtx_REG (mode, ARG_REGISTER (1));
5701 /* Define how to find the value returned by a library function
5702 assuming the value has mode MODE. */
5705 arm_libcall_value (machine_mode mode, const_rtx libcall)
5707 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5708 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5710 /* The following libcalls return their result in integer registers,
5711 even though they return a floating point value. */
5712 if (arm_libcall_uses_aapcs_base (libcall))
5713 return gen_rtx_REG (mode, ARG_REGISTER (1));
5717 return arm_libcall_value_1 (mode);
5720 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5723 arm_function_value_regno_p (const unsigned int regno)
5725 if (regno == ARG_REGISTER (1)
5727 && TARGET_AAPCS_BASED
5728 && TARGET_HARD_FLOAT
5729 && regno == FIRST_VFP_REGNUM)
5730 || (TARGET_IWMMXT_ABI
5731 && regno == FIRST_IWMMXT_REGNUM))
5737 /* Determine the amount of memory needed to store the possible return
5738 registers of an untyped call. */
5740 arm_apply_result_size (void)
5746 if (TARGET_HARD_FLOAT_ABI)
5748 if (TARGET_IWMMXT_ABI)
5755 /* Decide whether TYPE should be returned in memory (true)
5756 or in a register (false). FNTYPE is the type of the function making the call. */
5759 arm_return_in_memory (const_tree type, const_tree fntype)
5763 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5765 if (TARGET_AAPCS_BASED)
5767 /* Simple, non-aggregate types (i.e. not including vectors and
5768 complex) are always returned in a register (or registers).
5769 We don't care about which register here, so we can short-cut
5770 some of the detail. */
5771 if (!AGGREGATE_TYPE_P (type)
5772 && TREE_CODE (type) != VECTOR_TYPE
5773 && TREE_CODE (type) != COMPLEX_TYPE)
5776 /* Any return value that is no larger than one word can be returned in r0. */
5778 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5781 /* Check any available co-processors to see if they accept the
5782 type as a register candidate (VFP, for example, can return
5783 some aggregates in consecutive registers). These aren't
5784 available if the call is variadic. */
5785 if (aapcs_select_return_coproc (type, fntype) >= 0)
5788 /* Vector values should be returned using ARM registers, not
5789 memory (unless they're over 16 bytes, which will break since
5790 we only have four call-clobbered registers to play with). */
5791 if (TREE_CODE (type) == VECTOR_TYPE)
5792 return (size < 0 || size > (4 * UNITS_PER_WORD));
5794 /* The rest go in memory. */
5798 if (TREE_CODE (type) == VECTOR_TYPE)
5799 return (size < 0 || size > (4 * UNITS_PER_WORD));
5801 if (!AGGREGATE_TYPE_P (type)
5802 && (TREE_CODE (type) != VECTOR_TYPE))
5803 /* All simple types are returned in registers. */
5806 if (arm_abi != ARM_ABI_APCS)
5808 /* ATPCS and later return aggregate types in memory only if they are
5809 larger than a word (or are variable size). */
5810 return (size < 0 || size > UNITS_PER_WORD);
5813 /* For the arm-wince targets we choose to be compatible with Microsoft's
5814 ARM and Thumb compilers, which always return aggregates in memory. */
5816 /* All structures/unions bigger than one word are returned in memory.
5817 Also catch the case where int_size_in_bytes returns -1. In this case
5818 the aggregate is either huge or of variable size, and in either case
5819 we will want to return it via memory and not in a register. */
5820 if (size < 0 || size > UNITS_PER_WORD)
5823 if (TREE_CODE (type) == RECORD_TYPE)
5827 /* For a struct the APCS says that we only return in a register
5828 if the type is 'integer like' and every addressable element
5829 has an offset of zero. For practical purposes this means
5830 that the structure can have at most one non bit-field element
5831 and that this element must be the first one in the structure. */
5833 /* Find the first field, ignoring non FIELD_DECL things which will
5834 have been created by C++. */
5835 for (field = TYPE_FIELDS (type);
5836 field && TREE_CODE (field) != FIELD_DECL;
5837 field = DECL_CHAIN (field))
5841 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5843 /* Check that the first field is valid for returning in a register. */
5845 /* ... Floats are not allowed */
5846 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5849 /* ... Aggregates that are not themselves valid for returning in
5850 a register are not allowed. */
5851 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5854 /* Now check the remaining fields, if any. Only bitfields are allowed,
5855 since they are not addressable. */
5856 for (field = DECL_CHAIN (field);
5858 field = DECL_CHAIN (field))
5860 if (TREE_CODE (field) != FIELD_DECL)
5863 if (!DECL_BIT_FIELD_TYPE (field))
5870 if (TREE_CODE (type) == UNION_TYPE)
5874 /* Unions can be returned in registers if every element is
5875 integral, or can be returned in an integer register. */
5876 for (field = TYPE_FIELDS (type);
5878 field = DECL_CHAIN (field))
5880 if (TREE_CODE (field) != FIELD_DECL)
5883 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5886 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5892 #endif /* not ARM_WINCE */
5894 /* Return all other types in memory. */
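/* Illustrative sketch (not part of the original source) of how the rules
   above classify a few user types; the type names are invented here.  */
#if 0
struct in_r0     { short lo, hi; };   /* <= UNITS_PER_WORD: register.      */
struct in_memory { int a, b; };       /* AAPCS aggregate > one word: memory
                                         (unless a co-processor claims it). */
struct apcs_ok   { unsigned flags; }; /* APCS 'integer like': register.    */
struct apcs_mem  { float f; };        /* APCS: a float member forces memory. */
#endif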
5898 const struct pcs_attribute_arg
5899 {
5900 const char *arg;
5901 enum arm_pcs value;
5902 } pcs_attribute_args[] =
5904 {"aapcs", ARM_PCS_AAPCS},
5905 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5907 /* We could recognize these, but changes would be needed elsewhere
5908 * to implement them. */
5909 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5910 {"atpcs", ARM_PCS_ATPCS},
5911 {"apcs", ARM_PCS_APCS},
5913 {NULL, ARM_PCS_UNKNOWN}
5917 arm_pcs_from_attribute (tree attr)
5919 const struct pcs_attribute_arg *ptr;
5922 /* Get the value of the argument. */
5923 if (TREE_VALUE (attr) == NULL_TREE
5924 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5925 return ARM_PCS_UNKNOWN;
5927 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5929 /* Check it against the list of known arguments. */
5930 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5931 if (streq (arg, ptr->arg))
5934 /* An unrecognized PCS name. */
5935 return ARM_PCS_UNKNOWN;
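/* Illustrative sketch (not part of the original source): the strings in
   pcs_attribute_args correspond to the documented "pcs" type attribute.  */
#if 0
/* Force the base (integer) convention for one function even when the
   rest of the translation unit uses -mfloat-abi=hard: */
double base_conv (double) __attribute__ ((pcs ("aapcs")));
/* And the reverse, requesting the VFP variant explicitly: */
double vfp_conv (double) __attribute__ ((pcs ("aapcs-vfp")));
#endif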
5938 /* Get the PCS variant to use for this call. TYPE is the function's type
5939 specification, DECL is the specific declaration. DECL may be null if
5940 the call could be indirect or if this is a library call. */
5942 arm_get_pcs_model (const_tree type, const_tree decl)
5944 bool user_convention = false;
5945 enum arm_pcs user_pcs = arm_pcs_default;
5950 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5953 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5954 user_convention = true;
5957 if (TARGET_AAPCS_BASED)
5959 /* Detect varargs functions. These always use the base rules
5960 (no argument is ever a candidate for a co-processor
5961 register). */
5962 bool base_rules = stdarg_p (type);
5964 if (user_convention)
5966 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5967 sorry ("non-AAPCS derived PCS variant");
5968 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5969 error ("variadic functions must use the base AAPCS variant");
5973 return ARM_PCS_AAPCS;
5974 else if (user_convention)
5976 else if (decl && flag_unit_at_a_time)
5978 /* Local functions never leak outside this compilation unit,
5979 so we are free to use whatever conventions are
5980 appropriate. */
5981 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5982 cgraph_node *local_info_node
5983 = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
5984 if (local_info_node && local_info_node->local)
5985 return ARM_PCS_AAPCS_LOCAL;
5988 else if (user_convention && user_pcs != arm_pcs_default)
5989 sorry ("PCS variant");
5991 /* For everything else we use the target's default. */
5992 return arm_pcs_default;
5997 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5998 const_tree fntype ATTRIBUTE_UNUSED,
5999 rtx libcall ATTRIBUTE_UNUSED,
6000 const_tree fndecl ATTRIBUTE_UNUSED)
6002 /* Record the unallocated VFP registers. */
6003 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6004 pcum->aapcs_vfp_reg_alloc = 0;
6007 /* Walk down the type tree of TYPE counting consecutive base elements.
6008 If *MODEP is VOIDmode, then set it to the first valid floating point
6009 type. If a non-floating point type is found, or if a floating point
6010 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6011 otherwise return the count in the sub-tree. */
6013 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
6018 switch (TREE_CODE (type))
6021 mode = TYPE_MODE (type);
6022 if (mode != DFmode && mode != SFmode && mode != HFmode)
6025 if (*modep == VOIDmode)
6034 mode = TYPE_MODE (TREE_TYPE (type));
6035 if (mode != DFmode && mode != SFmode)
6038 if (*modep == VOIDmode)
6047 /* Use V2SImode and V4SImode as representatives of all 64-bit
6048 and 128-bit vector types, whether or not those modes are
6049 supported with the present options. */
6050 size = int_size_in_bytes (type);
6063 if (*modep == VOIDmode)
6066 /* Vector modes are considered to be opaque: two vectors are
6067 equivalent for the purposes of being homogeneous aggregates
6068 if they are the same size. */
6077 tree index = TYPE_DOMAIN (type);
6079 /* Can't handle incomplete types or sizes that are not
6080 fixed. */
6081 if (!COMPLETE_TYPE_P (type)
6082 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6085 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6088 || !TYPE_MAX_VALUE (index)
6089 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6090 || !TYPE_MIN_VALUE (index)
6091 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6095 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6096 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6098 /* There must be no padding. */
6099 if (wi::to_wide (TYPE_SIZE (type))
6100 != count * GET_MODE_BITSIZE (*modep))
6112 /* Can't handle incomplete types or sizes that are not
6113 fixed. */
6114 if (!COMPLETE_TYPE_P (type)
6115 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6118 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6120 if (TREE_CODE (field) != FIELD_DECL)
6123 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6129 /* There must be no padding. */
6130 if (wi::to_wide (TYPE_SIZE (type))
6131 != count * GET_MODE_BITSIZE (*modep))
6138 case QUAL_UNION_TYPE:
6140 /* These aren't very interesting except in a degenerate case. */
6145 /* Can't handle incomplete types or sizes that are not
6146 fixed. */
6147 if (!COMPLETE_TYPE_P (type)
6148 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6151 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6153 if (TREE_CODE (field) != FIELD_DECL)
6156 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6159 count = count > sub_count ? count : sub_count;
6162 /* There must be no padding. */
6163 if (wi::to_wide (TYPE_SIZE (type))
6164 != count * GET_MODE_BITSIZE (*modep))
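/* Illustrative sketch (not part of the original source): what the walk
   above accepts and rejects as homogeneous aggregates.  */
#if 0
struct hfa3  { float x, y, z; };   /* three SFmode elements: count 3.     */
struct hda2  { double d[2]; };     /* two DFmode elements: count 2.       */
struct mixed { float f; int i; };  /* non-FP member: the walk returns -1. */
struct hfa5  { float v[5]; };      /* count 5; the caller rejects > 4.    */
#endif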
6177 /* Return true if PCS_VARIANT should use VFP registers. */
6179 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6181 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6183 static bool seen_thumb1_vfp = false;
6185 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6187 sorry ("Thumb-1 hard-float VFP ABI");
6188 /* sorry() is not immediately fatal, so only display this once. */
6189 seen_thumb1_vfp = true;
6195 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6198 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
6199 (TARGET_VFP_DOUBLE || !is_double));
6202 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6203 suitable for passing or returning in VFP registers for the PCS
6204 variant selected. If it is, then *BASE_MODE is updated to contain
6205 a machine mode describing each element of the argument's type and
6206 *COUNT to hold the number of such elements. */
6208 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6209 machine_mode mode, const_tree type,
6210 machine_mode *base_mode, int *count)
6212 machine_mode new_mode = VOIDmode;
6214 /* If we have the type information, prefer that to working things
6215 out from the mode. */
6218 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6220 if (ag_count > 0 && ag_count <= 4)
6225 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6226 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6227 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6232 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6235 new_mode = (mode == DCmode ? DFmode : SFmode);
6241 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6244 *base_mode = new_mode;
6246 if (TARGET_GENERAL_REGS_ONLY)
6247 error ("argument of type %qT not permitted with -mgeneral-regs-only",
6254 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6255 machine_mode mode, const_tree type)
6257 int count ATTRIBUTE_UNUSED;
6258 machine_mode ag_mode ATTRIBUTE_UNUSED;
6260 if (!use_vfp_abi (pcs_variant, false))
6262 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6267 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6270 if (!use_vfp_abi (pcum->pcs_variant, false))
6273 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6274 &pcum->aapcs_vfp_rmode,
6275 &pcum->aapcs_vfp_rcount);
6278 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6279 for the behaviour of this function. */
6282 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6283 const_tree type ATTRIBUTE_UNUSED)
6286 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6287 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6288 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6291 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6292 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6294 pcum->aapcs_vfp_reg_alloc = mask << regno;
6296 || (mode == TImode && ! TARGET_NEON)
6297 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6300 int rcount = pcum->aapcs_vfp_rcount;
6302 machine_mode rmode = pcum->aapcs_vfp_rmode;
6306 /* Avoid using unsupported vector modes. */
6307 if (rmode == V2SImode)
6309 else if (rmode == V4SImode)
6316 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6317 for (i = 0; i < rcount; i++)
6319 rtx tmp = gen_rtx_REG (rmode,
6320 FIRST_VFP_REGNUM + regno + i * rshift);
6321 tmp = gen_rtx_EXPR_LIST
6322 (VOIDmode, tmp,
6323 GEN_INT (i * GET_MODE_SIZE (rmode)));
6324 XVECEXP (par, 0, i) = tmp;
6327 pcum->aapcs_reg = par;
6330 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
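/* Worked example (not part of the original source) of the mask scan above
   for an argument made of two DFmode elements, e.g. a struct of two
   doubles.  */
#if 0
unsigned
example_vfp_mask (void)
{
  unsigned rmode_size = 8;                       /* GET_MODE_SIZE (DFmode) */
  unsigned shift = rmode_size / 4;               /* two S regs per element */
  unsigned rcount = 2;                           /* two elements           */
  unsigned mask = (1u << (shift * rcount)) - 1;  /* 0xf: four S registers  */
  /* The loop then tries regno = 0, 2, 4, ... until four consecutive free
     S registers are found, i.e. d0/d1 first, then d1/d2, and so on.  */
  return mask;
}
#endif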
6336 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6337 comment there for the behaviour of this function. */
6340 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6342 const_tree type ATTRIBUTE_UNUSED)
6344 if (!use_vfp_abi (pcs_variant, false))
6348 || (GET_MODE_CLASS (mode) == MODE_INT
6349 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6353 machine_mode ag_mode;
6358 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6363 if (ag_mode == V2SImode)
6365 else if (ag_mode == V4SImode)
6371 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6372 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6373 for (i = 0; i < count; i++)
6375 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6376 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6377 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6378 XVECEXP (par, 0, i) = tmp;
6384 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6388 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6389 machine_mode mode ATTRIBUTE_UNUSED,
6390 const_tree type ATTRIBUTE_UNUSED)
6392 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6393 pcum->aapcs_vfp_reg_alloc = 0;
6397 #define AAPCS_CP(X) \
6399 aapcs_ ## X ## _cum_init, \
6400 aapcs_ ## X ## _is_call_candidate, \
6401 aapcs_ ## X ## _allocate, \
6402 aapcs_ ## X ## _is_return_candidate, \
6403 aapcs_ ## X ## _allocate_return_reg, \
6404 aapcs_ ## X ## _advance \
6407 /* Table of co-processors that can be used to pass arguments in
6408 registers. Ideally no argument should be a candidate for more than
6409 one co-processor table entry, but the table is processed in order
6410 and stops after the first match. If that entry then fails to put
6411 the argument into a co-processor register, the argument will go on
6415 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6416 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6418 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6419 BLKmode) is a candidate for this co-processor's registers; this
6420 function should ignore any position-dependent state in
6421 CUMULATIVE_ARGS and only use call-type dependent information. */
6422 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6424 /* Return true if the argument does get a co-processor register; it
6425 should set aapcs_reg to an RTX of the register allocated as is
6426 required for a return from FUNCTION_ARG. */
6427 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6429 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6430 be returned in this co-processor's registers. */
6431 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6433 /* Allocate and return an RTX element to hold the return type of a call. This
6434 routine must not fail and will only be called if is_return_candidate
6435 returned true with the same parameters. */
6436 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6438 /* Finish processing this argument and prepare to start processing
6439 the next one. */
6440 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6441 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6449 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6454 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6455 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6462 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6464 /* We aren't passed a decl, so we can't check that a call is local.
6465 However, it isn't clear that that would be a win anyway, since it
6466 might limit some tail-calling opportunities. */
6467 enum arm_pcs pcs_variant;
6471 const_tree fndecl = NULL_TREE;
6473 if (TREE_CODE (fntype) == FUNCTION_DECL)
6476 fntype = TREE_TYPE (fntype);
6479 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6482 pcs_variant = arm_pcs_default;
6484 if (pcs_variant != ARM_PCS_AAPCS)
6488 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6489 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6498 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6501 /* We aren't passed a decl, so we can't check that a call is local.
6502 However, it isn't clear that that would be a win anyway, since it
6503 might limit some tail-calling opportunities. */
6504 enum arm_pcs pcs_variant;
6505 int unsignedp ATTRIBUTE_UNUSED;
6509 const_tree fndecl = NULL_TREE;
6511 if (TREE_CODE (fntype) == FUNCTION_DECL)
6514 fntype = TREE_TYPE (fntype);
6517 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6520 pcs_variant = arm_pcs_default;
6522 /* Promote integer types. */
6523 if (type && INTEGRAL_TYPE_P (type))
6524 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6526 if (pcs_variant != ARM_PCS_AAPCS)
6530 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6531 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6533 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6537 /* Promotes small structs returned in a register to full-word size
6538 for big-endian AAPCS. */
6539 if (type && arm_return_in_msb (type))
6541 HOST_WIDE_INT size = int_size_in_bytes (type);
6542 if (size % UNITS_PER_WORD != 0)
6544 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6545 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6549 return gen_rtx_REG (mode, R0_REGNUM);
6553 aapcs_libcall_value (machine_mode mode)
6555 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6556 && GET_MODE_SIZE (mode) <= 4)
6559 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6562 /* Lay out a function argument using the AAPCS rules. The rule
6563 numbers referred to here are those in the AAPCS. */
6565 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6566 const_tree type, bool named)
6571 /* We only need to do this once per argument. */
6572 if (pcum->aapcs_arg_processed)
6575 pcum->aapcs_arg_processed = true;
6577 /* Special case: if named is false then we are handling an incoming
6578 anonymous argument which is on the stack. */
6582 /* Is this a potential co-processor register candidate? */
6583 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6585 int slot = aapcs_select_call_coproc (pcum, mode, type);
6586 pcum->aapcs_cprc_slot = slot;
6588 /* We don't have to apply any of the rules from part B of the
6589 preparation phase, these are handled elsewhere in the
6590 compiler. */
6594 /* A Co-processor register candidate goes either in its own
6595 class of registers or on the stack. */
6596 if (!pcum->aapcs_cprc_failed[slot])
6598 /* C1.cp - Try to allocate the argument to co-processor
6599 registers. */
6600 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6603 /* C2.cp - Put the argument on the stack and note that we
6604 can't assign any more candidates in this slot. We also
6605 need to note that we have allocated stack space, so that
6606 we won't later try to split a non-cprc candidate between
6607 core registers and the stack. */
6608 pcum->aapcs_cprc_failed[slot] = true;
6609 pcum->can_split = false;
6612 /* We didn't get a register, so this argument goes on the
6613 stack. */
6614 gcc_assert (pcum->can_split == false);
6619 /* C3 - For double-word aligned arguments, round the NCRN up to the
6620 next even number. */
6621 ncrn = pcum->aapcs_ncrn;
6624 int res = arm_needs_doubleword_align (mode, type);
6625 /* Only warn during RTL expansion of call stmts, otherwise we would
6626 warn e.g. during gimplification even on functions that will be
6627 always inlined, and we'd warn multiple times. Don't warn when
6628 called in expand_function_start either, as we warn instead in
6629 arm_function_arg_boundary in that case. */
6630 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6631 inform (input_location, "parameter passing for argument of type "
6632 "%qT changed in GCC 7.1", type);
6637 nregs = ARM_NUM_REGS2 (mode, type);
6639 /* Sigh, this test should really assert that nregs > 0, but a GCC
6640 extension allows empty structs and then gives them empty size; it
6641 then allows such a structure to be passed by value. For some of
6642 the code below we have to pretend that such an argument has
6643 non-zero size so that we 'locate' it correctly either in
6644 registers or on the stack. */
6645 gcc_assert (nregs >= 0);
6647 nregs2 = nregs ? nregs : 1;
6649 /* C4 - Argument fits entirely in core registers. */
6650 if (ncrn + nregs2 <= NUM_ARG_REGS)
6652 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6653 pcum->aapcs_next_ncrn = ncrn + nregs;
6657 /* C5 - Some core registers left and there are no arguments already
6658 on the stack: split this argument between the remaining core
6659 registers and the stack. */
6660 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6662 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6663 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6664 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6668 /* C6 - NCRN is set to 4. */
6669 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6671 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
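/* Worked example (not part of the original source): the C.1-C.8 rules
   above applied to 'void f (int a, double d, int b)' under the base
   (soft-float) AAPCS:
     a: ncrn 0, fits            -> r0, ncrn = 1              (C4)
     d: doubleword aligned      -> round ncrn 1 up to 2      (C3)
        two words               -> r2/r3, ncrn = 4           (C4)
     b: ncrn 4, no regs left    -> stack                     (C6-C8)  */
#if 0
void f (int a, double d, int b);
#endif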
6675 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6676 for a call to a function whose data type is FNTYPE.
6677 For a library call, FNTYPE is NULL. */
6679 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6681 tree fndecl ATTRIBUTE_UNUSED)
6683 /* Long call handling. */
6685 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6687 pcum->pcs_variant = arm_pcs_default;
6689 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6691 if (arm_libcall_uses_aapcs_base (libname))
6692 pcum->pcs_variant = ARM_PCS_AAPCS;
6694 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6695 pcum->aapcs_reg = NULL_RTX;
6696 pcum->aapcs_partial = 0;
6697 pcum->aapcs_arg_processed = false;
6698 pcum->aapcs_cprc_slot = -1;
6699 pcum->can_split = true;
6701 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6705 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6707 pcum->aapcs_cprc_failed[i] = false;
6708 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6716 /* On the ARM, the offset starts at 0. */
6718 pcum->iwmmxt_nregs = 0;
6719 pcum->can_split = true;
6721 /* Varargs vectors are treated the same as long long.
6722 named_count avoids having to change the way arm handles 'named'. */
6723 pcum->named_count = 0;
6726 if (TARGET_REALLY_IWMMXT && fntype)
6730 for (fn_arg = TYPE_ARG_TYPES (fntype);
6732 fn_arg = TREE_CHAIN (fn_arg))
6733 pcum->named_count += 1;
6735 if (! pcum->named_count)
6736 pcum->named_count = INT_MAX;
6740 /* Return 2 if double word alignment is required for argument passing,
6741 but wasn't required before the fix for PR88469.
6742 Return 1 if double word alignment is required for argument passing.
6743 Return -1 if double word alignment used to be required for argument
6744 passing before PR77728 ABI fix, but is not required anymore.
6745 Return 0 if double word alignment is not required and wasn't required
6746 before either. */
6748 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6751 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6753 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6754 if (!AGGREGATE_TYPE_P (type))
6755 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6757 /* Array types: Use member alignment of element type. */
6758 if (TREE_CODE (type) == ARRAY_TYPE)
6759 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6763 /* Record/aggregate types: Use greatest member alignment of any member. */
6764 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6765 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6767 if (TREE_CODE (field) == FIELD_DECL)
6770 /* Before PR77728 fix, we were incorrectly considering also
6771 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6772 Make sure we can warn about that with -Wpsabi. */
6775 else if (TREE_CODE (field) == FIELD_DECL
6776 && DECL_BIT_FIELD_TYPE (field)
6777 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
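/* Illustrative sketch (not part of the original source): 'long long' and
   'double' have 64-bit alignment, which exceeds PARM_BOUNDARY (32 on this
   target), so such arguments start in an even-numbered core register or
   at an 8-byte aligned stack slot.  */
#if 0
void g (int a, long long ll);   /* a -> r0; ll -> r2/r3, skipping r1.  */
#endif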
6787 /* Determine where to put an argument to a function.
6788 Value is zero to push the argument on the stack,
6789 or a hard register in which to store the argument.
6791 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6792 the preceding args and about the function being called.
6793 ARG is a description of the argument.
6795 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6796 other arguments are passed on the stack. If (NAMED == 0) (which happens
6797 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6798 defined), say it is passed on the stack (function_prologue will
6799 indeed make it pass on the stack if necessary). */
6802 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
6804 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6807 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6808 a call insn (op3 of a call_value insn). */
6809 if (arg.end_marker_p ())
6812 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6814 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
6815 return pcum->aapcs_reg;
6818 /* Varargs vectors are treated the same as long long.
6819 named_count avoids having to change the way arm handles 'named'. */
6820 if (TARGET_IWMMXT_ABI
6821 && arm_vector_mode_supported_p (arg.mode)
6822 && pcum->named_count > pcum->nargs + 1)
6824 if (pcum->iwmmxt_nregs <= 9)
6825 return gen_rtx_REG (arg.mode,
6826 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6829 pcum->can_split = false;
6834 /* Put doubleword aligned quantities in even register pairs. */
6835 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6837 int res = arm_needs_doubleword_align (arg.mode, arg.type);
6838 if (res < 0 && warn_psabi)
6839 inform (input_location, "parameter passing for argument of type "
6840 "%qT changed in GCC 7.1", arg.type);
6844 if (res > 1 && warn_psabi)
6845 inform (input_location, "parameter passing for argument of type "
6846 "%qT changed in GCC 9.1", arg.type);
6850 /* Only allow splitting an arg between regs and memory if all preceding
6851 args were allocated to regs. For args passed by reference we only count
6852 the reference pointer. */
6853 if (pcum->can_split)
6856 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
6858 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
6861 return gen_rtx_REG (arg.mode, pcum->nregs);
6865 arm_function_arg_boundary (machine_mode mode, const_tree type)
6867 if (!ARM_DOUBLEWORD_ALIGN)
6868 return PARM_BOUNDARY;
6870 int res = arm_needs_doubleword_align (mode, type);
6871 if (res < 0 && warn_psabi)
6872 inform (input_location, "parameter passing for argument of type %qT "
6873 "changed in GCC 7.1", type);
6874 if (res > 1 && warn_psabi)
6875 inform (input_location, "parameter passing for argument of type "
6876 "%qT changed in GCC 9.1", type);
6878 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6882 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
6884 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6885 int nregs = pcum->nregs;
6887 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6889 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
6890 return pcum->aapcs_partial;
6893 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
6896 if (NUM_ARG_REGS > nregs
6897 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
6899 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6904 /* Update the data in PCUM to advance over argument ARG. */
6907 arm_function_arg_advance (cumulative_args_t pcum_v,
6908 const function_arg_info &arg)
6910 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6912 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6914 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
6916 if (pcum->aapcs_cprc_slot >= 0)
6918 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
6920 pcum->aapcs_cprc_slot = -1;
6923 /* Generic stuff. */
6924 pcum->aapcs_arg_processed = false;
6925 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6926 pcum->aapcs_reg = NULL_RTX;
6927 pcum->aapcs_partial = 0;
6932 if (arm_vector_mode_supported_p (arg.mode)
6933 && pcum->named_count > pcum->nargs
6934 && TARGET_IWMMXT_ABI)
6935 pcum->iwmmxt_nregs += 1;
6937 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
6941 /* Variable sized types are passed by reference. This is a GCC
6942 extension to the ARM ABI. */
6945 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6947 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
6950 /* Encode the current state of the #pragma [no_]long_calls. */
6953 OFF, /* No #pragma [no_]long_calls is in effect. */
6954 LONG, /* #pragma long_calls is in effect. */
6955 SHORT /* #pragma no_long_calls is in effect. */
6958 static arm_pragma_enum arm_pragma_long_calls = OFF;
6961 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6963 arm_pragma_long_calls = LONG;
6967 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6969 arm_pragma_long_calls = SHORT;
6973 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6975 arm_pragma_long_calls = OFF;
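/* Illustrative sketch (not part of the original source) of the pragmas
   these three handlers implement: */
#if 0
#pragma long_calls
extern void far_func (void);     /* declarations here get "long_call"   */
#pragma no_long_calls
extern void near_func (void);    /* declarations here get "short_call"  */
#pragma long_calls_off
extern void default_func (void); /* back to the command-line default    */
#endif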
6978 /* Handle an attribute requiring a FUNCTION_DECL;
6979 arguments as in struct attribute_spec.handler. */
6981 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6982 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6984 if (TREE_CODE (*node) != FUNCTION_DECL)
6986 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6988 *no_add_attrs = true;
6994 /* Handle an "interrupt" or "isr" attribute;
6995 arguments as in struct attribute_spec.handler. */
6997 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7002 if (TREE_CODE (*node) != FUNCTION_DECL)
7004 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7006 *no_add_attrs = true;
7008 /* FIXME: the argument if any is checked for type attributes;
7009 should it be checked for decl ones? */
7013 if (TREE_CODE (*node) == FUNCTION_TYPE
7014 || TREE_CODE (*node) == METHOD_TYPE)
7016 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7018 warning (OPT_Wattributes, "%qE attribute ignored",
7020 *no_add_attrs = true;
7023 else if (TREE_CODE (*node) == POINTER_TYPE
7024 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
7025 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
7026 && arm_isr_value (args) != ARM_FT_UNKNOWN)
7028 *node = build_variant_type_copy (*node);
7029 TREE_TYPE (*node) = build_type_attribute_variant
7031 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7032 *no_add_attrs = true;
7036 /* Possibly pass this attribute on from the type to a decl. */
7037 if (flags & ((int) ATTR_FLAG_DECL_NEXT
7038 | (int) ATTR_FLAG_FUNCTION_NEXT
7039 | (int) ATTR_FLAG_ARRAY_NEXT))
7041 *no_add_attrs = true;
7042 return tree_cons (name, args, NULL_TREE);
7046 warning (OPT_Wattributes, "%qE attribute ignored",
7055 /* Handle a "pcs" attribute; arguments as in struct
7056 attribute_spec.handler. */
7058 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7059 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7061 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7063 warning (OPT_Wattributes, "%qE attribute ignored", name);
7064 *no_add_attrs = true;
7069 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7070 /* Handle the "notshared" attribute. This attribute is another way of
7071 requesting hidden visibility. ARM's compiler supports
7072 "__declspec(notshared)"; we support the same thing via an
7076 arm_handle_notshared_attribute (tree *node,
7077 tree name ATTRIBUTE_UNUSED,
7078 tree args ATTRIBUTE_UNUSED,
7079 int flags ATTRIBUTE_UNUSED,
7082 tree decl = TYPE_NAME (*node);
7086 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7087 DECL_VISIBILITY_SPECIFIED (decl) = 1;
7088 *no_add_attrs = false;
7094 /* This function returns true if a function with declaration FNDECL and type
7095 FNTYPE uses the stack to pass arguments or return variables and false
7096 otherwise. This is used for functions with the attributes
7097 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7098 diagnostic messages if the stack is used. NAME is the name of the attribute
7099 used. */
7102 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7104 function_args_iterator args_iter;
7105 CUMULATIVE_ARGS args_so_far_v;
7106 cumulative_args_t args_so_far;
7107 bool first_param = true;
7108 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7110 /* Error out if any argument is passed on the stack. */
7111 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7112 args_so_far = pack_cumulative_args (&args_so_far_v);
7113 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7117 prev_arg_type = arg_type;
7118 if (VOID_TYPE_P (arg_type))
7121 function_arg_info arg (arg_type, /*named=*/true);
7123 /* ??? We should advance after processing the argument and pass
7124 the argument we're advancing past. */
7125 arm_function_arg_advance (args_so_far, arg);
7126 arg_rtx = arm_function_arg (args_so_far, arg);
7127 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7129 error ("%qE attribute not available to functions with arguments "
7130 "passed on the stack", name);
7133 first_param = false;
7136 /* Error out for variadic functions since we cannot control how many
7137 arguments will be passed and thus the stack could be used. stdarg_p () is not
7138 used for the checking to avoid browsing arguments twice. */
7139 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7141 error ("%qE attribute not available to functions with variable number "
7142 "of arguments", name);
7146 /* Error out if return value is passed on the stack. */
7147 ret_type = TREE_TYPE (fntype);
7148 if (arm_return_in_memory (ret_type, fntype))
7150 error ("%qE attribute not available to functions that return value on "
7157 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7158 function will check whether the attribute is allowed here and will add the
7159 attribute to the function declaration tree or otherwise issue a warning. */
7162 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7171 *no_add_attrs = true;
7172 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7177 /* Ignore attribute for function types. */
7178 if (TREE_CODE (*node) != FUNCTION_DECL)
7180 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7182 *no_add_attrs = true;
7188 /* Warn for static linkage functions. */
7189 if (!TREE_PUBLIC (fndecl))
7191 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7192 "with static linkage", name);
7193 *no_add_attrs = true;
7197 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7198 TREE_TYPE (fndecl));
7203 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7204 function will check whether the attribute is allowed here and will add the
7205 attribute to the function type tree or otherwise issue a diagnostic. The
7206 reason we check this at declaration time is to only allow the use of the
7207 attribute with declarations of function pointers and not function
7208 declarations. This function checks NODE is of the expected type and issues
7209 diagnostics otherwise using NAME. If it is not of the expected type
7210 *NO_ADD_ATTRS will be set to true. */
7213 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7218 tree decl = NULL_TREE, fntype = NULL_TREE;
7223 *no_add_attrs = true;
7224 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7229 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7232 fntype = TREE_TYPE (decl);
7235 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7236 fntype = TREE_TYPE (fntype);
7238 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7240 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7241 "function pointer", name);
7242 *no_add_attrs = true;
7246 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7251 /* Prevent trees being shared among function types with and without
7252 cmse_nonsecure_call attribute. */
7253 type = TREE_TYPE (decl);
7255 type = build_distinct_type_copy (type);
7256 TREE_TYPE (decl) = type;
7259 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7262 fntype = TREE_TYPE (fntype);
7263 fntype = build_distinct_type_copy (fntype);
7264 TREE_TYPE (type) = fntype;
7267 /* Construct a type attribute and add it to the function type. */
7268 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7269 TYPE_ATTRIBUTES (fntype));
7270 TYPE_ATTRIBUTES (fntype) = attrs;
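/* Illustrative sketch (not part of the original source): typical uses of
   the two CMSE attributes checked above (both require -mcmse).  */
#if 0
/* A secure-world entry point callable from non-secure state: */
int __attribute__ ((cmse_nonsecure_entry)) secure_entry (int x);

/* A pointer used by secure code to call into non-secure code; the
   attribute goes on the function type, not on a function declaration: */
typedef void __attribute__ ((cmse_nonsecure_call)) ns_fn (int);
ns_fn *ns_callback;
#endif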
7274 /* Return 0 if the attributes for two types are incompatible, 1 if they
7275 are compatible, and 2 if they are nearly compatible (which causes a
7276 warning to be generated). */
7278 arm_comp_type_attributes (const_tree type1, const_tree type2)
7282 /* Check for mismatch of non-default calling convention. */
7283 if (TREE_CODE (type1) != FUNCTION_TYPE)
7286 /* Check for mismatched call attributes. */
7287 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7288 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7289 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7290 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7292 /* Only bother to check if an attribute is defined. */
7293 if (l1 | l2 | s1 | s2)
7295 /* If one type has an attribute, the other must have the same attribute. */
7296 if ((l1 != l2) || (s1 != s2))
7299 /* Disallow mixed attributes. */
7300 if ((l1 & s2) || (l2 & s1))
7304 /* Check for mismatched ISR attribute. */
7305 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7307 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7308 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7310 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7314 l1 = lookup_attribute ("cmse_nonsecure_call",
7315 TYPE_ATTRIBUTES (type1)) != NULL;
7316 l2 = lookup_attribute ("cmse_nonsecure_call",
7317 TYPE_ATTRIBUTES (type2)) != NULL;
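/* Illustrative sketch (not part of the original source): the kind of
   mismatch this comparison catches.  */
#if 0
void lc (void) __attribute__ ((long_call));
void sc (void) __attribute__ ((short_call));
typedef void __attribute__ ((long_call)) long_fn (void);
long_fn *fp = lc;   /* attributes agree: compatible.                     */
/* 'fp = sc;' would mix long_call with short_call, which
   arm_comp_type_attributes reports as incompatible.                     */
#endif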
7325 /* Assigns default attributes to newly defined type. This is used to
7326 set short_call/long_call attributes for function types of
7327 functions defined inside corresponding #pragma scopes. */
7329 arm_set_default_type_attributes (tree type)
7331 /* Add __attribute__ ((long_call)) to all functions, when
7332 inside #pragma long_calls or __attribute__ ((short_call)),
7333 when inside #pragma no_long_calls. */
7334 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7336 tree type_attr_list, attr_name;
7337 type_attr_list = TYPE_ATTRIBUTES (type);
7339 if (arm_pragma_long_calls == LONG)
7340 attr_name = get_identifier ("long_call");
7341 else if (arm_pragma_long_calls == SHORT)
7342 attr_name = get_identifier ("short_call");
7346 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7347 TYPE_ATTRIBUTES (type) = type_attr_list;
7351 /* Return true if DECL is known to be linked into section SECTION. */
7354 arm_function_in_section_p (tree decl, section *section)
7356 /* We can only be certain about the prevailing symbol definition. */
7357 if (!decl_binds_to_current_def_p (decl))
7360 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7361 if (!DECL_SECTION_NAME (decl))
7363 /* Make sure that we will not create a unique section for DECL. */
7364 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7368 return function_section (decl) == section;
7371 /* Return nonzero if a 32-bit "long_call" should be generated for
7372 a call from the current function to DECL. We generate a long_call
7373 if the function:
7375 a. has an __attribute__ ((long_call))
7376 or b. is within the scope of a #pragma long_calls
7377 or c. the -mlong-calls command line switch has been specified
7379 However we do not generate a long call if the function:
7381 d. has an __attribute__ ((short_call))
7382 or e. is inside the scope of a #pragma no_long_calls
7383 or f. is defined in the same section as the current function. */
7386 arm_is_long_call_p (tree decl)
7391 return TARGET_LONG_CALLS;
7393 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7394 if (lookup_attribute ("short_call", attrs))
7397 /* For "f", be conservative, and only cater for cases in which the
7398 whole of the current function is placed in the same section. */
7399 if (!flag_reorder_blocks_and_partition
7400 && TREE_CODE (decl) == FUNCTION_DECL
7401 && arm_function_in_section_p (decl, current_function_section ()))
7404 if (lookup_attribute ("long_call", attrs))
7407 return TARGET_LONG_CALLS;
7410 /* Return nonzero if it is ok to make a tail-call to DECL. */
7412 arm_function_ok_for_sibcall (tree decl, tree exp)
7414 unsigned long func_type;
7416 if (cfun->machine->sibcall_blocked)
7421 /* In FDPIC, never tailcall something for which we have no decl:
7422 the target function could be in a different module, requiring
7423 a different FDPIC register value. */
7428 /* Never tailcall something if we are generating code for Thumb-1. */
7432 /* The PIC register is live on entry to VxWorks PLT entries, so we
7433 must make the call before restoring the PIC register. */
7434 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7437 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7438 may be used both as target of the call and base register for restoring
7439 the VFP registers. */
7440 if (TARGET_APCS_FRAME && TARGET_ARM
7441 && TARGET_HARD_FLOAT
7442 && decl && arm_is_long_call_p (decl))
7445 /* If we are interworking and the function is not declared static
7446 then we can't tail-call it unless we know that it exists in this
7447 compilation unit (since it might be a Thumb routine). */
7448 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7449 && !TREE_ASM_WRITTEN (decl))
7452 func_type = arm_current_func_type ();
7453 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7454 if (IS_INTERRUPT (func_type))
7457 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7458 generated for entry functions themselves. */
7459 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7462 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7463 this would complicate matters for later code generation. */
7464 if (TREE_CODE (exp) == CALL_EXPR)
7466 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7467 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7471 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7473 /* Check that the return value locations are the same. For
7474 example that we aren't returning a value from the sibling in
7475 a VFP register but then need to transfer it to a core
7478 tree decl_or_type = decl;
7480 /* If it is an indirect function pointer, get the function type. */
7482 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7484 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7485 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7487 if (!rtx_equal_p (a, b))
7491 /* Never tailcall if function may be called with a misaligned SP. */
7492 if (IS_STACKALIGN (func_type))
7495 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7496 references should become a NOP. Don't convert such calls into
7497 sibling calls. */
7498 if (TARGET_AAPCS_BASED
7499 && arm_abi == ARM_ABI_AAPCS
7501 && DECL_WEAK (decl))
7504 /* We cannot do a tailcall for an indirect call by descriptor if all the
7505 argument registers are used because the only register left to load the
7506 address is IP and it will already contain the static chain. */
7507 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7509 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7510 CUMULATIVE_ARGS cum;
7511 cumulative_args_t cum_v;
7513 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7514 cum_v = pack_cumulative_args (&cum);
7516 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7518 tree type = TREE_VALUE (t);
7519 if (!VOID_TYPE_P (type))
7521 function_arg_info arg (type, /*named=*/true);
7522 arm_function_arg_advance (cum_v, arg);
7526 function_arg_info arg (integer_type_node, /*named=*/true);
7527 if (!arm_function_arg (cum_v, arg))
7531 /* Everything else is ok. */
7536 /* Addressing mode support functions. */
7538 /* Return nonzero if X is a legitimate immediate operand when compiling
7539 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7541 legitimate_pic_operand_p (rtx x)
7543 if (GET_CODE (x) == SYMBOL_REF
7544 || (GET_CODE (x) == CONST
7545 && GET_CODE (XEXP (x, 0)) == PLUS
7546 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7552 /* Record that the current function needs a PIC register. If PIC_REG is null,
7553 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
7554 both cases cfun->machine->pic_reg is initialized if we have not already done
7555 so. COMPUTE_NOW decides whether and where to set the PIC register. If true,
7556 the PIC register is reloaded in the current position of the instruction stream
7557 regardless of whether it was loaded before. Otherwise, it is only loaded
7558 if not already done so (crtl->uses_pic_offset_table is null). Note that
7559 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
7560 is only supported iff COMPUTE_NOW is false. */
7563 require_pic_register (rtx pic_reg, bool compute_now)
7565 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7567 /* A lot of the logic here is made obscure by the fact that this
7568 routine gets called as part of the rtx cost estimation process.
7569 We don't want those calls to affect any assumptions about the real
7570 function; and further, we can't call entry_of_function() until we
7571 start the real expansion process. */
7572 if (!crtl->uses_pic_offset_table || compute_now)
7574 gcc_assert (can_create_pseudo_p ()
7575 || (pic_reg != NULL_RTX
7577 && GET_MODE (pic_reg) == Pmode));
7578 if (arm_pic_register != INVALID_REGNUM
7580 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7582 if (!cfun->machine->pic_reg)
7583 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7585 /* Play games to avoid marking the function as needing pic
7586 if we are being called as part of the cost-estimation
7587 process. */
7588 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7589 crtl->uses_pic_offset_table = 1;
7593 rtx_insn *seq, *insn;
7595 if (pic_reg == NULL_RTX)
7596 pic_reg = gen_reg_rtx (Pmode);
7597 if (!cfun->machine->pic_reg)
7598 cfun->machine->pic_reg = pic_reg;
7600 /* Play games to avoid marking the function as needing pic
7601 if we are being called as part of the cost-estimation
7602 process. */
7603 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7605 crtl->uses_pic_offset_table = 1;
7608 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7609 && arm_pic_register > LAST_LO_REGNUM
7611 emit_move_insn (cfun->machine->pic_reg,
7612 gen_rtx_REG (Pmode, arm_pic_register));
7614 arm_load_pic_register (0UL, pic_reg);
7619 for (insn = seq; insn; insn = NEXT_INSN (insn))
7621 INSN_LOCATION (insn) = prologue_location;
7623 /* We can be called during expansion of PHI nodes, where
7624 we can't yet emit instructions directly in the final
7625 insn stream. Queue the insns on the entry edge, they will
7626 be committed after everything else is expanded. */
7627 if (currently_expanding_to_rtl)
7628 insert_insn_on_edge (seq,
7630 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7638 /* Generate insns to calculate the address of ORIG in pic mode. */
7640 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
7645 pat = gen_calculate_pic_address (reg, pic_reg, orig);
7647 /* Make the MEM as close to a constant as possible. */
7648 mem = SET_SRC (pat);
7649 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7650 MEM_READONLY_P (mem) = 1;
7651 MEM_NOTRAP_P (mem) = 1;
7653 return emit_insn (pat);
7656 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
7657 created to hold the result of the load. If not NULL, PIC_REG indicates
7658 which register to use as PIC register, otherwise it is decided by register
7659 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
7660 location in the instruction stream, regardless of whether it was loaded
7661 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
7662 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
7664 Returns the register REG into which the PIC load is performed. */
7667 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
7670 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7672 if (GET_CODE (orig) == SYMBOL_REF
7673 || GET_CODE (orig) == LABEL_REF)
7677 gcc_assert (can_create_pseudo_p ());
7678 reg = gen_reg_rtx (Pmode);
7681 /* VxWorks does not impose a fixed gap between segments; the run-time
7682 gap can be different from the object-file gap. We therefore can't
7683 use GOTOFF unless we are absolutely sure that the symbol is in the
7684 same segment as the GOT. Unfortunately, the flexibility of linker
7685 scripts means that we can't be sure of that in general, so assume
7686 that GOTOFF is never valid on VxWorks. */
7687 /* References to weak symbols cannot be resolved locally: they
7688 may be overridden by a non-weak definition at link time. */
7690 if ((GET_CODE (orig) == LABEL_REF
7691 || (GET_CODE (orig) == SYMBOL_REF
7692 && SYMBOL_REF_LOCAL_P (orig)
7693 && (SYMBOL_REF_DECL (orig)
7694 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
7695 && (!SYMBOL_REF_FUNCTION_P (orig)
7696 || arm_fdpic_local_funcdesc_p (orig))))
7698 && arm_pic_data_is_text_relative)
7699 insn = arm_pic_static_addr (orig, reg);
7702 /* If this function doesn't have a pic register, create one now. */
7703 require_pic_register (pic_reg, compute_now);
7705 if (pic_reg == NULL_RTX)
7706 pic_reg = cfun->machine->pic_reg;
7708 insn = calculate_pic_address_constant (reg, pic_reg, orig);
7711 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7712 by loop. */
7713 set_unique_reg_note (insn, REG_EQUAL, orig);
7717 else if (GET_CODE (orig) == CONST)
7721 if (GET_CODE (XEXP (orig, 0)) == PLUS
7722 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7725 /* Handle the case where we have: const (UNSPEC_TLS). */
7726 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7727 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7730 /* Handle the case where we have:
7731 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7732 CONST_INT. */
7733 if (GET_CODE (XEXP (orig, 0)) == PLUS
7734 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7735 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7737 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7743 gcc_assert (can_create_pseudo_p ());
7744 reg = gen_reg_rtx (Pmode);
7747 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7749 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
7750 pic_reg, compute_now);
7751 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7752 base == reg ? 0 : reg, pic_reg,
7755 if (CONST_INT_P (offset))
7757 /* The base register doesn't really matter, we only want to
7758 test the index for the appropriate mode. */
7759 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7761 gcc_assert (can_create_pseudo_p ());
7762 offset = force_reg (Pmode, offset);
7765 if (CONST_INT_P (offset))
7766 return plus_constant (Pmode, base, INTVAL (offset));
7769 if (GET_MODE_SIZE (mode) > 4
7770 && (GET_MODE_CLASS (mode) == MODE_INT
7771 || TARGET_SOFT_FLOAT))
7773 emit_insn (gen_addsi3 (reg, base, offset));
7777 return gen_rtx_PLUS (Pmode, base, offset);
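/* Illustrative sketch (not part of the original source): the user-visible
   effect of the legitimization above when compiling with -fPIC.  */
#if 0
extern int global_var;   /* Preemptible: reached through a GOT slot,
                            roughly ldr rN, [pic_reg, #got_offset]
                            followed by ldr r0, [rN].                  */
static int local_var;    /* Local and text-relative: reached with the
                            cheaper PC-relative sequence produced by
                            arm_pic_static_addr.                       */
int get_global (void) { return global_var; }
int get_local (void)  { return local_var; }
#endif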
7784 /* Whether a register is callee saved or not. This is necessary because high
7785 registers are marked as caller saved when optimizing for size on Thumb-1
7786 targets despite being callee saved in order to avoid using them. */
7787 #define callee_saved_reg_p(reg) \
7788 (!call_used_or_fixed_reg_p (reg) \
7789 || (TARGET_THUMB1 && optimize_size \
7790 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
7792 /* Return a mask for the call-clobbered low registers that are unused
7793 at the end of the prologue. */
7794 static unsigned long
7795 thumb1_prologue_unused_call_clobbered_lo_regs (void)
7797 unsigned long mask = 0;
7798 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7800 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
7801 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
7802 mask |= 1 << (reg - FIRST_LO_REGNUM);
7806 /* Similarly for the start of the epilogue. */
7807 static unsigned long
7808 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
7810 unsigned long mask = 0;
7811 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
7813 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
7814 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
7815 mask |= 1 << (reg - FIRST_LO_REGNUM);
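/* Worked example (not part of the original source): for 'int f (int a)'
   only r0 is live into the body, so r1-r3 are call-clobbered and dead
   after the prologue, and the mask computed above would be 0b1110.  */
#if 0
unsigned long example_mask = (1ul << 1) | (1ul << 2) | (1ul << 3);
#endif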
7819 /* Find a spare register to use during the prologue of a function. */
7822 thumb_find_work_register (unsigned long pushed_regs_mask)
7826 unsigned long unused_regs
7827 = thumb1_prologue_unused_call_clobbered_lo_regs ();
7829 /* Check the argument registers first as these are call-used. The
7830 register allocation order means that sometimes r3 might be used
7831 but earlier argument registers might not, so check them all. */
7832 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
7833 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
7836 /* Otherwise look for a call-saved register that is going to be pushed. */
7837 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7838 if (pushed_regs_mask & (1 << reg))
7843 /* Thumb-2 can use high regs. */
7844 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7845 if (pushed_regs_mask & (1 << reg))
7848 /* Something went wrong - thumb_compute_save_reg_mask()
7849 should have arranged for a suitable register to be pushed. */
7853 static GTY(()) int pic_labelno;
7855 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7856 low register. */
7859 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
7861 rtx l1, labelno, pic_tmp, pic_rtx;
7863 if (crtl->uses_pic_offset_table == 0
7864 || TARGET_SINGLE_PIC_BASE
7868 gcc_assert (flag_pic);
7870 if (pic_reg == NULL_RTX)
7871 pic_reg = cfun->machine->pic_reg;
7872 if (TARGET_VXWORKS_RTP)
7874 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7875 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7876 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7878 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7880 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7881 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7885 /* We use an UNSPEC rather than a LABEL_REF because this label
7886 never appears in the code stream. */
7888 labelno = GEN_INT (pic_labelno++);
7889 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7890 l1 = gen_rtx_CONST (VOIDmode, l1);
7892 /* On the ARM the PC register contains 'dot + 8' at the time of the
7893 addition, on the Thumb it is 'dot + 4'. */
7894 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7895 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7897 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7901 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7903 else /* TARGET_THUMB1 */
7905 if (arm_pic_register != INVALID_REGNUM
7906 && REGNO (pic_reg) > LAST_LO_REGNUM)
7908 /* We will have pushed the pic register, so we should always be
7909 able to find a work register. */
7910 pic_tmp = gen_rtx_REG (SImode,
7911 thumb_find_work_register (saved_regs));
7912 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7913 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7914 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7916 else if (arm_pic_register != INVALID_REGNUM
7917 && arm_pic_register > LAST_LO_REGNUM
7918 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7920 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7921 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7922 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7925 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7929 /* Need to emit this whether or not we obey regdecls,
7930 since setjmp/longjmp can cause life info to screw up. */
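/* Illustrative sketch (not part of the original source): the classic ARM
   PIC-base materialisation that the 'dot + 8' adjustment above accounts
   for looks roughly like:

       ldr   rPIC, .LPIC_OFF
   .LPIC0:
       add   rPIC, pc, rPIC     @ pc reads as .LPIC0 + 8 in ARM state
       ...
   .LPIC_OFF:
       .word _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)

   In Thumb state the pc bias is 4, hence the TARGET_ARM ? 8 : 4 above.  */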
/* Try to determine whether an object, referenced via ORIG, will be
   placed in the text or data segment.  This is used in FDPIC mode, to
   decide which relocations to use when accessing ORIG.  *IS_READONLY
   is set to true if ORIG is a read-only location, false otherwise.
   Return true if we could determine the location of ORIG, false
   otherwise.  *IS_READONLY is valid only when we return true.  */
static bool
arm_is_segment_info_known (rtx orig, bool *is_readonly)
{
  *is_readonly = false;

  if (GET_CODE (orig) == LABEL_REF)
    {
      *is_readonly = true;
      return true;
    }

  if (SYMBOL_REF_P (orig))
    {
      if (CONSTANT_POOL_ADDRESS_P (orig))
	{
	  *is_readonly = true;
	  return true;
	}
      if (SYMBOL_REF_LOCAL_P (orig)
	  && !SYMBOL_REF_EXTERNAL_P (orig)
	  && SYMBOL_REF_DECL (orig)
	  && (!DECL_P (SYMBOL_REF_DECL (orig))
	      || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
	{
	  tree decl = SYMBOL_REF_DECL (orig);
	  tree init = (TREE_CODE (decl) == VAR_DECL)
	    ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
	    ? decl : 0;
	  int reloc = 0;
	  bool named_section, readonly;

	  if (init && init != error_mark_node)
	    reloc = compute_reloc_for_constant (init);

	  named_section = TREE_CODE (decl) == VAR_DECL
	    && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
	  readonly = decl_readonly_section (decl, reloc);

	  /* We don't know where the link script will put a named
	     section, so return false in such a case.  */
	  if (named_section)
	    return false;

	  *is_readonly = readonly;
	  return true;
	}
    }

  /* We don't know.  */
  return false;
}
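/* Worked example (illustrative only): for 'static const int x = 1;' the
   SYMBOL_REF is local, non-common and its decl lives in a read-only
   section, so we return true with *IS_READONLY set.  For
   'int y __attribute__ ((section ("foo")));' the named section makes the
   placement unknowable here, so we return false and the caller must be
   pessimistic.  */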
/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx_insn *
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx;
  rtx_insn *insn;

  gcc_assert (flag_pic);

  bool is_readonly = false;
  bool info_known = false;

  if (TARGET_FDPIC
      && SYMBOL_REF_P (orig)
      && !SYMBOL_REF_FUNCTION_P (orig))
    info_known = arm_is_segment_info_known (orig, &is_readonly);

  if (TARGET_FDPIC
      && SYMBOL_REF_P (orig)
      && !SYMBOL_REF_FUNCTION_P (orig)
      && !info_known)
    {
      /* We don't know where orig is stored, so we have to be
	 pessimistic and use a GOT relocation.  */
      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

      insn = calculate_pic_address_constant (reg, pic_reg, orig);
    }
  else if (TARGET_FDPIC
	   && SYMBOL_REF_P (orig)
	   && (SYMBOL_REF_FUNCTION_P (orig)
	       || !is_readonly))
    {
      /* We use the GOTOFF relocation.  */
      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

      rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
      emit_insn (gen_movsi (reg, l1));
      insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
    }
  else
    {
      /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
	 PC-relative access.  */
      /* We use an UNSPEC rather than a LABEL_REF because this label
	 never appears in the code stream.  */
      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
	 addition, on the Thumb it is 'dot + 4'.  */
      offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
				   UNSPEC_SYMBOL_OFFSET);
      offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

      insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
						   labelno));
    }

  return insn;
}
/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
	  || regno >= FIRST_PSEUDO_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}
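/* For example, (minus (symbol_ref "sym") (label_ref L)) satisfies this
   predicate: the assembler can resolve sym - L to a PC-relative value,
   so no GOT entry is needed even when flag_pic is set.  */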
/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
				int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
	 to fixup invalid register choices.  */
      if (use_ldrd
	  && GET_CODE (x) == POST_MODIFY
	  && !CONST_INT_P (addend))
	return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && ((CONST_INT_P (xop1)
		    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now.  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return true if we can avoid creating a constant pool entry for X.  */
static bool
can_avoid_literal_pool_for_label_p (rtx x)
{
  /* Normally we can assign constant values to target registers without
     the help of the constant pool.  But there are cases we have to use the
     constant pool, such as:
     1) assign a label to a register.
     2) sign-extend an 8-bit value to 32 bits and then assign to a register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of the literal pool (later in function arm_reorg).
     So here we mark such a format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (x))
    return true;

  return false;
}
/* Return nonzero if X is a valid Thumb-2 address operand.  */
int
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
	return 0;

      offset = INTVAL (addend);
      if (GET_MODE_SIZE (mode) <= 4)
	return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
	      && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  else if (can_avoid_literal_pool_for_label_p (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
			int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);

	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
	     If vldr is selected it uses arm_coproc_mem_operand.  */
	  if (TARGET_LDRD)
	    return val > -256 && val < 256;
	  else
	    return val > -4096 && val < 4092;
	}

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
	    && (mode == HImode
		|| mode == HFmode
		|| (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
	{
	  rtx xiop0 = XEXP (index, 0);
	  rtx xiop1 = XEXP (index, 1);

	  return ((arm_address_register_rtx_p (xiop0, strict_p)
		   && power_of_two_operand (xiop1, SImode))
		  || (arm_address_register_rtx_p (xiop1, strict_p)
		      && power_of_two_operand (xiop0, SImode)));
	}
      else if (code == LSHIFTRT || code == ASHIFTRT
	       || code == ASHIFT || code == ROTATERT)
	{
	  rtx op = XEXP (index, 1);

	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
		  && CONST_INT_P (op)
		  && INTVAL (op) > 0
		  && INTVAL (op) <= 31);
	}
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
	  || mode == HFmode
	  || (outer == SIGN_EXTEND && mode == QImode))
	range = 256;
      else
	range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
	  && INTVAL (index) < range
	  && INTVAL (index) > -range);
}
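/* Illustrative examples of indices accepted above for SImode in ARM
   state: (const_int 4095), (reg r1), (mult (reg r1) (const_int 4)) and
   (ashift (reg r1) (const_int 2)), corresponding to addresses such as
   [r0, #4095], [r0, r1] and [r0, r1, lsl #2].  For DImode with LDRD
   the constant range shrinks to -255..255, as checked above.  */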
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand, i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    /* Thumb-2 allows only > -256 index range for its core register
	       load/stores.  Since we allow SF/DF in core registers, we have
	       to use the intersection between -256~4096 (core) and -1024~1024
	       (coprocessor).  */
	    && INTVAL (index) > -256
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
	 and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
	return (code == CONST_INT
		&& INTVAL (index) < 1024
		&& INTVAL (index) > -1024
		&& (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);
	  /* Thumb-2 ldrd only has reg+const addressing modes.
	     Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
	     If vldr is selected it uses arm_coproc_mem_operand.  */
	  if (TARGET_LDRD)
	    return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
	  else
	    return IN_RANGE (val, -255, 4095 - 4);
	}

      return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
	       && thumb2_index_mul_operand (xiop1))
	      || (arm_address_register_rtx_p (xiop1, strict_p)
		  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
	      && CONST_INT_P (op)
	      && INTVAL (op) > 0
	      && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
	  && INTVAL (index) < 4096
	  && INTVAL (index) > -256);
}
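/* Illustrative examples for Thumb-2 SImode indices accepted above:
   constants in (-256, 4096) such as [r0, #4095] or [r0, #-255]; a
   register scaled by 1, 2, 4 or 8 via MULT; or an ASHIFT by 1..3,
   e.g. [r0, r1, lsl #2].  */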
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
	  || regno > LAST_VIRTUAL_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || (GET_MODE_SIZE (mode) >= 4
	      && (regno == STACK_POINTER_REGNUM
		  || regno >= FIRST_PSEUDO_REGISTER
		  || x == hard_frame_pointer_rtx
		  || x == arg_pointer_rtx)));
}
/* Return nonzero if X is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
/* Return nonzero if X is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
    return 0;

  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
	  || reg_mentioned_p (arg_pointer_rtx, x)
	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
	   && reload_completed
	   && (GET_CODE (x) == LABEL_REF
	       || (GET_CODE (x) == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
	 will be replaced with STACK, and SP relative addressing only
	 permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
	  && XEXP (x, 0) != frame_pointer_rtx
	  && XEXP (x, 1) != frame_pointer_rtx
	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
	return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
		|| XEXP (x, 0) == arg_pointer_rtx)
	       && CONST_INT_P (XEXP (x, 1))
	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
	return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
	 larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
	 just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && INTVAL (XEXP (x, 1)) >= 0
	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;

      else if (REG_P (XEXP (x, 0))
	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
		       && REGNO (XEXP (x, 0))
			  <= LAST_VIRTUAL_POINTER_REGISTER))
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && GET_MODE_SIZE (mode) == 4
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
	      && (val + GET_MODE_SIZE (mode)) <= 128
	      && (val & 3) == 0);
    }
}
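/* Worked examples of the ranges above: QImode allows offsets 0..31
   (ldrb r0, [r1, #31]); HImode allows even offsets 0..62
   (ldrh r0, [r1, #62]); SImode and larger allow word-aligned offsets
   with val + size <= 128, so 0..124 for SImode (ldr r0, [r1, #124]).  */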
static bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */

static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
	return LO_REGS;
      else
	return rclass;
    }
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
	 otherwise other uses of r0 (e.g. setting up function arguments) may
	 clobber the value.  */

      rtx tmp;

      if (TARGET_FDPIC)
	{
	  rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
	  rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);

	  emit_insn (gen_load_tp_soft_fdpic ());

	  /* Restore r9.  */
	  emit_insn (gen_restore_pic_register_after_call (fdpic_reg,
							  initial_fdpic_reg));
	}
      else
	emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, R0_REGNUM);
      emit_move_insn (target, tmp);
    }
  return target;
}
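/* Illustrative summary (see arm.md for the authoritative patterns): on
   targets with the CP15 thread register, gen_load_tp_hard above typically
   expands to a single coprocessor read such as
   'mrc p15, 0, <reg>, c13, c0, 3', while the soft variants call the
   __aeabi_read_tp helper, whose result arrives in r0 as noted above.  */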
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
static rtx_insn *
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx label, labelno = NULL_RTX, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  if (TARGET_FDPIC)
    {
      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (reloc)),
			    UNSPEC_TLS);
    }
  else
    {
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);

      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (reloc), label,
				       GEN_INT (TARGET_ARM ? 8 : 4)),
			    UNSPEC_TLS);
    }
  reg = load_tls_operand (sum, reg);

  if (TARGET_FDPIC)
    emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
  else if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
				     LCT_PURE, /* LCT_CONST?  */
				     Pmode, reg, Pmode);

  rtx_insn *insns = get_insns ();
  end_sequence ();

  return insns;
}
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
				       gen_rtx_CONST (VOIDmode, label),
				       GEN_INT (!TARGET_ARM)),
			    UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != R0_REGNUM);

  emit_move_insn (reg, reg0);

  return reg;
}
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, ret, eqv, addend;
  rtx_insn *insns;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  gcc_assert (!TARGET_FDPIC);

	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  /* Original scheme.  */
	  if (TARGET_FDPIC)
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
	  else
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, x);
	}
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  gcc_assert (!TARGET_FDPIC);

	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  if (TARGET_FDPIC)
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
	  else
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
	     share the LDM result with other LD model accesses.  */
	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
				UNSPEC_TLS);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, eqv);

	  /* Load the addend.  */
	  addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
						     GEN_INT (TLS_LDO32)),
				   UNSPEC_TLS);
	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
	  dest = gen_rtx_PLUS (Pmode, dest, addend);
	}
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_FDPIC)
	{
	  sum = gen_rtx_UNSPEC (Pmode,
				gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
				UNSPEC_TLS);
	  reg = load_tls_operand (sum, reg);
	  emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
	  emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
	}
      else
	{
	  labelno = GEN_INT (pic_labelno++);
	  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
	  label = gen_rtx_CONST (VOIDmode, label);
	  sum = gen_rtx_UNSPEC (Pmode,
				gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
					   GEN_INT (TARGET_ARM ? 8 : 4)),
				UNSPEC_TLS);
	  reg = load_tls_operand (sum, reg);

	  if (TARGET_ARM)
	    emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
	  else if (TARGET_THUMB2)
	    emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
	  else
	    {
	      emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
	      emit_move_insn (reg, gen_const_mem (SImode, reg));
	    }
	}

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
			    UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      abort ();
    }
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
	{
	  addend = XEXP (XEXP (x, 0), 1);
	  x = XEXP (XEXP (x, 0), 0);
	}

      if (GET_CODE (x) != SYMBOL_REF)
	return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      if (addend)
	{
	  x = gen_rtx_PLUS (SImode, x, addend);
	  orig_x = x;
	}
      else
	return x;
    }

  if (!TARGET_ARM)
    {
      /* TODO: legitimize_address for Thumb2.  */
      if (TARGET_THUMB2)
	return x;
      return thumb_legitimize_address (x, orig_x, mode);
    }

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
	  && !symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
	  && CONST_INT_P (xop1))
	{
	  HOST_WIDE_INT n, low_n;
	  rtx base_reg, val;
	  n = INTVAL (xop1);

	  /* VFP addressing modes actually allow greater offsets, but for
	     now we just stick with the lowest common denominator.  */
	  if (mode == DImode || mode == DFmode)
	    {
	      low_n = n & 0x0f;
	      n &= ~0x0f;
	      if (low_n > 4)
		{
		  n += 16;
		  low_n -= 16;
		}
	    }
	  else
	    {
	      low_n = ((mode) == TImode ? 0
		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
	      n -= low_n;
	    }

	  base_reg = gen_reg_rtx (SImode);
	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
	  emit_move_insn (base_reg, val);
	  x = plus_constant (Pmode, base_reg, low_n);
	}
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the minipool to be avoided entirely.  */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
	 only use an 8-bit index.  So let's use a 12-bit index for
	 SImode only and hope that arm_gen_constant will enable LDRB
	 to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
	{
	  /* It'll most probably be more efficient to generate the
	     base with more bits set and use a negative index instead.
	     Don't do this for Thumb as negative offsets are much more
	     limited.  */
	  base |= mask;
	  index -= mask;
	}
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
					  false /*compute_now*/);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
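/* Worked example (illustrative): for an SImode access to r0 + 0x1234 the
   PLUS branch above computes low_n = 0x234 and rebases, emitting
   base_reg = r0 + 0x1000 and returning (plus base_reg 0x234), which fits
   the 12-bit LDR/STR offset field.  */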
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
	  || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
	 then offsetting that.  Don't do this when optimizing for space
	 since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
	{
	  HOST_WIDE_INT delta;

	  if (offset >= 256)
	    delta = offset - (256 - GET_MODE_SIZE (mode));
	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
	    delta = 31 * GET_MODE_SIZE (mode);
	  else
	    delta = offset & (~31 * GET_MODE_SIZE (mode));

	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
				NULL_RTX);
	  x = plus_constant (Pmode, xop0, delta);
	}
      else if (offset < 0 && offset > -256)
	/* Small negative offsets are best done with a subtract before the
	   dereference; forcing these into a register normally takes two
	   instructions.  */
	x = force_operand (x, NULL_RTX);
      else
	{
	  /* For the remaining cases, force the constant into a register.  */
	  xop1 = force_reg (SImode, xop1);
	  x = gen_rtx_PLUS (SImode, xop0, xop1);
	}
    }
  else if (GET_CODE (x) == PLUS
	   && s_register_operand (XEXP (x, 1), SImode)
	   && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
					  false /*compute_now*/);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
	{
	  /* ARM currently does not provide relocations to encode TLS variables
	     into AArch32 instructions, only data, so there is no way to
	     currently implement these if a literal pool is disabled.  */
	  if (arm_disable_literal_pool)
	    sorry ("accessing thread-local storage is not currently supported "
		   "with %<-mpure-code%> or %<-mslow-flash-data%>");

	  return true;
	}

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
	 TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
	iter.skip_subrtxes ();
    }
  return false;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool XXX.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
  return flag_pic || !label_mentioned_p (x);
}

static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
     RTX.  These RTX must therefore be allowed for Thumb-1 so that when run
     for ARMv8-M Baseline or later the result is valid.  */
  if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (CONST_INT_P (x)
	  || CONST_DOUBLE_P (x)
	  || CONSTANT_ADDRESS_P (x)
	  || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
	  || flag_pic);
}

static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
	  && (TARGET_32BIT
	      ? arm_legitimate_constant_p_1 (mode, x)
	      : thumb_legitimate_constant_p (mode, x)));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;
  split_const (x, &base, &offset);

  if (SYMBOL_REF_P (base))
    {
      /* Function symbols cannot have an offset due to the Thumb bit.  */
      if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
	  && INTVAL (offset) != 0)
	return true;

      if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
	  && !offset_within_block_p (base, INTVAL (offset)))
	return true;
    }
  return arm_tls_referenced_p (x);
}
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)			\
  (REG_P (X) ? (X) : SUBREG_REG (X))
static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (arm_arch6m && arm_m_profile_small_mul)
	return COSTS_N_INSNS (32);

      if (CONST_INT_P (XEXP (x, 1)))
	{
	  int cycles = 0;
	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

	  while (i)
	    {
	      i >>= 2;
	      cycles++;
	    }
	  return COSTS_N_INSNS (2) + cycles;
	}
      return COSTS_N_INSNS (1) + 16;

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
	      + 4 * ((MEM_P (SET_SRC (x)))
		     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
	{
	  if (UINTVAL (x) < 256
	      /* 16-bit constant.  */
	      || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
	    return 0;
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
	return total;

      if (arm_arch6)
	return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
	 we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine grain tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int words, cost;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
	 defined by RTL expansion, especially for the expansion of
	 multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
	   && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	  || (GET_CODE (XEXP (x, 1)) == MULT
	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
	return COSTS_N_INSNS (2);
      /* Fall through.  */
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  /* Thumb1 mul instruction can't operate on const.  We must load it
	     into a register first.  */
	  int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
	  /* For the targets which have a very small and high-latency multiply
	     unit, we prefer to synthesize the mult with up to 5 instructions,
	     giving a good balance between size and performance.  */
	  if (arm_arch6m && arm_m_profile_small_mul)
	    return COSTS_N_INSNS (5);
	  else
	    return COSTS_N_INSNS (1) + const_size;
	}
      return COSTS_N_INSNS (1);

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      cost = COSTS_N_INSNS (words);
      if (satisfies_constraint_J (SET_SRC (x))
	  || satisfies_constraint_K (SET_SRC (x))
	     /* Too big an immediate for a 2-byte mov, using MOVT.  */
	  || (CONST_INT_P (SET_SRC (x))
	      && UINTVAL (SET_SRC (x)) >= 256
	      && TARGET_HAVE_MOVT
	      && satisfies_constraint_j (SET_SRC (x)))
	     /* thumb1_movdi_insn.  */
	  || ((words > 1) && MEM_P (SET_SRC (x))))
	cost += COSTS_N_INSNS (1);
      return cost;

    case CONST_INT:
      if (outer == SET)
	{
	  if (UINTVAL (x) < 256)
	    return COSTS_N_INSNS (1);
	  /* movw is 4byte long.  */
	  if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
	  if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      return COSTS_N_INSNS (1);

    case MEM:
      return (COSTS_N_INSNS (1)
	      + COSTS_N_INSNS (1)
		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? COSTS_N_INSNS (1) : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
	{
	case E_QImode:
	  return (1 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case E_HImode:
	  return (4 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case E_SImode:
	  return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	default:
	  return 99;
	}

    default:
      return 99;
    }
}
/* Helper function for arm_rtx_costs.  If one operand of the OP, a
   PLUS, adds the carry flag, then return the other operand.  If
   neither is a carry, return OP unchanged.  */
static rtx
strip_carry_operation (rtx op)
{
  gcc_assert (GET_CODE (op) == PLUS);
  if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
    return XEXP (op, 1);
  else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
    return XEXP (op, 0);
  return op;
}
/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
	   || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
	*shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
    }

  return NULL;
}
static bool
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
{
  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
  rtx_code code = GET_CODE (x);
  gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);

  switch (XINT (x, 1))
    {
    case UNSPEC_UNALIGNED_LOAD:
      /* We can only do unaligned loads into the integer unit, and we can't
	 use LDM or LDRD.  */
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
		  + extra_cost->ldst.load_unaligned);

#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_UNALIGNED_STORE:
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
		  + extra_cost->ldst.store_unaligned);

      *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_VRINTZ:
    case UNSPEC_VRINTP:
    case UNSPEC_VRINTM:
    case UNSPEC_VRINTR:
    case UNSPEC_VRINTX:
    case UNSPEC_VRINTA:
      if (speed_p)
	*cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;

      return true;
    default:
      *cost = COSTS_N_INSNS (2);
      break;
    }
  return true;
}
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
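/* For example, with two register arguments LIBCALL_COST (2) evaluates to
   COSTS_N_INSNS (20) when optimizing for speed and COSTS_N_INSNS (4) when
   optimizing for size.  */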
#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
				       ASHIFT, 1, speed_p);		\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
				    ASHIFT, 0, speed_p)			\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      GET_MODE (shift_op),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0)
/* Helper function for arm_rtx_costs_internal.  Calculates the cost of a MEM,
   considering the costs of the addressing mode and memory access
   separately.  */
static bool
arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
	       int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);

  if (flag_pic
      && GET_CODE (XEXP (x, 0)) == PLUS
      && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
    /* This will be split into two instructions.  Add the cost of the
       additional instruction here.  The cost of the memory access is computed
       below.  See arm.md:calculate_pic_address.  */
    *cost += COSTS_N_INSNS (1);

  /* Calculate cost of the addressing mode.  */
  if (speed_p)
    {
      arm_addr_mode_op op_type;
      switch (GET_CODE (XEXP (x, 0)))
	{
	default:
	case REG:
	  op_type = AMO_DEFAULT;
	  break;
	case MINUS:
	  /* MINUS does not appear in RTL, but the architecture supports it,
	     so handle this case defensively.  */
	  /* fall through */
	case PLUS:
	  op_type = AMO_NO_WB;
	  break;
	case PRE_INC:
	case PRE_DEC:
	case POST_INC:
	case POST_DEC:
	case PRE_MODIFY:
	case POST_MODIFY:
	  op_type = AMO_WB;
	  break;
	}

      if (VECTOR_MODE_P (mode))
	*cost += current_tune->addr_mode_costs->vector[op_type];
      else if (FLOAT_MODE_P (mode))
	*cost += current_tune->addr_mode_costs->fp[op_type];
      else
	*cost += current_tune->addr_mode_costs->integer[op_type];
    }

  /* Calculate cost of memory access.  */
  if (speed_p)
    {
      if (FLOAT_MODE_P (mode))
	{
	  if (GET_MODE_SIZE (mode) == 8)
	    *cost += extra_cost->ldst.loadd;
	  else
	    *cost += extra_cost->ldst.loadf;
	}
      else if (VECTOR_MODE_P (mode))
	*cost += extra_cost->ldst.loadv;
      else
	{
	  /* Integer modes.  */
	  if (GET_MODE_SIZE (mode) == 8)
	    *cost += extra_cost->ldst.ldrd;
	  else
	    *cost += extra_cost->ldst.load;
	}
    }

  return true;
}
/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained within an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

   We currently make no attempt to model the size savings of Thumb-2
   16-bit instructions.  At the normal points in compilation where
   this code is called we have no measure of whether the condition
   flags are live or not, and thus no realistic way to determine what
   the size will eventually be.  */
static bool
arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
			const struct cpu_cost_table *extra_cost,
			int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);

  if (TARGET_THUMB1)
    {
      if (speed_p)
	*cost = thumb1_rtx_costs (x, code, outer_code);
      else
	*cost = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case SET:
      *cost = 0;
      /* SET RTXs don't have a mode so we get it from the destination.  */
      mode = GET_MODE (SET_DEST (x));

      if (REG_P (SET_SRC (x))
	  && REG_P (SET_DEST (x)))
	{
	  /* Assume that most copies can be done with a single insn,
	     unless we don't have HW FP, in which case everything
	     larger than word mode will require two insns.  */
	  *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
				   && GET_MODE_SIZE (mode) > 4)
				  || mode == DImode)
				 ? 2 : 1);
	  /* Conditional register moves can be encoded
	     in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
	    *cost >>= 1;

	  return true;
	}

      if (CONST_INT_P (SET_SRC (x)))
	{
	  /* Handle CONST_INT here, since the value doesn't have a mode
	     and we would otherwise be unable to work out the true cost.  */
	  *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
			    0, speed_p);
	  outer_code = SET;
	  /* Slightly lower the cost of setting a core reg to a constant.
	     This helps break up chains and allows for better scheduling.  */
	  if (REG_P (SET_DEST (x))
	      && REGNO (SET_DEST (x)) <= LR_REGNUM)
	    *cost -= 1;
	  x = SET_SRC (x);
	  /* Immediate moves with an immediate in the range [0, 255] can be
	     encoded in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
	      && INTVAL (x) >= 0 && INTVAL (x) <= 255)
	    *cost >>= 1;
	  goto const_int_cost;
	}

      return false;

    case MEM:
      return arm_mem_costs (x, extra_cost, cost, speed_p);
    case PARALLEL:
    {
      /* Calculations of LDM costs are complex.  We assume an initial cost
	 (ldm_1st) which will load the number of registers mentioned in
	 ldm_regs_per_insn_1st registers; then each additional
	 ldm_regs_per_insn_subsequent registers cost one more insn.  The
	 formula for N regs is thus:

	 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
				   + ldm_regs_per_insn_subsequent - 1)
				  / ldm_regs_per_insn_subsequent).

	 Additional costs may also be added for addressing.  A similar
	 formula is used for STM.  */
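      /* For example, with ldm_regs_per_insn_1st == 3 and
	 ldm_regs_per_insn_subsequent == 2 (parameter values invented for
	 illustration), an 8-register LDM costs
	 ldm_1st + COSTS_N_INSNS ((5 + 1) / 2) = ldm_1st + COSTS_N_INSNS (3).  */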
      bool is_ldm = load_multiple_operation (x, SImode);
      bool is_stm = store_multiple_operation (x, SImode);

      if (is_ldm || is_stm)
	{
	  if (speed_p)
	    {
	      HOST_WIDE_INT nregs = XVECLEN (x, 0);
	      HOST_WIDE_INT regs_per_insn_1st = is_ldm
			? extra_cost->ldst.ldm_regs_per_insn_1st
			: extra_cost->ldst.stm_regs_per_insn_1st;
	      HOST_WIDE_INT regs_per_insn_sub = is_ldm
			? extra_cost->ldst.ldm_regs_per_insn_subsequent
			: extra_cost->ldst.stm_regs_per_insn_subsequent;

	      *cost += regs_per_insn_1st
		       + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
					 + regs_per_insn_sub - 1)
					/ regs_per_insn_sub);
	      return true;
	    }
	}
      return false;
    }

    case DIV:
    case UDIV:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*cost += COSTS_N_INSNS (speed_p
				? extra_cost->fp[mode != SFmode].div : 0);
      else if (mode == SImode && TARGET_IDIV)
	*cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
      else
	*cost = LIBCALL_COST (2);

      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
	 possible udiv is preferred.  */
      *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
      return false;	/* All arguments must be in registers.  */
    case MOD:
      /* MOD by a power of 2 can be expanded as:
	 rsbs    r1, r0, #0
	 and     r0, r0, #(n - 1)
	 and     r1, r1, #(n - 1)
	 rsbpl   r0, r1, #0.  */
      if (CONST_INT_P (XEXP (x, 1))
	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0
	  && mode == SImode)
	{
	  *cost += COSTS_N_INSNS (3);

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical
		     + extra_cost->alu.arith;
	  return true;
	}

      /* Fall through.  */
    case UMOD:
      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
	 possible udiv is preferred.  */
      *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
      return false;	/* All arguments must be in registers.  */
    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
	{
	  *cost += (COSTS_N_INSNS (1)
		    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
	  if (speed_p)
	    *cost += extra_cost->alu.shift_reg;
	  return true;
	}
      /* Fall through.  */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
	{
	  *cost += (COSTS_N_INSNS (2)
		    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	  /* Slightly disparage left shift by 1 so that we prefer adddi3.  */
	  if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
	    *cost += 1;
	  return true;
	}
      else if (mode == SImode)
	{
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  /* Slightly disparage register shifts at -Os, but not by much.  */
	  if (!CONST_INT_P (XEXP (x, 1)))
	    *cost += (speed_p ? extra_cost->alu.shift_reg : 1
		      + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_INT
	       && GET_MODE_SIZE (mode) < 4)
	{
	  if (code == ASHIFT)
	    {
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      /* Slightly disparage register shifts at -Os, but not by
		 much.  */
	      if (!CONST_INT_P (XEXP (x, 1)))
		*cost += (speed_p ? extra_cost->alu.shift_reg : 1
			  + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	    }
	  else if (code == LSHIFTRT || code == ASHIFTRT)
	    {
	      if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
		{
		  /* Can use SBFX/UBFX.  */
		  if (speed_p)
		    *cost += extra_cost->alu.bfx;
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		}
	      else
		{
		  *cost += COSTS_N_INSNS (1);
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		  if (speed_p)
		    {
		      if (CONST_INT_P (XEXP (x, 1)))
			*cost += 2 * extra_cost->alu.shift;
		      else
			*cost += (extra_cost->alu.shift
				  + extra_cost->alu.shift_reg);
		    }
		  else
		    /* Slightly disparage register shifts.  */
		    *cost += !CONST_INT_P (XEXP (x, 1));
		}
	    }
	  else /* Rotates.  */
	    {
	      *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      if (speed_p)
		{
		  if (CONST_INT_P (XEXP (x, 1)))
		    *cost += (2 * extra_cost->alu.shift
			      + extra_cost->alu.log_shift);
		  else
		    *cost += (extra_cost->alu.shift
			      + extra_cost->alu.shift_reg
			      + extra_cost->alu.log_shift_reg);
		}
	    }
	  return true;
	}

      *cost = LIBCALL_COST (2);
      return false;

    case BSWAP:
      if (arm_arch6)
	{
	  if (mode == SImode)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.rev;

	      return false;
	    }
	}
      else
	{
	  /* No rev instruction available.  Look at arm_legacy_rev
	     and thumb_legacy_rev for the form of RTL used then.  */
	  if (TARGET_THUMB)
	    {
	      *cost += COSTS_N_INSNS (9);

	      if (speed_p)
		{
		  *cost += 6 * extra_cost->alu.shift;
		  *cost += 3 * extra_cost->alu.logical;
		}
	    }
	  else
	    {
	      *cost += COSTS_N_INSNS (4);

	      if (speed_p)
		{
		  *cost += 2 * extra_cost->alu.shift;
		  *cost += extra_cost->alu.arith_shift;
		  *cost += 2 * extra_cost->alu.logical;
		}
	    }
	  return true;
	}
      return false;

    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT
	      || GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      rtx mul_op0, mul_op1, sub_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      if (GET_CODE (XEXP (x, 0)) == MULT)
		{
		  mul_op0 = XEXP (XEXP (x, 0), 0);
		  mul_op1 = XEXP (XEXP (x, 0), 1);
		  sub_op = XEXP (x, 1);
		}
	      else
		{
		  mul_op0 = XEXP (XEXP (x, 1), 0);
		  mul_op1 = XEXP (XEXP (x, 1), 1);
		  sub_op = XEXP (x, 0);
		}

	      /* The first operand of the multiply may be optionally
		 negated.  */
	      if (GET_CODE (mul_op0) == NEG)
		mul_op0 = XEXP (mul_op0, 0);

	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
			+ rtx_cost (sub_op, mode, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_by_reg = NULL;
	  rtx shift_op;
	  rtx non_shift_op;
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);

	  /* Factor out any borrow operation.  There's more than one way
	     of expressing this; try to recognize them all.  */
	  if (GET_CODE (op0) == MINUS)
	    {
	      if (arm_borrow_operation (op1, SImode))
		{
		  op1 = XEXP (op0, 1);
		  op0 = XEXP (op0, 0);
		}
	      else if (arm_borrow_operation (XEXP (op0, 1), SImode))
		op0 = XEXP (op0, 0);
	    }
	  else if (GET_CODE (op1) == PLUS
		   && arm_borrow_operation (XEXP (op1, 0), SImode))
	    op1 = XEXP (op1, 0);
	  else if (GET_CODE (op0) == NEG
		   && arm_borrow_operation (op1, SImode))
	    {
	      /* Negate with carry-in.  For Thumb2 this is done with
		 SBC R, X, X lsl #1 (ie X - 2X - C) as Thumb lacks the
		 RSC instruction that exists in Arm mode.  */
	      if (speed_p)
		*cost += (TARGET_THUMB2
			  ? extra_cost->alu.arith_shift
			  : extra_cost->alu.arith);
	      *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
	      return true;
	    }
	  /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
	     Note we do mean ~borrow here.  */
	  else if (TARGET_ARM && arm_carry_operation (op0, SImode))
	    {
	      *cost += rtx_cost (op1, mode, code, 1, speed_p);
	      return true;
	    }

	  shift_op = shifter_op_p (op0, &shift_by_reg);
	  if (shift_op == NULL)
	    {
	      shift_op = shifter_op_p (op1, &shift_by_reg);
	      non_shift_op = op0;
	    }
	  else
	    non_shift_op = op1;

	  if (shift_op != NULL)
	    {
	      if (shift_by_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
	      *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
	      return true;
	    }

	  if (arm_arch_thumb2
	      && GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      /* MLS.  */
	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
	      return true;
	    }

	  if (CONST_INT_P (op0))
	    {
	      int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
					    INTVAL (op0), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
	      return true;
	    }
	  else if (speed_p)
	    *cost += extra_cost->alu.arith;

	  /* Don't recurse as we don't want to cost any borrow that
	     we've stripped.  */
	  *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
	  *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
	  return true;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  /* We check both sides of the MINUS for shifter operands since,
	     unlike PLUS, it's not commutative.  */

	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);

	  /* Slightly disparage, as we might need to widen the result.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
	      return true;
	    }

	  return false;
	}

      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	    {
	      rtx op1 = XEXP (x, 1);

	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;

	      if (GET_CODE (op1) == ZERO_EXTEND)
		*cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
				   0, speed_p);
	      else
		*cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				 0, speed_p);
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
		   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));
	      *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
				    GET_CODE (XEXP (x, 1)), 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
10283 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10284 && (mode == SFmode || !TARGET_VFP_SINGLE))
10286 if (GET_CODE (XEXP (x, 0)) == MULT)
10288 rtx mul_op0, mul_op1, add_op;
10291 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10293 mul_op0 = XEXP (XEXP (x, 0), 0);
10294 mul_op1 = XEXP (XEXP (x, 0), 1);
10295 add_op = XEXP (x, 1);
10297 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10298 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10299 + rtx_cost (add_op, mode, code, 0, speed_p));
10305 *cost += extra_cost->fp[mode != SFmode].addsub;
10308 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10310 *cost = LIBCALL_COST (2);
10314 /* Narrow modes can be synthesized in SImode, but the range
10315 of useful sub-operations is limited. Check for shift operations
10316 on one of the operands. Only left shifts can be used in the narrow modes. */
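/* An editorial illustration (not from the original sources): an HImode
   addition whose operand is (ashift:HI r1 (const_int 2)) can be done as a
   single SImode add-with-shift, because only the low 16 bits of the result
   matter; a right shift, by contrast, would first need the operand widened
   so that the bits shifted in are well defined.  */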
10318 if (GET_MODE_CLASS (mode) == MODE_INT
10319 && GET_MODE_SIZE (mode) < 4)
10321 rtx shift_op, shift_reg;
10324 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10326 if (CONST_INT_P (XEXP (x, 1)))
10328 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10329 INTVAL (XEXP (x, 1)), NULL_RTX,
10331 *cost = COSTS_N_INSNS (insns);
10333 *cost += insns * extra_cost->alu.arith;
10334 /* Slightly penalize a narrow operation as the result may
10336 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10340 /* Slightly penalize a narrow operation as the result may
10344 *cost += extra_cost->alu.arith;
10349 if (mode == SImode)
10351 rtx shift_op, shift_reg;
10353 if (TARGET_INT_SIMD
10354 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10355 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10357 /* UXTA[BH] or SXTA[BH]. */
10359 *cost += extra_cost->alu.extend_arith;
10360 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10362 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10366 rtx op0 = XEXP (x, 0);
10367 rtx op1 = XEXP (x, 1);
10369 /* Handle a side effect of adding in the carry to an addition. */
10370 if (GET_CODE (op0) == PLUS
10371 && arm_carry_operation (op1, mode))
10373 op1 = XEXP (op0, 1);
10374 op0 = XEXP (op0, 0);
10376 else if (GET_CODE (op1) == PLUS
10377 && arm_carry_operation (op0, mode))
10379 op0 = XEXP (op1, 0);
10380 op1 = XEXP (op1, 1);
10382 else if (GET_CODE (op0) == PLUS)
10384 op0 = strip_carry_operation (op0);
10385 if (swap_commutative_operands_p (op0, op1))
10386 std::swap (op0, op1);
10389 if (arm_carry_operation (op0, mode))
10391 /* Adding the carry to a register is a canonicalization of
10392 adding 0 to the register plus the carry. */
10394 *cost += extra_cost->alu.arith;
10395 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10400 shift_op = shifter_op_p (op0, &shift_reg);
10401 if (shift_op != NULL)
10406 *cost += extra_cost->alu.arith_shift_reg;
10407 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10410 *cost += extra_cost->alu.arith_shift;
10412 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10413 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10417 if (GET_CODE (op0) == MULT)
10421 if (TARGET_DSP_MULTIPLY
10422 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10423 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10424 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10425 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10426 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10427 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10428 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10429 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10430 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10431 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10432 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10433 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10436 /* SMLA[BT][BT]. */
10438 *cost += extra_cost->mult[0].extend_add;
10439 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10440 SIGN_EXTEND, 0, speed_p)
10441 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10442 SIGN_EXTEND, 0, speed_p)
10443 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10448 *cost += extra_cost->mult[0].add;
10449 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10450 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10451 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10455 if (CONST_INT_P (op1))
10457 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10458 INTVAL (op1), NULL_RTX,
10460 *cost = COSTS_N_INSNS (insns);
10462 *cost += insns * extra_cost->alu.arith;
10463 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10468 *cost += extra_cost->alu.arith;
10470 /* Don't recurse here because we want to test the operands
10471 without any carry operation. */
10472 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10473 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10477 if (mode == DImode)
10479 if (GET_CODE (XEXP (x, 0)) == MULT
10480 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10481 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10482 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10483 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10486 *cost += extra_cost->mult[1].extend_add;
10487 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10488 ZERO_EXTEND, 0, speed_p)
10489 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10490 ZERO_EXTEND, 0, speed_p)
10491 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10495 *cost += COSTS_N_INSNS (1);
10497 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10498 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10501 *cost += (extra_cost->alu.arith
10502 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10503 ? extra_cost->alu.arith
10504 : extra_cost->alu.arith_shift));
10506 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10508 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10513 *cost += 2 * extra_cost->alu.arith;
10518 *cost = LIBCALL_COST (2);
10521 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10524 *cost += extra_cost->alu.rev;
10528 /* Fall through. */
10529 case AND: case XOR:
10530 if (mode == SImode)
10532 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10533 rtx op0 = XEXP (x, 0);
10534 rtx shift_op, shift_reg;
10538 || (code == IOR && TARGET_THUMB2)))
10539 op0 = XEXP (op0, 0);
10542 shift_op = shifter_op_p (op0, &shift_reg);
10543 if (shift_op != NULL)
10548 *cost += extra_cost->alu.log_shift_reg;
10549 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10552 *cost += extra_cost->alu.log_shift;
10554 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10555 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10559 if (CONST_INT_P (XEXP (x, 1)))
10561 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10562 INTVAL (XEXP (x, 1)), NULL_RTX,
10565 *cost = COSTS_N_INSNS (insns);
10567 *cost += insns * extra_cost->alu.logical;
10568 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10573 *cost += extra_cost->alu.logical;
10574 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10575 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10579 if (mode == DImode)
10581 rtx op0 = XEXP (x, 0);
10582 enum rtx_code subcode = GET_CODE (op0);
10584 *cost += COSTS_N_INSNS (1);
10588 || (code == IOR && TARGET_THUMB2)))
10589 op0 = XEXP (op0, 0);
10591 if (GET_CODE (op0) == ZERO_EXTEND)
10594 *cost += 2 * extra_cost->alu.logical;
10596 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10598 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10601 else if (GET_CODE (op0) == SIGN_EXTEND)
10604 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10606 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10608 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10613 *cost += 2 * extra_cost->alu.logical;
10619 *cost = LIBCALL_COST (2);
10623 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10624 && (mode == SFmode || !TARGET_VFP_SINGLE))
10626 rtx op0 = XEXP (x, 0);
10628 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10629 op0 = XEXP (op0, 0);
10632 *cost += extra_cost->fp[mode != SFmode].mult;
10634 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10635 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10638 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10640 *cost = LIBCALL_COST (2);
10644 if (mode == SImode)
10646 if (TARGET_DSP_MULTIPLY
10647 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10648 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10649 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10650 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10651 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10652 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10653 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10654 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10655 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10656 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10657 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10658 && (INTVAL (XEXP (XEXP (x, 1), 1))
10661 /* SMUL[TB][TB]. */
10663 *cost += extra_cost->mult[0].extend;
10664 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10665 SIGN_EXTEND, 0, speed_p);
10666 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10667 SIGN_EXTEND, 1, speed_p);
10671 *cost += extra_cost->mult[0].simple;
10675 if (mode == DImode)
10677 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10678 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10679 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10680 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
10683 *cost += extra_cost->mult[1].extend;
10684 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10685 ZERO_EXTEND, 0, speed_p)
10686 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10687 ZERO_EXTEND, 0, speed_p));
10691 *cost = LIBCALL_COST (2);
10696 *cost = LIBCALL_COST (2);
10700 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10701 && (mode == SFmode || !TARGET_VFP_SINGLE))
10703 if (GET_CODE (XEXP (x, 0)) == MULT)
10706 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10711 *cost += extra_cost->fp[mode != SFmode].neg;
10715 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10717 *cost = LIBCALL_COST (1);
10721 if (mode == SImode)
10723 if (GET_CODE (XEXP (x, 0)) == ABS)
10725 *cost += COSTS_N_INSNS (1);
10726 /* Assume the non-flag-changing variant. */
10728 *cost += (extra_cost->alu.log_shift
10729 + extra_cost->alu.arith_shift);
10730 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10734 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10735 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10737 *cost += COSTS_N_INSNS (1);
10738 /* No extra cost for MOV imm and MVN imm. */
10739 /* If the comparison op is using the flags, there's no further
10740 cost, otherwise we need to add the cost of the comparison. */
10741 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10742 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10743 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10745 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10746 *cost += (COSTS_N_INSNS (1)
10747 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10749 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10752 *cost += extra_cost->alu.arith;
10758 *cost += extra_cost->alu.arith;
10762 if (GET_MODE_CLASS (mode) == MODE_INT
10763 && GET_MODE_SIZE (mode) < 4)
10765 /* Slightly disparage, as we might need an extend operation. */
10768 *cost += extra_cost->alu.arith;
10772 if (mode == DImode)
10774 *cost += COSTS_N_INSNS (1);
10776 *cost += 2 * extra_cost->alu.arith;
10781 *cost = LIBCALL_COST (1);
10785 if (mode == SImode)
10788 rtx shift_reg = NULL;
10790 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10794 if (shift_reg != NULL)
10797 *cost += extra_cost->alu.log_shift_reg;
10798 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10801 *cost += extra_cost->alu.log_shift;
10802 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10807 *cost += extra_cost->alu.logical;
10810 if (mode == DImode)
10812 *cost += COSTS_N_INSNS (1);
10818 *cost += LIBCALL_COST (1);
10823 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10825 *cost += COSTS_N_INSNS (3);
10828 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10829 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10831 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10832 /* Assume that if one arm of the if_then_else is a register,
10833 it will be tied with the result, eliminating the
10834 conditional insn. */
10835 if (REG_P (XEXP (x, 1)))
10837 else if (REG_P (XEXP (x, 2)))
10843 if (extra_cost->alu.non_exec_costs_exec)
10844 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10846 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10849 *cost += op1cost + op2cost;
10855 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10859 machine_mode op0mode;
10860 /* We'll mostly assume that the cost of a compare is the cost of the
10861 LHS. However, there are some notable exceptions. */
10863 /* Floating point compares are never done as side-effects. */
10864 op0mode = GET_MODE (XEXP (x, 0));
10865 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10866 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10869 *cost += extra_cost->fp[op0mode != SFmode].compare;
10871 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10873 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10879 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10881 *cost = LIBCALL_COST (2);
10885 /* DImode compares normally take two insns. */
10886 if (op0mode == DImode)
10888 *cost += COSTS_N_INSNS (1);
10890 *cost += 2 * extra_cost->alu.arith;
10894 if (op0mode == SImode)
10899 if (XEXP (x, 1) == const0_rtx
10900 && !(REG_P (XEXP (x, 0))
10901 || (GET_CODE (XEXP (x, 0)) == SUBREG
10902 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10904 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10906 /* Multiply operations that set the flags are often
10907 significantly more expensive. */
10909 && GET_CODE (XEXP (x, 0)) == MULT
10910 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10911 *cost += extra_cost->mult[0].flag_setting;
10914 && GET_CODE (XEXP (x, 0)) == PLUS
10915 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10916 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10918 *cost += extra_cost->mult[0].flag_setting;
10923 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10924 if (shift_op != NULL)
10926 if (shift_reg != NULL)
10928 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10931 *cost += extra_cost->alu.arith_shift_reg;
10934 *cost += extra_cost->alu.arith_shift;
10935 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10936 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10941 *cost += extra_cost->alu.arith;
10942 if (CONST_INT_P (XEXP (x, 1))
10943 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10945 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10953 *cost = LIBCALL_COST (2);
10976 if (outer_code == SET)
10978 /* Is it a store-flag operation? */
10979 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10980 && XEXP (x, 1) == const0_rtx)
10982 /* Thumb also needs an IT insn. */
10983 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10986 if (XEXP (x, 1) == const0_rtx)
10991 /* LSR Rd, Rn, #31. */
10993 *cost += extra_cost->alu.shift;
11003 *cost += COSTS_N_INSNS (1);
11007 /* RSBS T1, Rn, Rn, LSR #31
11009 *cost += COSTS_N_INSNS (1);
11011 *cost += extra_cost->alu.arith_shift;
11015 /* RSB Rd, Rn, Rn, ASR #1
11016 LSR Rd, Rd, #31. */
11017 *cost += COSTS_N_INSNS (1);
11019 *cost += (extra_cost->alu.arith_shift
11020 + extra_cost->alu.shift);
11026 *cost += COSTS_N_INSNS (1);
11028 *cost += extra_cost->alu.shift;
11032 /* Remaining cases are either meaningless or would take
11033 three insns anyway. */
11034 *cost = COSTS_N_INSNS (3);
11037 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11042 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11043 if (CONST_INT_P (XEXP (x, 1))
11044 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11046 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11053 /* Not directly inside a set. If it involves the condition code
11054 register it must be the condition for a branch, cond_exec or
11055 I_T_E operation. Since the comparison is performed elsewhere
11056 this is just the control part which has no additional cost. */
11058 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11059 && XEXP (x, 1) == const0_rtx)
11067 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11068 && (mode == SFmode || !TARGET_VFP_SINGLE))
11071 *cost += extra_cost->fp[mode != SFmode].neg;
11075 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11077 *cost = LIBCALL_COST (1);
11081 if (mode == SImode)
11084 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11088 *cost = LIBCALL_COST (1);
11092 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11093 && MEM_P (XEXP (x, 0)))
11095 if (mode == DImode)
11096 *cost += COSTS_N_INSNS (1);
11101 if (GET_MODE (XEXP (x, 0)) == SImode)
11102 *cost += extra_cost->ldst.load;
11104 *cost += extra_cost->ldst.load_sign_extend;
11106 if (mode == DImode)
11107 *cost += extra_cost->alu.shift;
11112 /* Widening from less than 32-bits requires an extend operation. */
11113 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11115 /* We have SXTB/SXTH. */
11116 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11118 *cost += extra_cost->alu.extend;
11120 else if (GET_MODE (XEXP (x, 0)) != SImode)
11122 /* Needs two shifts. */
11123 *cost += COSTS_N_INSNS (1);
11124 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11126 *cost += 2 * extra_cost->alu.shift;
11129 /* Widening beyond 32-bits requires one more insn. */
11130 if (mode == DImode)
11132 *cost += COSTS_N_INSNS (1);
11134 *cost += extra_cost->alu.shift;
11141 || GET_MODE (XEXP (x, 0)) == SImode
11142 || GET_MODE (XEXP (x, 0)) == QImode)
11143 && MEM_P (XEXP (x, 0)))
11145 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11147 if (mode == DImode)
11148 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11153 /* Widening from less than 32-bits requires an extend operation. */
11154 if (GET_MODE (XEXP (x, 0)) == QImode)
11156 /* UXTB can be a shorter instruction in Thumb2, but it might
11157 be slower than the AND Rd, Rn, #255 alternative. When
11158 optimizing for speed it should never be slower to use
11159 AND, and we don't really model 16-bit vs 32-bit insns here. */
11162 *cost += extra_cost->alu.logical;
11164 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11166 /* We have UXTB/UXTH. */
11167 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11169 *cost += extra_cost->alu.extend;
11171 else if (GET_MODE (XEXP (x, 0)) != SImode)
11173 /* Needs two shifts. It's marginally preferable to use
11174 shifts rather than two BIC instructions as the second
11175 shift may merge with a subsequent insn as a shifter op. */
11177 *cost = COSTS_N_INSNS (2);
11178 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11180 *cost += 2 * extra_cost->alu.shift;
11183 /* Widening beyond 32-bits requires one more insn. */
11184 if (mode == DImode)
11186 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11193 /* CONST_INT has no mode, so we cannot tell for sure how many
11194 insns are really going to be needed. The best we can do is
11195 look at the value passed. If it fits in SImode, then assume
11196 that's the mode it will be used for. Otherwise assume it
11197 will be used in DImode. */
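/* An editorial example: 0x7fffffff survives trunc_int_for_mode (..., SImode)
   unchanged, so it is costed as a single SImode constant; 0x123456789 does
   not, so it falls into the DImode path below, which costs the low and high
   words separately.  */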
11198 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11203 /* Avoid blowing up in arm_gen_constant (). */
11204 if (!(outer_code == PLUS
11205 || outer_code == AND
11206 || outer_code == IOR
11207 || outer_code == XOR
11208 || outer_code == MINUS))
11212 if (mode == SImode)
11214 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11215 INTVAL (x), NULL, NULL,
11221 *cost += COSTS_N_INSNS (arm_gen_constant
11222 (outer_code, SImode, NULL,
11223 trunc_int_for_mode (INTVAL (x), SImode),
11225 + arm_gen_constant (outer_code, SImode, NULL,
11226 INTVAL (x) >> 32, NULL,
11238 if (arm_arch_thumb2 && !flag_pic)
11239 *cost += COSTS_N_INSNS (1);
11241 *cost += extra_cost->ldst.load;
11244 *cost += COSTS_N_INSNS (1);
11248 *cost += COSTS_N_INSNS (1);
11250 *cost += extra_cost->alu.arith;
11256 *cost = COSTS_N_INSNS (4);
11261 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11262 && (mode == SFmode || !TARGET_VFP_SINGLE))
11264 if (vfp3_const_double_rtx (x))
11267 *cost += extra_cost->fp[mode == DFmode].fpconst;
11273 if (mode == DFmode)
11274 *cost += extra_cost->ldst.loadd;
11276 *cost += extra_cost->ldst.loadf;
11279 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11283 *cost = COSTS_N_INSNS (4);
11289 && TARGET_HARD_FLOAT
11290 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11291 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
11292 *cost = COSTS_N_INSNS (1);
11294 *cost = COSTS_N_INSNS (4);
11299 /* When optimizing for size, we prefer constant pool entries to
11300 MOVW/MOVT pairs, so bump the cost of these slightly. */
11307 *cost += extra_cost->alu.clz;
11311 if (XEXP (x, 1) == const0_rtx)
11314 *cost += extra_cost->alu.log_shift;
11315 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11318 /* Fall through. */
11322 *cost += COSTS_N_INSNS (1);
11326 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11327 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11328 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11329 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11330 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11331 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11332 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11333 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11337 *cost += extra_cost->mult[1].extend;
11338 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11339 ZERO_EXTEND, 0, speed_p)
11340 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11341 ZERO_EXTEND, 0, speed_p));
11344 *cost = LIBCALL_COST (1);
11347 case UNSPEC_VOLATILE:
11349 return arm_unspec_cost (x, outer_code, speed_p, cost);
11352 /* Reading the PC is like reading any other register. Writing it
11353 is more expensive, but we take that into account elsewhere. */
11358 /* TODO: Simple zero_extract of bottom bits using AND. */
11359 /* Fall through. */
11363 && CONST_INT_P (XEXP (x, 1))
11364 && CONST_INT_P (XEXP (x, 2)))
11367 *cost += extra_cost->alu.bfx;
11368 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11371 /* Without UBFX/SBFX, need to resort to shift operations. */
11372 *cost += COSTS_N_INSNS (1);
11374 *cost += 2 * extra_cost->alu.shift;
11375 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11379 if (TARGET_HARD_FLOAT)
11382 *cost += extra_cost->fp[mode == DFmode].widen;
11384 && GET_MODE (XEXP (x, 0)) == HFmode)
11386 /* Pre v8, widening HF->DF is a two-step process, first
11387 widening to SFmode. */
11388 *cost += COSTS_N_INSNS (1);
11390 *cost += extra_cost->fp[0].widen;
11392 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11396 *cost = LIBCALL_COST (1);
11399 case FLOAT_TRUNCATE:
11400 if (TARGET_HARD_FLOAT)
11403 *cost += extra_cost->fp[mode == DFmode].narrow;
11404 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11406 /* Vector modes? */
11408 *cost = LIBCALL_COST (1);
11412 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11414 rtx op0 = XEXP (x, 0);
11415 rtx op1 = XEXP (x, 1);
11416 rtx op2 = XEXP (x, 2);
11419 /* vfms or vfnma. */
11420 if (GET_CODE (op0) == NEG)
11421 op0 = XEXP (op0, 0);
11423 /* vfnms or vfnma. */
11424 if (GET_CODE (op2) == NEG)
11425 op2 = XEXP (op2, 0);
11427 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11428 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11429 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11432 *cost += extra_cost->fp[mode == DFmode].fma;
11437 *cost = LIBCALL_COST (3);
11442 if (TARGET_HARD_FLOAT)
11444 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11445 a vcvt fixed-point conversion. */
11446 if (code == FIX && mode == SImode
11447 && GET_CODE (XEXP (x, 0)) == FIX
11448 && GET_MODE (XEXP (x, 0)) == SFmode
11449 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11450 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11454 *cost += extra_cost->fp[0].toint;
11456 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11461 if (GET_MODE_CLASS (mode) == MODE_INT)
11463 mode = GET_MODE (XEXP (x, 0));
11465 *cost += extra_cost->fp[mode == DFmode].toint;
11466 /* Strip off the 'cost' of rounding towards zero. */
11467 if (GET_CODE (XEXP (x, 0)) == FIX)
11468 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11471 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11472 /* ??? Increase the cost to deal with transferring from
11473 FP -> CORE registers? */
11476 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11480 *cost += extra_cost->fp[mode == DFmode].roundint;
11483 /* Vector costs? */
11485 *cost = LIBCALL_COST (1);
11489 case UNSIGNED_FLOAT:
11490 if (TARGET_HARD_FLOAT)
11492 /* ??? Increase the cost to deal with transferring from CORE
11493 -> FP registers? */
11495 *cost += extra_cost->fp[mode == DFmode].fromint;
11498 *cost = LIBCALL_COST (1);
11506 /* Just a guess. Guess number of instructions in the asm
11507 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11508 though (see PR60663). */
11509 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11510 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11512 *cost = COSTS_N_INSNS (asm_length + num_operands);
11516 if (mode != VOIDmode)
11517 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11519 *cost = COSTS_N_INSNS (4); /* Who knows? */
11524 #undef HANDLE_NARROW_SHIFT_ARITH
11526 /* RTX costs entry point. */
11529 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11530 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11533 int code = GET_CODE (x);
11534 gcc_assert (current_tune->insn_extra_cost);
11536 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
11537 (enum rtx_code) outer_code,
11538 current_tune->insn_extra_cost,
11541 if (dump_file && arm_verbose_cost)
11543 print_rtl_single (dump_file, x);
11544 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11545 *total, result ? "final" : "partial");
11551 arm_insn_cost (rtx_insn *insn, bool speed)
11555 /* Don't cost a simple reg-reg move at a full insn cost: such moves
11556 will likely disappear during register allocation. */
11557 if (!reload_completed
11558 && GET_CODE (PATTERN (insn)) == SET
11559 && REG_P (SET_DEST (PATTERN (insn)))
11560 && REG_P (SET_SRC (PATTERN (insn))))
11562 cost = pattern_cost (PATTERN (insn), speed);
11563 /* If the cost is zero, then it's likely a complex insn. We don't want the
11564 cost of these to be less than something we know about. */
11565 return cost ? cost : COSTS_N_INSNS (2);
11568 /* All address computations that can be done are free, but rtx cost returns
11569 the same for practically all of them. So we weight the different types
11570 of address here in the order (most pref first):
11571 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
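/* An editorial illustration of the order (not exhaustive): an auto-increment
   address such as [r0], #4 is weighted cheapest, a shifted sum such as
   [r0, r1, lsl #2] next, then a reg-plus-immediate form such as [r0, #8],
   then a plain register [r0], with a bare label reference or a
   memory-indirect address weighted most expensive.  */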
11573 arm_arm_address_cost (rtx x)
11575 enum rtx_code c = GET_CODE (x);
11577 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11579 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11584 if (CONST_INT_P (XEXP (x, 1)))
11587 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11597 arm_thumb_address_cost (rtx x)
11599 enum rtx_code c = GET_CODE (x);
11604 && REG_P (XEXP (x, 0))
11605 && CONST_INT_P (XEXP (x, 1)))
11612 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11613 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11615 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11618 /* Adjust cost hook for XScale. */
11620 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11623 /* Some true dependencies can have a higher cost depending
11624 on precisely how certain input operands are used. */
11626 && recog_memoized (insn) >= 0
11627 && recog_memoized (dep) >= 0)
11629 int shift_opnum = get_attr_shift (insn);
11630 enum attr_type attr_type = get_attr_type (dep);
11632 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11633 operand for INSN. If we have a shifted input operand and the
11634 instruction we depend on is another ALU instruction, then we may
11635 have to account for an additional stall. */
11636 if (shift_opnum != 0
11637 && (attr_type == TYPE_ALU_SHIFT_IMM
11638 || attr_type == TYPE_ALUS_SHIFT_IMM
11639 || attr_type == TYPE_LOGIC_SHIFT_IMM
11640 || attr_type == TYPE_LOGICS_SHIFT_IMM
11641 || attr_type == TYPE_ALU_SHIFT_REG
11642 || attr_type == TYPE_ALUS_SHIFT_REG
11643 || attr_type == TYPE_LOGIC_SHIFT_REG
11644 || attr_type == TYPE_LOGICS_SHIFT_REG
11645 || attr_type == TYPE_MOV_SHIFT
11646 || attr_type == TYPE_MVN_SHIFT
11647 || attr_type == TYPE_MOV_SHIFT_REG
11648 || attr_type == TYPE_MVN_SHIFT_REG))
11650 rtx shifted_operand;
11653 /* Get the shifted operand. */
11654 extract_insn (insn);
11655 shifted_operand = recog_data.operand[shift_opnum];
11657 /* Iterate over all the operands in DEP. If we write an operand
11658 that overlaps with SHIFTED_OPERAND, then we have to increase the
11659 cost of this dependency. */
11660 extract_insn (dep);
11661 preprocess_constraints (dep);
11662 for (opno = 0; opno < recog_data.n_operands; opno++)
11664 /* We can ignore strict inputs. */
11665 if (recog_data.operand_type[opno] == OP_IN)
11668 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11680 /* Adjust cost hook for Cortex A9. */
11682 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11692 case REG_DEP_OUTPUT:
11693 if (recog_memoized (insn) >= 0
11694 && recog_memoized (dep) >= 0)
11696 if (GET_CODE (PATTERN (insn)) == SET)
11699 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11701 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11703 enum attr_type attr_type_insn = get_attr_type (insn);
11704 enum attr_type attr_type_dep = get_attr_type (dep);
11706 /* By default all dependencies of the form
11709 have an extra latency of 1 cycle because
11710 of the input and output dependency in this
11711 case. However this gets modeled as a true
11712 dependency and hence all these checks. */
11713 if (REG_P (SET_DEST (PATTERN (insn)))
11714 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11716 /* FMACS is a special case where the dependent
11717 instruction can be issued 3 cycles before
11718 the normal latency in case of an output dependency. */
11720 if ((attr_type_insn == TYPE_FMACS
11721 || attr_type_insn == TYPE_FMACD)
11722 && (attr_type_dep == TYPE_FMACS
11723 || attr_type_dep == TYPE_FMACD))
11725 if (dep_type == REG_DEP_OUTPUT)
11726 *cost = insn_default_latency (dep) - 3;
11728 *cost = insn_default_latency (dep);
11733 if (dep_type == REG_DEP_OUTPUT)
11734 *cost = insn_default_latency (dep) + 1;
11736 *cost = insn_default_latency (dep);
11746 gcc_unreachable ();
11752 /* Adjust cost hook for FA726TE. */
11754 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11757 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11758 has a penalty of 3. */
11759 if (dep_type == REG_DEP_TRUE
11760 && recog_memoized (insn) >= 0
11761 && recog_memoized (dep) >= 0
11762 && get_attr_conds (dep) == CONDS_SET)
11764 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11765 if (get_attr_conds (insn) == CONDS_USE
11766 && get_attr_type (insn) != TYPE_BRANCH)
11772 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11773 || get_attr_conds (insn) == CONDS_USE)
11783 /* Implement TARGET_REGISTER_MOVE_COST.
11785 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11786 such a move is typically more expensive than a single memory access. We set
11787 the cost to less than two memory accesses so that floating
11788 point to integer conversion does not go through memory. */
11791 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11792 reg_class_t from, reg_class_t to)
11796 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11797 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11799 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11800 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11802 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11809 if (from == HI_REGS || to == HI_REGS)
11816 /* Implement TARGET_MEMORY_MOVE_COST. */
11819 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11820 bool in ATTRIBUTE_UNUSED)
11826 if (GET_MODE_SIZE (mode) < 4)
11829 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11833 /* Vectorizer cost model implementation. */
11835 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11837 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11839 int misalign ATTRIBUTE_UNUSED)
11843 switch (type_of_cost)
11846 return current_tune->vec_costs->scalar_stmt_cost;
11849 return current_tune->vec_costs->scalar_load_cost;
11852 return current_tune->vec_costs->scalar_store_cost;
11855 return current_tune->vec_costs->vec_stmt_cost;
11858 return current_tune->vec_costs->vec_align_load_cost;
11861 return current_tune->vec_costs->vec_store_cost;
11863 case vec_to_scalar:
11864 return current_tune->vec_costs->vec_to_scalar_cost;
11866 case scalar_to_vec:
11867 return current_tune->vec_costs->scalar_to_vec_cost;
11869 case unaligned_load:
11870 case vector_gather_load:
11871 return current_tune->vec_costs->vec_unalign_load_cost;
11873 case unaligned_store:
11874 case vector_scatter_store:
11875 return current_tune->vec_costs->vec_unalign_store_cost;
11877 case cond_branch_taken:
11878 return current_tune->vec_costs->cond_taken_branch_cost;
11880 case cond_branch_not_taken:
11881 return current_tune->vec_costs->cond_not_taken_branch_cost;
11884 case vec_promote_demote:
11885 return current_tune->vec_costs->vec_stmt_cost;
11887 case vec_construct:
11888 elements = TYPE_VECTOR_SUBPARTS (vectype);
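/* Editorial note: e.g. for a 4-element vector this charges 4 / 2 + 1 = 3
   units for the element-by-element construction.  */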
11889 return elements / 2 + 1;
11892 gcc_unreachable ();
11896 /* Implement targetm.vectorize.add_stmt_cost. */
11899 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11900 struct _stmt_vec_info *stmt_info, int misalign,
11901 enum vect_cost_model_location where)
11903 unsigned *cost = (unsigned *) data;
11904 unsigned retval = 0;
11906 if (flag_vect_cost_model)
11908 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11909 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11911 /* Statements in an inner loop relative to the loop being
11912 vectorized are weighted more heavily. The value here is
11913 arbitrary and could potentially be improved with analysis. */
11914 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11915 count *= 50; /* FIXME. */
11917 retval = (unsigned) (count * stmt_cost);
11918 cost[where] += retval;
11924 /* Return true if and only if this insn can dual-issue only as older. */
11926 cortexa7_older_only (rtx_insn *insn)
11928 if (recog_memoized (insn) < 0)
11931 switch (get_attr_type (insn))
11933 case TYPE_ALU_DSP_REG:
11934 case TYPE_ALU_SREG:
11935 case TYPE_ALUS_SREG:
11936 case TYPE_LOGIC_REG:
11937 case TYPE_LOGICS_REG:
11939 case TYPE_ADCS_REG:
11944 case TYPE_SHIFT_IMM:
11945 case TYPE_SHIFT_REG:
11946 case TYPE_LOAD_BYTE:
11949 case TYPE_FFARITHS:
11951 case TYPE_FFARITHD:
11969 case TYPE_F_STORES:
11976 /* Return true if and only if this insn can dual-issue as younger. */
11978 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11980 if (recog_memoized (insn) < 0)
11983 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11987 switch (get_attr_type (insn))
11990 case TYPE_ALUS_IMM:
11991 case TYPE_LOGIC_IMM:
11992 case TYPE_LOGICS_IMM:
11997 case TYPE_MOV_SHIFT:
11998 case TYPE_MOV_SHIFT_REG:
12008 /* Look for an instruction that can dual issue only as an older
12009 instruction, and move it in front of any instructions that can
12010 dual-issue as younger, while preserving the relative order of all
12011 other instructions in the ready list. This is a heuristic to help
12012 dual-issue in later cycles, by postponing issue of more flexible
12013 instructions. This heuristic may affect dual issue opportunities
12014 in the current cycle. */
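/* An editorial sketch of the rotation performed below, on a plain array
   (not part of the build; the helper name is hypothetical).  It moves the
   element at index OLD up to index YOUNG (OLD < YOUNG), shifting the
   elements in between down one slot.  Since the scheduler issues from the
   end of the ready list, moving an older-only insn to a higher index makes
   it issue sooner.  */
#if 0
static void
rotate_ready_sketch (int *ready, int old, int young)
{
  int moved = ready[old];
  for (int i = old; i < young; i++)
    ready[i] = ready[i + 1];	/* Shift the in-between elements down.  */
  ready[young] = moved;		/* Reinsert the saved element.  */
}
#endif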
12016 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12017 int *n_readyp, int clock)
12020 int first_older_only = -1, first_younger = -1;
12024 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12028 /* Traverse the ready list from the head (the instruction to issue
12029 first), looking for the first instruction that can issue as
12030 younger and the first instruction that can dual-issue only as older. */
12032 for (i = *n_readyp - 1; i >= 0; i--)
12034 rtx_insn *insn = ready[i];
12035 if (cortexa7_older_only (insn))
12037 first_older_only = i;
12039 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12042 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12046 /* Nothing to reorder because either no younger insn found or insn
12047 that can dual-issue only as older appears before any insn that
12048 can dual-issue as younger. */
12049 if (first_younger == -1)
12052 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12056 /* Nothing to reorder because no older-only insn in the ready list. */
12057 if (first_older_only == -1)
12060 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12064 /* Move first_older_only insn before first_younger. */
12066 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12067 INSN_UID (ready[first_older_only]),
12068 INSN_UID (ready[first_younger]));
12069 rtx_insn *first_older_only_insn = ready[first_older_only];
12070 for (i = first_older_only; i < first_younger; i++)
12072 ready[i] = ready[i+1];
12075 ready[i] = first_older_only_insn;
12079 /* Implement TARGET_SCHED_REORDER. */
12081 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12086 case TARGET_CPU_cortexa7:
12087 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12090 /* Do nothing for other cores. */
12094 return arm_issue_rate ();
12097 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12098 It corrects the value of COST based on the relationship between
12099 INSN and DEP through the dependence LINK. It returns the new
12100 value. There is a per-core adjust_cost hook to adjust scheduler costs
12101 and the per-core hook can choose to completely override the generic
12102 adjust_cost function. Only put bits of code into arm_adjust_cost that
12103 are common across all cores. */
12105 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12110 /* When generating Thumb-1 code, we want to place flag-setting operations
12111 close to a conditional branch which depends on them, so that we can
12112 omit the comparison. */
12115 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12116 && recog_memoized (dep) >= 0
12117 && get_attr_conds (dep) == CONDS_SET)
12120 if (current_tune->sched_adjust_cost != NULL)
12122 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12126 /* XXX Is this strictly true? */
12127 if (dep_type == REG_DEP_ANTI
12128 || dep_type == REG_DEP_OUTPUT)
12131 /* Call insns don't incur a stall, even if they follow a load. */
12136 if ((i_pat = single_set (insn)) != NULL
12137 && MEM_P (SET_SRC (i_pat))
12138 && (d_pat = single_set (dep)) != NULL
12139 && MEM_P (SET_DEST (d_pat)))
12141 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12142 /* This is a load after a store; there is no conflict if the load reads
12143 from a cached area. Assume that loads from the stack, and from the
12144 constant pool are cached, and that others will miss. This is a hack. */
12147 if ((GET_CODE (src_mem) == SYMBOL_REF
12148 && CONSTANT_POOL_ADDRESS_P (src_mem))
12149 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12150 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12151 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12159 arm_max_conditional_execute (void)
12161 return max_insns_skipped;
12165 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12168 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12170 return (optimize > 0) ? 2 : 0;
12174 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12176 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12179 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12180 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12181 sequences of non-executed instructions in IT blocks probably take the same
12182 amount of time as executed instructions (and the IT instruction itself takes
12183 space in icache). This function was experimentally determined to give good
12184 results on a popular embedded benchmark. */
12187 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12189 return (TARGET_32BIT && speed_p) ? 1
12190 : arm_default_branch_cost (speed_p, predictable_p);
12194 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12196 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12199 static bool fp_consts_inited = false;
12201 static REAL_VALUE_TYPE value_fp0;
12204 init_fp_table (void)
12208 r = REAL_VALUE_ATOF ("0", DFmode);
12210 fp_consts_inited = true;
12213 /* Return TRUE if rtx X is a valid immediate FP constant. */
12215 arm_const_double_rtx (rtx x)
12217 const REAL_VALUE_TYPE *r;
12219 if (!fp_consts_inited)
12222 r = CONST_DOUBLE_REAL_VALUE (x);
12223 if (REAL_VALUE_MINUS_ZERO (*r))
12226 if (real_equal (r, &value_fp0))
12232 /* VFPv3 has a fairly wide range of representable immediates, formed from
12233 "quarter-precision" floating-point values. These can be evaluated using this
12234 formula (with ^ for exponentiation): -1^s * n * 2^-r
12238 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12239 16 <= n <= 31 and 0 <= r <= 7.
12241 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12243 - A (most-significant) is the sign bit.
12244 - BCD are the exponent (encoded as r XOR 3).
12245 - EFGH are the mantissa (encoded as n - 16).
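/* An editorial sketch of the decoding direction (not part of the build;
   the helper name is hypothetical), recovering the value denoted by an
   8-bit fconst[sd] immediate ABCDEFGH via the formula above:  */
#if 0
static double
vfp3_decode_imm8_sketch (unsigned char imm8)
{
  int s = (imm8 >> 7) & 1;		/* A: the sign bit.  */
  int r = ((imm8 >> 4) & 7) ^ 3;	/* BCD: exponent, encoded as r XOR 3.  */
  int n = (imm8 & 15) + 16;		/* EFGH: mantissa, encoded as n - 16.  */
  return (s ? -n : n) / (double) (1 << r);	/* -1^s * n * 2^-r.  */
}
#endif
/* Worked example: 1.0 = 16 * 2^-4, so s = 0, r = 4, n = 16, giving
   imm8 = 0 111 0000 (4 XOR 3 = 7), i.e. 0x70.  */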
12248 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12249 fconst[sd] instruction, or -1 if X isn't suitable. */
12251 vfp3_const_double_index (rtx x)
12253 REAL_VALUE_TYPE r, m;
12254 int sign, exponent;
12255 unsigned HOST_WIDE_INT mantissa, mant_hi;
12256 unsigned HOST_WIDE_INT mask;
12257 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12260 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12263 r = *CONST_DOUBLE_REAL_VALUE (x);
12265 /* We can't represent these things, so detect them first. */
12266 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12269 /* Extract sign, exponent and mantissa. */
12270 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12271 r = real_value_abs (&r);
12272 exponent = REAL_EXP (&r);
12273 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12274 highest (sign) bit, with a fixed binary point at bit point_pos.
12275 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12276 bits for the mantissa, this may fail (low bits would be lost). */
12277 real_ldexp (&m, &r, point_pos - exponent);
12278 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12279 mantissa = w.elt (0);
12280 mant_hi = w.elt (1);
12282 /* If there are bits set in the low part of the mantissa, we can't
12283 represent this value. */
12287 /* Now make it so that mantissa contains the most-significant bits, and move
12288 the point_pos to indicate that the least-significant bits have been discarded. */
12290 point_pos -= HOST_BITS_PER_WIDE_INT;
12291 mantissa = mant_hi;
12293 /* We can permit four significant bits of mantissa only, plus a high bit
12294 which is always 1. */
12295 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12296 if ((mantissa & mask) != 0)
12299 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12300 mantissa >>= point_pos - 5;
12302 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12303 floating-point immediate zero with Neon using an integer-zero load, but
12304 that case is handled elsewhere.) */
12308 gcc_assert (mantissa >= 16 && mantissa <= 31);
12310 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12311 normalized significands are in the range [1, 2). (Our mantissa is shifted
12312 left 4 places at this point relative to normalized IEEE754 values). GCC
12313 internally uses [0.5, 1) (see real.c), so the exponent returned from
12314 REAL_EXP must be altered. */
12315 exponent = 5 - exponent;
12317 if (exponent < 0 || exponent > 7)
12320 /* Sign, mantissa and exponent are now in the correct form to plug into the
12321 formula described in the comment above. */
12322 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12325 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12327 vfp3_const_double_rtx (rtx x)
12332 return vfp3_const_double_index (x) != -1;
12335 /* Recognize immediates which can be used in various Neon instructions. Legal
12336 immediates are described by the following table (for VMVN variants, the
12337 bitwise inverse of the constant shown is recognized. In either case, VMOV
12338 is output and the correct instruction to use for a given constant is chosen
12339 by the assembler). The constant shown is replicated across all elements of
12340 the destination vector.
12342 insn elems variant constant (binary)
12343 ---- ----- ------- -----------------
12344 vmov i32 0 00000000 00000000 00000000 abcdefgh
12345 vmov i32 1 00000000 00000000 abcdefgh 00000000
12346 vmov i32 2 00000000 abcdefgh 00000000 00000000
12347 vmov i32 3 abcdefgh 00000000 00000000 00000000
12348 vmov i16 4 00000000 abcdefgh
12349 vmov i16 5 abcdefgh 00000000
12350 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12351 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12352 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12353 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12354 vmvn i16 10 00000000 abcdefgh
12355 vmvn i16 11 abcdefgh 00000000
12356 vmov i32 12 00000000 00000000 abcdefgh 11111111
12357 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12358 vmov i32 14 00000000 abcdefgh 11111111 11111111
12359 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12360 vmov i8 16 abcdefgh
12361 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12362 eeeeeeee ffffffff gggggggg hhhhhhhh
12363 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12364 vmov f32 19 00000000 00000000 00000000 00000000
12366 For case 18, B = !b. Representable values are exactly those accepted by
12367 vfp3_const_double_index, but are output as floating-point numbers rather than integers.
12370 For case 19, we will change it to vmov.i32 when assembling.
12372 Variants 0-5 (inclusive) may also be used as immediates for the second
12373 operand of VORR/VBIC instructions.
12375 The INVERSE argument causes the bitwise inverse of the given operand to be
12376 recognized instead (used for recognizing legal immediates for the VAND/VORN
12377 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12378 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12379 output, rather than the real insns vbic/vorr).
12381 INVERSE makes no difference to the recognition of float vectors.
12383 The return value is the variant of immediate as shown in the above table, or
12384 -1 if the given value doesn't match any of the listed patterns.
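/* An editorial sketch (not part of the build; the helper name is
   hypothetical): once the constant has been splatted into a little-endian
   byte array as below, variant 0 amounts to checking that every 4-byte
   group is "abcdefgh 00 00 00":  */
#if 0
static int
matches_variant_0_sketch (const unsigned char *bytes, unsigned int nbytes)
{
  for (unsigned int i = 0; i < nbytes; i += 4)
    if (!(bytes[i] == bytes[0] && bytes[i + 1] == 0
	  && bytes[i + 2] == 0 && bytes[i + 3] == 0))
      return 0;
  return 1;
}
#endif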
12387 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12388 rtx *modconst, int *elementwidth)
12390 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12392 for (i = 0; i < idx; i += (STRIDE)) \
12397 immtype = (CLASS); \
12398 elsize = (ELSIZE); \
12402 unsigned int i, elsize = 0, idx = 0, n_elts;
12403 unsigned int innersize;
12404 unsigned char bytes[16] = {};
12405 int immtype = -1, matches;
12406 unsigned int invmask = inverse ? 0xff : 0;
12407 bool vector = GET_CODE (op) == CONST_VECTOR;
12410 n_elts = CONST_VECTOR_NUNITS (op);
12414 gcc_assert (mode != VOIDmode);
12417 innersize = GET_MODE_UNIT_SIZE (mode);
12419 /* Vectors of float constants. */
12420 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12422 rtx el0 = CONST_VECTOR_ELT (op, 0);
12424 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12427 /* FP16 vectors cannot be represented. */
12428 if (GET_MODE_INNER (mode) == HFmode)
12431 /* All elements in the vector must be the same. Note that 0.0 and -0.0
12432 are distinct in this context. */
12433 if (!const_vec_duplicate_p (op))
12437 *modconst = CONST_VECTOR_ELT (op, 0);
12442 if (el0 == CONST0_RTX (GET_MODE (el0)))
12448 /* The tricks done in the code below apply for little-endian vector layout.
12449 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
12450 FIXME: Implement logic for big-endian vectors. */
12451 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
12454 /* Splat vector constant out into a byte vector. */
12455 for (i = 0; i < n_elts; i++)
12457 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12458 unsigned HOST_WIDE_INT elpart;
12460 gcc_assert (CONST_INT_P (el));
12461 elpart = INTVAL (el);
12463 for (unsigned int byte = 0; byte < innersize; byte++)
12465 bytes[idx++] = (elpart & 0xff) ^ invmask;
12466 elpart >>= BITS_PER_UNIT;
12470 /* Sanity check. */
12471 gcc_assert (idx == GET_MODE_SIZE (mode));
12475 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12476 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12478 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12479 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12481 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12482 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12484 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12485 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12487 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12489 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12491 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12492 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12494 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12495 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12497 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12498 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12500 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12501 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12503 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12505 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12507 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12508 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12510 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12511 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12513 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12514 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12516 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12517 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12519 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12521 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12522 && bytes[i] == bytes[(i + 8) % idx]);
12530 *elementwidth = elsize;
12534 unsigned HOST_WIDE_INT imm = 0;
12536 /* Un-invert bytes of recognized vector, if necessary. */
12538 for (i = 0; i < idx; i++)
12539 bytes[i] ^= invmask;
12543 /* FIXME: Broken on 32-bit H_W_I hosts. */
12544 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12546 for (i = 0; i < 8; i++)
12547 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12548 << (i * BITS_PER_UNIT);
12550 *modconst = GEN_INT (imm);
12554 unsigned HOST_WIDE_INT imm = 0;
12556 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12557 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12559 *modconst = GEN_INT (imm);
12567 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12568 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12569 float elements), and a modified constant (whatever should be output for a
12570 VMOV) in *MODCONST. */
12573 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12574 rtx *modconst, int *elementwidth)
12578 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12584 *modconst = tmpconst;
12587 *elementwidth = tmpwidth;
12592 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12593 the immediate is valid, write a constant suitable for using as an operand
12594 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12595 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12598 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12599 rtx *modconst, int *elementwidth)
12603 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12605 if (retval < 0 || retval > 5)
12609 *modconst = tmpconst;
12612 *elementwidth = tmpwidth;
12617 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12618 the immediate is valid, write a constant suitable for using as an operand
12619 to VSHR/VSHL to *MODCONST and the corresponding element width to
12620 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
12621 because they have different limitations. */
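/* Editorial example: with 16-bit elements the element size is 16, so a
   VSHL immediate must lie in [0, 15] while a VSHR immediate must lie in
   [1, 16].  */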
12624 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12625 rtx *modconst, int *elementwidth,
12628 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12629 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12630 unsigned HOST_WIDE_INT last_elt = 0;
12631 unsigned HOST_WIDE_INT maxshift;
12633 /* Check that all the vector elements are equal. */
12634 for (i = 0; i < n_elts; i++)
12636 rtx el = CONST_VECTOR_ELT (op, i);
12637 unsigned HOST_WIDE_INT elpart;
12639 if (CONST_INT_P (el))
12640 elpart = INTVAL (el);
12641 else if (CONST_DOUBLE_P (el))
12644 gcc_unreachable ();
12646 if (i != 0 && elpart != last_elt)
12652 /* Shift less than element size. */
12653 maxshift = innersize * 8;
12657 /* Left shift immediate value can be from 0 to <size>-1. */
12658 if (last_elt >= maxshift)
12663 /* Right shift immediate value can be from 1 to <size>. */
12664 if (last_elt == 0 || last_elt > maxshift)
12669 *elementwidth = innersize * 8;
12672 *modconst = CONST_VECTOR_ELT (op, 0);
12677 /* Return a string suitable for output of Neon immediate logic operation MNEM. */
12681 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12682 int inverse, int quad)
12684 int width, is_valid;
12685 static char templ[40];
12687 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12689 gcc_assert (is_valid != 0);
12692 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12694 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12699 /* Return a string suitable for output of Neon immediate shift operation
12700 (VSHR or VSHL) MNEM. */
12703 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12704 machine_mode mode, int quad,
12707 int width, is_valid;
12708 static char templ[40];
12710 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12711 gcc_assert (is_valid != 0);
12714 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12716 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12721 /* Output a sequence of pairwise operations to implement a reduction.
12722 NOTE: We do "too much work" here, because pairwise operations work on two
12723 registers-worth of operands in one go. Unfortunately I don't think we can
12724 exploit those extra calculations to do the full operation in fewer steps.
12725 Although all vector elements of the result but the first are ignored, we
12726 actually calculate the same result in each of the elements. An alternative
12727 such as initially loading a vector with zero to use as each of the second
12728 operands would use up an additional register and take an extra instruction,
12729 for no particular gain. */
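/* An editorial sketch of the same halving ladder on a plain array (not
   part of the build; the helper name is hypothetical), showing the
   sequence of pairwise steps the loop below emits:  */
#if 0
static int
pairwise_reduce_sketch (int *v, unsigned int parts)
{
  /* PARTS must be a power of two; each step folds adjacent pairs.  */
  for (unsigned int i = parts / 2; i >= 1; i /= 2)
    for (unsigned int j = 0; j < i; j++)
      v[j] = v[2 * j] + v[2 * j + 1];
  return v[0];
}
#endif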
12732 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12733 rtx (*reduc) (rtx, rtx, rtx))
12735 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12738 for (i = parts / 2; i >= 1; i /= 2)
12740 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12741 emit_insn (reduc (dest, tmpsum, tmpsum));
12746 /* If VALS is a vector constant that can be loaded into a register
12747 using VDUP, generate instructions to do so and return an RTX to
12748 assign to the register. Otherwise return NULL_RTX. */
12751 neon_vdup_constant (rtx vals)
12753 machine_mode mode = GET_MODE (vals);
12754 machine_mode inner_mode = GET_MODE_INNER (mode);
12757 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12760 if (!const_vec_duplicate_p (vals, &x))
12761 /* The elements are not all the same. We could handle repeating
12762 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12763 {0, C, 0, C, 0, C, 0, C} which can be loaded using vdup.i16). */
12767 /* We can load this constant by using VDUP and a constant in a
12768 single ARM register.  This will be cheaper than a vector load.  */
12771 x = copy_to_mode_reg (inner_mode, x);
12772 return gen_vec_duplicate (mode, x);
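/* For example, the V4SI constant {5, 5, 5, 5} can be loaded by moving #5
   into a core register and then emitting a single VDUP.32, rather than
   loading the whole vector from the constant pool.  */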
12775 /* Generate code to load VALS, which is a PARALLEL containing only
12776 constants (for vec_init) or CONST_VECTOR, efficiently into a
12777 register. Returns an RTX to copy into the register, or NULL_RTX
12778 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12781 neon_make_constant (rtx vals)
12783 machine_mode mode = GET_MODE (vals);
12785 rtx const_vec = NULL_RTX;
12786 int n_elts = GET_MODE_NUNITS (mode);
12790 if (GET_CODE (vals) == CONST_VECTOR)
12792 else if (GET_CODE (vals) == PARALLEL)
12794 /* A CONST_VECTOR must contain only CONST_INTs and
12795 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12796 Only store valid constants in a CONST_VECTOR. */
12797 for (i = 0; i < n_elts; ++i)
12799 rtx x = XVECEXP (vals, 0, i);
12800 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12803 if (n_const == n_elts)
12804 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12807 gcc_unreachable ();
12809 if (const_vec != NULL
12810 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12811 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12813 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12814 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12815 pipeline cycle; creating the constant takes one or two ARM
12816 pipeline cycles. */
12818 else if (const_vec != NULL_RTX)
12819 /* Load from constant pool. On Cortex-A8 this takes two cycles
12820 (for either double or quad vectors). We cannot take advantage
12821 of single-cycle VLD1 because we need a PC-relative addressing mode.  */
12825 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12826 We cannot construct an initializer. */
12830 /* Initialize vector TARGET to VALS. */
12833 neon_expand_vector_init (rtx target, rtx vals)
12835 machine_mode mode = GET_MODE (target);
12836 machine_mode inner_mode = GET_MODE_INNER (mode);
12837 int n_elts = GET_MODE_NUNITS (mode);
12838 int n_var = 0, one_var = -1;
12839 bool all_same = true;
12843 for (i = 0; i < n_elts; ++i)
12845 x = XVECEXP (vals, 0, i);
12846 if (!CONSTANT_P (x))
12847 ++n_var, one_var = i;
12849 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12855 rtx constant = neon_make_constant (vals);
12856 if (constant != NULL_RTX)
12858 emit_move_insn (target, constant);
12863 /* Splat a single non-constant element if we can. */
12864 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12866 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12867 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
12871 /* One field is non-constant. Load constant then overwrite varying
12872 field. This is more efficient than using the stack. */
12875 rtx copy = copy_rtx (vals);
12876 rtx merge_mask = GEN_INT (1 << one_var);
12878 /* Load constant part of vector, substitute neighboring value for
12879 varying element. */
12880 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12881 neon_expand_vector_init (target, copy);
12883 /* Insert variable. */
12884 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12885 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
12889 /* Construct the vector in memory one field at a time
12890 and load the whole vector. */
12891 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12892 for (i = 0; i < n_elts; i++)
12893 emit_move_insn (adjust_address_nv (mem, inner_mode,
12894 i * GET_MODE_SIZE (inner_mode)),
12895 XVECEXP (vals, 0, i));
12896 emit_move_insn (target, mem);
12899 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12900 an error if it doesn't.  EXP indicates the source location, which includes the
12901 inlining history for intrinsics. */
12904 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12905 const_tree exp, const char *desc)
12907 HOST_WIDE_INT lane;
12909 gcc_assert (CONST_INT_P (operand));
12911 lane = INTVAL (operand);
12913 if (lane < low || lane >= high)
12916 error ("%K%s %wd out of range %wd - %wd",
12917 exp, desc, lane, low, high - 1);
12919 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
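/* For example, checking a lane number for a four-element vector uses
   LOW == 0 and HIGH == 4, and an out-of-range lane such as 5 is reported
   as "lane 5 out of range 0 - 3".  */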
12923 /* Bounds-check lanes. */
12926 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12929 bounds_check (operand, low, high, exp, "lane");
12932 /* Bounds-check constants. */
12935 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12937 bounds_check (operand, low, high, NULL_TREE, "constant");
12941 neon_element_bits (machine_mode mode)
12943 return GET_MODE_UNIT_BITSIZE (mode);
12947 /* Predicates for `match_operand' and `match_operator'. */
12949 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12950 WB is true if full writeback address modes are allowed and is false
12951 if limited writeback address modes (POST_INC and PRE_DEC) are allowed.  */
12955 arm_coproc_mem_operand (rtx op, bool wb)
12959 /* Reject eliminable registers. */
12960 if (! (reload_in_progress || reload_completed || lra_in_progress)
12961 && ( reg_mentioned_p (frame_pointer_rtx, op)
12962 || reg_mentioned_p (arg_pointer_rtx, op)
12963 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12964 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12965 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12966 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12969 /* Constants are converted into offsets from labels. */
12973 ind = XEXP (op, 0);
12975 if (reload_completed
12976 && (GET_CODE (ind) == LABEL_REF
12977 || (GET_CODE (ind) == CONST
12978 && GET_CODE (XEXP (ind, 0)) == PLUS
12979 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12980 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12983 /* Match: (mem (reg)). */
12985 return arm_address_register_rtx_p (ind, 0);
12987 /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
12988 acceptable in any case (subject to verification by
12989 arm_address_register_rtx_p). We need WB to be true to accept
12990 PRE_INC and POST_DEC. */
12991 if (GET_CODE (ind) == POST_INC
12992 || GET_CODE (ind) == PRE_DEC
12994 && (GET_CODE (ind) == PRE_INC
12995 || GET_CODE (ind) == POST_DEC)))
12996 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12999 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13000 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13001 && GET_CODE (XEXP (ind, 1)) == PLUS
13002 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13003 ind = XEXP (ind, 1);
13008 if (GET_CODE (ind) == PLUS
13009 && REG_P (XEXP (ind, 0))
13010 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13011 && CONST_INT_P (XEXP (ind, 1))
13012 && INTVAL (XEXP (ind, 1)) > -1024
13013 && INTVAL (XEXP (ind, 1)) < 1024
13014 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
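/* That is, immediate offsets here are restricted to multiples of 4 in
   the range -1020 to 1020, so e.g. (mem (plus (reg) (const_int 1020)))
   is accepted while an offset of 1024, or an unaligned one such as 6,
   is not.  */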
13020 /* Return TRUE if OP is a memory operand which we can load or store a vector
13021 to/from. TYPE is one of the following values:
13022 0 - Vector load/store (vldr)
13023 1 - Core registers (ldm)
13024 2 - Element/structure loads (vld1).  */
13027 neon_vector_mem_operand (rtx op, int type, bool strict)
13031 /* Reject eliminable registers. */
13032 if (strict && ! (reload_in_progress || reload_completed)
13033 && (reg_mentioned_p (frame_pointer_rtx, op)
13034 || reg_mentioned_p (arg_pointer_rtx, op)
13035 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13036 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13037 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13038 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13041 /* Constants are converted into offsets from labels. */
13045 ind = XEXP (op, 0);
13047 if (reload_completed
13048 && (GET_CODE (ind) == LABEL_REF
13049 || (GET_CODE (ind) == CONST
13050 && GET_CODE (XEXP (ind, 0)) == PLUS
13051 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13052 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13055 /* Match: (mem (reg)). */
13057 return arm_address_register_rtx_p (ind, 0);
13059 /* Allow post-increment with Neon registers. */
13060 if ((type != 1 && GET_CODE (ind) == POST_INC)
13061 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13062 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13064 /* Allow post-increment by register for VLDn.  */
13065 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13066 && GET_CODE (XEXP (ind, 1)) == PLUS
13067 && REG_P (XEXP (XEXP (ind, 1), 1)))
13074 && GET_CODE (ind) == PLUS
13075 && REG_P (XEXP (ind, 0))
13076 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13077 && CONST_INT_P (XEXP (ind, 1))
13078 && INTVAL (XEXP (ind, 1)) > -1024
13079 /* For quad modes, we restrict the constant offset to be slightly less
13080 than what the instruction format permits. We have no such constraint
13081 on double mode offsets. (This must match arm_legitimate_index_p.) */
13082 && (INTVAL (XEXP (ind, 1))
13083 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13084 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13090 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct type.  */
13093 neon_struct_mem_operand (rtx op)
13097 /* Reject eliminable registers. */
13098 if (! (reload_in_progress || reload_completed)
13099 && ( reg_mentioned_p (frame_pointer_rtx, op)
13100 || reg_mentioned_p (arg_pointer_rtx, op)
13101 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13102 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13103 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13104 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13107 /* Constants are converted into offsets from labels. */
13111 ind = XEXP (op, 0);
13113 if (reload_completed
13114 && (GET_CODE (ind) == LABEL_REF
13115 || (GET_CODE (ind) == CONST
13116 && GET_CODE (XEXP (ind, 0)) == PLUS
13117 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13118 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13121 /* Match: (mem (reg)). */
13123 return arm_address_register_rtx_p (ind, 0);
13125 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13126 if (GET_CODE (ind) == POST_INC
13127 || GET_CODE (ind) == PRE_DEC)
13128 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13133 /* Prepares the operands for the VCMLA by lane instruction such that the right
13134 register number is selected. This instruction is special in that it always
13135 requires a D register, however there is a choice to be made between Dn[0],
13136 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13138 The VCMLA by lane function always selects two values. For instance given D0
13139 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13140 used by the instruction. However given V4SF then index 0 and 1 are valid as
13141 D0[0] or D1[0] are both valid.
13143 This function centralizes that information based on OPERANDS; OPERANDS[3]
13144 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13145 updated to contain the right index. */
13148 neon_vcmla_lane_prepare_operands (rtx *operands)
13150 int lane = INTVAL (operands[4]);
13151 machine_mode constmode = SImode;
13152 machine_mode mode = GET_MODE (operands[3]);
13153 int regno = REGNO (operands[3]);
13154 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13155 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13157 operands[3] = gen_int_mode (regno + 1, constmode);
13159 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13163 operands[3] = gen_int_mode (regno, constmode);
13164 operands[4] = gen_int_mode (lane, constmode);
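/* For example, for a V4SF operand (GET_MODE_NUNITS == 4) with lane 1,
   the branch above selects the following D register: OPERANDS[3] becomes
   the constant REGNO + 1 and OPERANDS[4] becomes lane 0.  */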
13170 /* Return true if X is a register that will be eliminated later on. */
13172 arm_eliminable_register (rtx x)
13174 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13175 || REGNO (x) == ARG_POINTER_REGNUM
13176 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13177 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13180 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13181 coprocessor registers. Otherwise return NO_REGS. */
13184 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13186 if (mode == HFmode)
13188 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13189 return GENERAL_REGS;
13190 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13192 return GENERAL_REGS;
13195 /* The neon move patterns handle all legitimate vector and struct
13198 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13199 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13200 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13201 || VALID_NEON_STRUCT_MODE (mode)))
13204 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13207 return GENERAL_REGS;
13210 /* Values which must be returned in the most-significant end of the return register.  */
13214 arm_return_in_msb (const_tree valtype)
13216 return (TARGET_AAPCS_BASED
13217 && BYTES_BIG_ENDIAN
13218 && (AGGREGATE_TYPE_P (valtype)
13219 || TREE_CODE (valtype) == COMPLEX_TYPE
13220 || FIXED_POINT_TYPE_P (valtype)));
13223 /* Return TRUE if X references a SYMBOL_REF. */
13225 symbol_mentioned_p (rtx x)
13230 if (GET_CODE (x) == SYMBOL_REF)
13233 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13234 are constant offsets, not symbols. */
13235 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13238 fmt = GET_RTX_FORMAT (GET_CODE (x));
13240 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13246 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13247 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13250 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13257 /* Return TRUE if X references a LABEL_REF. */
13259 label_mentioned_p (rtx x)
13264 if (GET_CODE (x) == LABEL_REF)
13267 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13268 instruction, but they are constant offsets, not symbols. */
13269 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13272 fmt = GET_RTX_FORMAT (GET_CODE (x));
13273 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13279 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13280 if (label_mentioned_p (XVECEXP (x, i, j)))
13283 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13291 tls_mentioned_p (rtx x)
13293 switch (GET_CODE (x))
13296 return tls_mentioned_p (XEXP (x, 0));
13299 if (XINT (x, 1) == UNSPEC_TLS)
13302 /* Fall through. */
13308 /* Must not copy any rtx that uses a pc-relative address.
13309 Also, disallow copying of load-exclusive instructions that
13310 may appear after splitting of compare-and-swap-style operations
13311 so as to prevent those loops from being transformed away from their
13312 canonical forms (see PR 69904). */
13315 arm_cannot_copy_insn_p (rtx_insn *insn)
13317 /* The tls call insn cannot be copied, as it is paired with a data word.  */
13319 if (recog_memoized (insn) == CODE_FOR_tlscall)
13322 subrtx_iterator::array_type array;
13323 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13325 const_rtx x = *iter;
13326 if (GET_CODE (x) == UNSPEC
13327 && (XINT (x, 1) == UNSPEC_PIC_BASE
13328 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13332 rtx set = single_set (insn);
13335 rtx src = SET_SRC (set);
13336 if (GET_CODE (src) == ZERO_EXTEND)
13337 src = XEXP (src, 0);
13339 /* Catch the load-exclusive and load-acquire operations. */
13340 if (GET_CODE (src) == UNSPEC_VOLATILE
13341 && (XINT (src, 1) == VUNSPEC_LL
13342 || XINT (src, 1) == VUNSPEC_LAX))
13349 minmax_code (rtx x)
13351 enum rtx_code code = GET_CODE (x);
13364 gcc_unreachable ();
13368 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13371 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13372 int *mask, bool *signed_sat)
13374 /* The high bound must be a power of two minus one. */
13375 int log = exact_log2 (INTVAL (hi_bound) + 1);
13379 /* The low bound is either zero (for usat) or one less than the
13380 negation of the high bound (for ssat). */
13381 if (INTVAL (lo_bound) == 0)
13386 *signed_sat = false;
13391 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13396 *signed_sat = true;
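/* For example, the bounds -256/255 describe a 9-bit signed value and so
   match SSAT, while bounds 0/255 describe an 8-bit unsigned value and
   match USAT.  */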
13404 /* Return 1 if memory locations are adjacent. */
13406 adjacent_mem_locations (rtx a, rtx b)
13408 /* We don't guarantee to preserve the order of these memory refs. */
13409 if (volatile_refs_p (a) || volatile_refs_p (b))
13412 if ((REG_P (XEXP (a, 0))
13413 || (GET_CODE (XEXP (a, 0)) == PLUS
13414 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13415 && (REG_P (XEXP (b, 0))
13416 || (GET_CODE (XEXP (b, 0)) == PLUS
13417 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13419 HOST_WIDE_INT val0 = 0, val1 = 0;
13423 if (GET_CODE (XEXP (a, 0)) == PLUS)
13425 reg0 = XEXP (XEXP (a, 0), 0);
13426 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13429 reg0 = XEXP (a, 0);
13431 if (GET_CODE (XEXP (b, 0)) == PLUS)
13433 reg1 = XEXP (XEXP (b, 0), 0);
13434 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13437 reg1 = XEXP (b, 0);
13439 /* Don't accept any offset that will require multiple
13440 instructions to handle, since this would cause the
13441 arith_adjacentmem pattern to output an overlong sequence. */
13442 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13445 /* Don't allow an eliminable register: register elimination can make
13446 the offset too large. */
13447 if (arm_eliminable_register (reg0))
13450 val_diff = val1 - val0;
13454 /* If the target has load delay slots, then there's no benefit
13455 to using an ldm instruction unless the offset is zero and
13456 we are optimizing for size. */
13457 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13458 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13459 && (val_diff == 4 || val_diff == -4));
13462 return ((REGNO (reg0) == REGNO (reg1))
13463 && (val_diff == 4 || val_diff == -4));
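/* E.g. (mem (reg r4)) and (mem (plus (reg r4) (const_int 4))) are
   adjacent in either order, since only a difference of +/-4 from the
   same base register is accepted.  */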
13469 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13470 for load operations, false for store operations. CONSECUTIVE is true
13471 if the register numbers in the operation must be consecutive in the register
13472 bank.  RETURN_PC is true if the value is to be loaded into the PC.
13473 The pattern we are trying to match for load is:
13474 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13475 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13478 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13481 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13482 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13483 3. If consecutive is TRUE, then for kth register being loaded,
13484 REGNO (R_dk) = REGNO (R_d0) + k.
13485 The pattern for store is similar. */
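/* As a concrete illustration (register numbers purely illustrative), a
   two-register load in SImode would match a PARALLEL of the form
     [(set (reg:SI 4) (mem:SI (reg:SI 0)))
      (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0) (const_int 4))))]
   which satisfies conditions 1 and 2 above; with CONSECUTIVE, r5 must
   immediately follow r4, as it does here.  */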
13487 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13488 bool consecutive, bool return_pc)
13490 HOST_WIDE_INT count = XVECLEN (op, 0);
13491 rtx reg, mem, addr;
13493 unsigned first_regno;
13494 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13496 bool addr_reg_in_reglist = false;
13497 bool update = false;
13502 /* If not in SImode, then registers must be consecutive
13503 (e.g., VLDM instructions for DFmode). */
13504 gcc_assert ((mode == SImode) || consecutive);
13505 /* Setting return_pc for stores is illegal. */
13506 gcc_assert (!return_pc || load);
13508 /* Set up the increments and the regs per val based on the mode. */
13509 reg_increment = GET_MODE_SIZE (mode);
13510 regs_per_val = reg_increment / 4;
13511 offset_adj = return_pc ? 1 : 0;
13514 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13515 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13518 /* Check if this is a write-back. */
13519 elt = XVECEXP (op, 0, offset_adj);
13520 if (GET_CODE (SET_SRC (elt)) == PLUS)
13526 /* The offset adjustment must be the number of registers being
13527 popped times the size of a single register. */
13528 if (!REG_P (SET_DEST (elt))
13529 || !REG_P (XEXP (SET_SRC (elt), 0))
13530 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13531 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13532 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13533 ((count - 1 - offset_adj) * reg_increment))
13537 i = i + offset_adj;
13538 base = base + offset_adj;
13539 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13540 success depends on the type: VLDM can do just one reg,
13541 LDM must do at least two. */
13542 if ((count <= i) && (mode == SImode))
13545 elt = XVECEXP (op, 0, i - 1);
13546 if (GET_CODE (elt) != SET)
13551 reg = SET_DEST (elt);
13552 mem = SET_SRC (elt);
13556 reg = SET_SRC (elt);
13557 mem = SET_DEST (elt);
13560 if (!REG_P (reg) || !MEM_P (mem))
13563 regno = REGNO (reg);
13564 first_regno = regno;
13565 addr = XEXP (mem, 0);
13566 if (GET_CODE (addr) == PLUS)
13568 if (!CONST_INT_P (XEXP (addr, 1)))
13571 offset = INTVAL (XEXP (addr, 1));
13572 addr = XEXP (addr, 0);
13578 /* Don't allow SP to be loaded unless it is also the base register. It
13579 guarantees that SP is reset correctly when an LDM instruction
13580 is interrupted. Otherwise, we might end up with a corrupt stack. */
13581 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13584 if (regno == REGNO (addr))
13585 addr_reg_in_reglist = true;
13587 for (; i < count; i++)
13589 elt = XVECEXP (op, 0, i);
13590 if (GET_CODE (elt) != SET)
13595 reg = SET_DEST (elt);
13596 mem = SET_SRC (elt);
13600 reg = SET_SRC (elt);
13601 mem = SET_DEST (elt);
13605 || GET_MODE (reg) != mode
13606 || REGNO (reg) <= regno
13609 (unsigned int) (first_regno + regs_per_val * (i - base))))
13610 /* Don't allow SP to be loaded unless it is also the base register. It
13611 guarantees that SP is reset correctly when an LDM instruction
13612 is interrupted. Otherwise, we might end up with a corrupt stack. */
13613 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13615 || GET_MODE (mem) != mode
13616 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13617 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13618 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13619 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13620 offset + (i - base) * reg_increment))
13621 && (!REG_P (XEXP (mem, 0))
13622 || offset + (i - base) * reg_increment != 0)))
13625 regno = REGNO (reg);
13626 if (regno == REGNO (addr))
13627 addr_reg_in_reglist = true;
13632 if (update && addr_reg_in_reglist)
13635 /* For Thumb-1, the address register is always modified, either by write-back
13636 or by explicit load. If the pattern does not describe an update,
13637 then the address register must be in the list of loaded registers. */
13639 return update || addr_reg_in_reglist;
13645 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13646 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13647 instruction. ADD_OFFSET is nonzero if the base address register needs
13648 to be modified with an add instruction before we can use it. */
13651 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13652 int nops, HOST_WIDE_INT add_offset)
13654 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13655 if the offset isn't small enough. The reason 2 ldrs are faster
13656 is because these ARMs are able to do more than one cache access
13657 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13658 whilst the ARM8 has a double bandwidth cache. This means that
13659 these cores can do both an instruction fetch and a data fetch in
13660 a single cycle, so the trick of calculating the address into a
13661 scratch register (one of the result regs) and then doing a load
13662 multiple actually becomes slower (and no smaller in code size).
13663 That is the transformation
13665 ldr rd1, [rbase + offset]
13666 ldr rd2, [rbase + offset + 4]
13670 add rd1, rbase, offset
13671 ldmia rd1, {rd1, rd2}
13673 produces worse code -- '3 cycles + any stalls on rd2' instead of
13674 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13675 access per cycle, the first sequence could never complete in less
13676 than 6 cycles, whereas the ldm sequence would only take 5 and
13677 would make better use of sequential accesses if not hitting the
13680 We cheat here and test 'arm_ld_sched' which we currently know to
13681 only be true for the ARM8, ARM9 and StrongARM. If this ever
13682 changes, then the test below needs to be reworked. */
13683 if (nops == 2 && arm_ld_sched && add_offset != 0)
13686 /* XScale has load-store double instructions, but they have stricter
13687 alignment requirements than load-store multiple, so we cannot
13690 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13691 the pipeline until completion.
13699 An ldr instruction takes 1-3 cycles, but does not block the
13708 Best case ldr will always win. However, the more ldr instructions
13709 we issue, the less likely we are to be able to schedule them well.
13710 Using ldr instructions also increases code size.
13712 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13713 for counts of 3 or 4 regs. */
13714 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13719 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13720 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13721 an array ORDER which describes the sequence to use when accessing the
13722 offsets that produces an ascending order. In this sequence, each
13723 offset must be larger by exactly 4 than the previous one. ORDER[0]
13724 must have been filled in with the lowest offset by the caller.
13725 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13726 we use to verify that ORDER produces an ascending order of registers.
13727 Return true if it was possible to construct such an order, false if not.  */
13731 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13732 int *unsorted_regs)
13735 for (i = 1; i < nops; i++)
13739 order[i] = order[i - 1];
13740 for (j = 0; j < nops; j++)
13741 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13743 /* We must find exactly one offset that is higher than the
13744 previous one by 4. */
13745 if (order[i] != order[i - 1])
13749 if (order[i] == order[i - 1])
13751 /* The register numbers must be ascending. */
13752 if (unsorted_regs != NULL
13753 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
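/* For example, with NOPS == 4, UNSORTED_OFFSETS {4, 0, 8, 12} and
   ORDER[0] preset to 1 (the index of offset 0), the loop fills ORDER in
   as {1, 0, 2, 3}.  Offsets {0, 8, 12, 16} would fail, since no offset
   is exactly 4 above 0.  */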
13759 /* Used to determine in a peephole whether a sequence of load
13760 instructions can be changed into a load-multiple instruction.
13761 NOPS is the number of separate load instructions we are examining. The
13762 first NOPS entries in OPERANDS are the destination registers, the
13763 next NOPS entries are memory operands. If this function is
13764 successful, *BASE is set to the common base register of the memory
13765 accesses; *LOAD_OFFSET is set to the first memory location's offset
13766 from that base register.
13767 REGS is an array filled in with the destination register numbers.
13768 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13769 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13770 the sequence of registers in REGS matches the loads from ascending memory
13771 locations, and the function verifies that the register numbers are
13772 themselves ascending. If CHECK_REGS is false, the register numbers
13773 are stored in the order they are found in the operands. */
13775 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13776 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13778 int unsorted_regs[MAX_LDM_STM_OPS];
13779 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13780 int order[MAX_LDM_STM_OPS];
13784 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13785 easily extended if required. */
13786 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13788 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13790 /* Loop over the operands and check that the memory references are
13791 suitable (i.e. immediate offsets from the same base register). At
13792 the same time, extract the target register, and the memory offsets.  */
13794 for (i = 0; i < nops; i++)
13799 /* Convert a subreg of a mem into the mem itself. */
13800 if (GET_CODE (operands[nops + i]) == SUBREG)
13801 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13803 gcc_assert (MEM_P (operands[nops + i]));
13805 /* Don't reorder volatile memory references; it doesn't seem worth
13806 looking for the case where the order is ok anyway. */
13807 if (MEM_VOLATILE_P (operands[nops + i]))
13810 offset = const0_rtx;
13812 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13813 || (GET_CODE (reg) == SUBREG
13814 && REG_P (reg = SUBREG_REG (reg))))
13815 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13816 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13817 || (GET_CODE (reg) == SUBREG
13818 && REG_P (reg = SUBREG_REG (reg))))
13819 && (CONST_INT_P (offset
13820 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13824 base_reg = REGNO (reg);
13825 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13828 else if (base_reg != (int) REGNO (reg))
13829 /* Not addressed from the same base register. */
13832 unsorted_regs[i] = (REG_P (operands[i])
13833 ? REGNO (operands[i])
13834 : REGNO (SUBREG_REG (operands[i])));
13836 /* If it isn't an integer register, or if it overwrites the
13837 base register but isn't the last insn in the list, then
13838 we can't do this. */
13839 if (unsorted_regs[i] < 0
13840 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13841 || unsorted_regs[i] > 14
13842 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13845 /* Don't allow SP to be loaded unless it is also the base
13846 register. It guarantees that SP is reset correctly when
13847 an LDM instruction is interrupted. Otherwise, we might
13848 end up with a corrupt stack. */
13849 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13852 unsorted_offsets[i] = INTVAL (offset);
13853 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13857 /* Not a suitable memory address. */
13861 /* All the useful information has now been extracted from the
13862 operands into unsorted_regs and unsorted_offsets; additionally,
13863 order[0] has been set to the lowest offset in the list. Sort
13864 the offsets into order, verifying that they are adjacent, and
13865 check that the register numbers are ascending. */
13866 if (!compute_offset_order (nops, unsorted_offsets, order,
13867 check_regs ? unsorted_regs : NULL))
13871 memcpy (saved_order, order, sizeof order);
13877 for (i = 0; i < nops; i++)
13878 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13880 *load_offset = unsorted_offsets[order[0]];
13883 if (unsorted_offsets[order[0]] == 0)
13884 ldm_case = 1; /* ldmia */
13885 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13886 ldm_case = 2; /* ldmib */
13887 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13888 ldm_case = 3; /* ldmda */
13889 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13890 ldm_case = 4; /* ldmdb */
13891 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13892 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13897 if (!multiple_operation_profitable_p (false, nops,
13899 ? unsorted_offsets[order[0]] : 0))
13905 /* Used to determine in a peephole whether a sequence of store instructions can
13906 be changed into a store-multiple instruction.
13907 NOPS is the number of separate store instructions we are examining.
13908 NOPS_TOTAL is the total number of instructions recognized by the peephole pattern.
13910 The first NOPS entries in OPERANDS are the source registers, the next
13911 NOPS entries are memory operands. If this function is successful, *BASE is
13912 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13913 to the first memory location's offset from that base register. REGS is an
13914 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13915 likewise filled with the corresponding rtx's.
13916 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13917 numbers to an ascending order of stores.
13918 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13919 from ascending memory locations, and the function verifies that the register
13920 numbers are themselves ascending. If CHECK_REGS is false, the register
13921 numbers are stored in the order they are found in the operands. */
13923 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13924 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13925 HOST_WIDE_INT *load_offset, bool check_regs)
13927 int unsorted_regs[MAX_LDM_STM_OPS];
13928 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13929 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13930 int order[MAX_LDM_STM_OPS];
13932 rtx base_reg_rtx = NULL;
13935 /* Write back of base register is currently only supported for Thumb 1. */
13936 int base_writeback = TARGET_THUMB1;
13938 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13939 easily extended if required. */
13940 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13942 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13944 /* Loop over the operands and check that the memory references are
13945 suitable (i.e. immediate offsets from the same base register). At
13946 the same time, extract the target register, and the memory offsets.  */
13948 for (i = 0; i < nops; i++)
13953 /* Convert a subreg of a mem into the mem itself. */
13954 if (GET_CODE (operands[nops + i]) == SUBREG)
13955 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13957 gcc_assert (MEM_P (operands[nops + i]));
13959 /* Don't reorder volatile memory references; it doesn't seem worth
13960 looking for the case where the order is ok anyway. */
13961 if (MEM_VOLATILE_P (operands[nops + i]))
13964 offset = const0_rtx;
13966 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13967 || (GET_CODE (reg) == SUBREG
13968 && REG_P (reg = SUBREG_REG (reg))))
13969 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13970 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13971 || (GET_CODE (reg) == SUBREG
13972 && REG_P (reg = SUBREG_REG (reg))))
13973 && (CONST_INT_P (offset
13974 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13976 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13977 ? operands[i] : SUBREG_REG (operands[i]));
13978 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13982 base_reg = REGNO (reg);
13983 base_reg_rtx = reg;
13984 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13987 else if (base_reg != (int) REGNO (reg))
13988 /* Not addressed from the same base register. */
13991 /* If it isn't an integer register, then we can't do this. */
13992 if (unsorted_regs[i] < 0
13993 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13994 /* The effects are unpredictable if the base register is
13995 both updated and stored. */
13996 || (base_writeback && unsorted_regs[i] == base_reg)
13997 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13998 || unsorted_regs[i] > 14)
14001 unsorted_offsets[i] = INTVAL (offset);
14002 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14006 /* Not a suitable memory address. */
14010 /* All the useful information has now been extracted from the
14011 operands into unsorted_regs and unsorted_offsets; additionally,
14012 order[0] has been set to the lowest offset in the list. Sort
14013 the offsets into order, verifying that they are adjacent, and
14014 check that the register numbers are ascending. */
14015 if (!compute_offset_order (nops, unsorted_offsets, order,
14016 check_regs ? unsorted_regs : NULL))
14020 memcpy (saved_order, order, sizeof order);
14026 for (i = 0; i < nops; i++)
14028 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14030 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14033 *load_offset = unsorted_offsets[order[0]];
14037 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14040 if (unsorted_offsets[order[0]] == 0)
14041 stm_case = 1; /* stmia */
14042 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14043 stm_case = 2; /* stmib */
14044 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14045 stm_case = 3; /* stmda */
14046 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14047 stm_case = 4; /* stmdb */
14051 if (!multiple_operation_profitable_p (false, nops, 0))
14057 /* Routines for use in generating RTL. */
14059 /* Generate a load-multiple instruction. COUNT is the number of loads in
14060 the instruction; REGS and MEMS are arrays containing the operands.
14061 BASEREG is the base register to be used in addressing the memory operands.
14062 WBACK_OFFSET is nonzero if the instruction should update the base register.  */
14066 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14067 HOST_WIDE_INT wback_offset)
14072 if (!multiple_operation_profitable_p (false, count, 0))
14078 for (i = 0; i < count; i++)
14079 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14081 if (wback_offset != 0)
14082 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14084 seq = get_insns ();
14090 result = gen_rtx_PARALLEL (VOIDmode,
14091 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14092 if (wback_offset != 0)
14094 XVECEXP (result, 0, 0)
14095 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14100 for (j = 0; i < count; i++, j++)
14101 XVECEXP (result, 0, i)
14102 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
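/* When WBACK_OFFSET is nonzero, element 0 of the PARALLEL built above is
   the base-register update, e.g. (set (reg:SI 0) (plus:SI (reg:SI 0)
   (const_int 8))) for a two-register load (register numbers purely
   illustrative), followed by the register loads themselves.  */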
14107 /* Generate a store-multiple instruction. COUNT is the number of stores in
14108 the instruction; REGS and MEMS are arrays containing the operands.
14109 BASEREG is the base register to be used in addressing the memory operands.
14110 WBACK_OFFSET is nonzero if the instruction should update the base register.  */
14114 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14115 HOST_WIDE_INT wback_offset)
14120 if (GET_CODE (basereg) == PLUS)
14121 basereg = XEXP (basereg, 0);
14123 if (!multiple_operation_profitable_p (false, count, 0))
14129 for (i = 0; i < count; i++)
14130 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14132 if (wback_offset != 0)
14133 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14135 seq = get_insns ();
14141 result = gen_rtx_PARALLEL (VOIDmode,
14142 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14143 if (wback_offset != 0)
14145 XVECEXP (result, 0, 0)
14146 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14151 for (j = 0; i < count; i++, j++)
14152 XVECEXP (result, 0, i)
14153 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14158 /* Generate either a load-multiple or a store-multiple instruction. This
14159 function can be used in situations where we can start with a single MEM
14160 rtx and adjust its address upwards.
14161 COUNT is the number of operations in the instruction, not counting a
14162 possible update of the base register.  REGS is an array containing the register numbers to be used.
14164 BASEREG is the base register to be used in addressing the memory operands,
14165 which are constructed from BASEMEM.
14166 WRITE_BACK specifies whether the generated instruction should include an
14167 update of the base register.
14168 OFFSETP is used to pass an offset to and from this function; this offset
14169 is not used when constructing the address (instead BASEMEM should have an
14170 appropriate offset in its address), it is used only for setting
14171 MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
14174 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14175 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14177 rtx mems[MAX_LDM_STM_OPS];
14178 HOST_WIDE_INT offset = *offsetp;
14181 gcc_assert (count <= MAX_LDM_STM_OPS);
14183 if (GET_CODE (basereg) == PLUS)
14184 basereg = XEXP (basereg, 0);
14186 for (i = 0; i < count; i++)
14188 rtx addr = plus_constant (Pmode, basereg, i * 4);
14189 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14197 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14198 write_back ? 4 * count : 0);
14200 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14201 write_back ? 4 * count : 0);
14205 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14206 rtx basemem, HOST_WIDE_INT *offsetp)
14208 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14213 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14214 rtx basemem, HOST_WIDE_INT *offsetp)
14216 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14220 /* Called from a peephole2 expander to turn a sequence of loads into an
14221 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14222 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14223 is true if we can reorder the registers because they are used commutatively
14225 Returns true iff we could generate a new instruction. */
14228 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14230 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14231 rtx mems[MAX_LDM_STM_OPS];
14232 int i, j, base_reg;
14234 HOST_WIDE_INT offset;
14235 int write_back = FALSE;
14239 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14240 &base_reg, &offset, !sort_regs);
14246 for (i = 0; i < nops - 1; i++)
14247 for (j = i + 1; j < nops; j++)
14248 if (regs[i] > regs[j])
14254 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14258 gcc_assert (ldm_case == 1 || ldm_case == 5);
14260 /* Thumb-1 ldm uses writeback except if the base is loaded. */
14262 for (i = 0; i < nops; i++)
14263 if (base_reg == regs[i])
14264 write_back = false;
14266 /* Ensure the base is dead if it is updated. */
14267 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
14273 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14274 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14276 base_reg_rtx = newbase;
14279 for (i = 0; i < nops; i++)
14281 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14282 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14285 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14286 write_back ? offset + i * 4 : 0));
14290 /* Called from a peephole2 expander to turn a sequence of stores into an
14291 STM instruction. OPERANDS are the operands found by the peephole matcher;
14292 NOPS indicates how many separate stores we are trying to combine.
14293 Returns true iff we could generate a new instruction. */
14296 gen_stm_seq (rtx *operands, int nops)
14299 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14300 rtx mems[MAX_LDM_STM_OPS];
14303 HOST_WIDE_INT offset;
14304 int write_back = FALSE;
14307 bool base_reg_dies;
14309 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14310 mem_order, &base_reg, &offset, true);
14315 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14317 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14320 gcc_assert (base_reg_dies);
14326 gcc_assert (base_reg_dies);
14327 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14331 addr = plus_constant (Pmode, base_reg_rtx, offset);
14333 for (i = 0; i < nops; i++)
14335 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14336 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14339 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14340 write_back ? offset + i * 4 : 0));
14344 /* Called from a peephole2 expander to turn a sequence of stores that are
14345 preceded by constant loads into an STM instruction. OPERANDS are the
14346 operands found by the peephole matcher; NOPS indicates how many
14347 separate stores we are trying to combine; there are 2 * NOPS
14348 instructions in the peephole.
14349 Returns true iff we could generate a new instruction. */
14352 gen_const_stm_seq (rtx *operands, int nops)
14354 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14355 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14356 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14357 rtx mems[MAX_LDM_STM_OPS];
14360 HOST_WIDE_INT offset;
14361 int write_back = FALSE;
14364 bool base_reg_dies;
14366 HARD_REG_SET allocated;
14368 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14369 mem_order, &base_reg, &offset, false);
14374 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14376 /* If the same register is used more than once, try to find a free register.  */
14378 CLEAR_HARD_REG_SET (allocated);
14379 for (i = 0; i < nops; i++)
14381 for (j = i + 1; j < nops; j++)
14382 if (regs[i] == regs[j])
14384 rtx t = peep2_find_free_register (0, nops * 2,
14385 TARGET_THUMB1 ? "l" : "r",
14386 SImode, &allocated);
14390 regs[i] = REGNO (t);
14394 /* Compute an ordering that maps the register numbers to an ascending sequence.  */
14397 for (i = 0; i < nops; i++)
14398 if (regs[i] < regs[reg_order[0]])
14401 for (i = 1; i < nops; i++)
14403 int this_order = reg_order[i - 1];
14404 for (j = 0; j < nops; j++)
14405 if (regs[j] > regs[reg_order[i - 1]]
14406 && (this_order == reg_order[i - 1]
14407 || regs[j] < regs[this_order]))
14409 reg_order[i] = this_order;
14412 /* Ensure that registers that must be live after the instruction end
14413 up with the correct value. */
14414 for (i = 0; i < nops; i++)
14416 int this_order = reg_order[i];
14417 if ((this_order != mem_order[i]
14418 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14419 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14423 /* Load the constants. */
14424 for (i = 0; i < nops; i++)
14426 rtx op = operands[2 * nops + mem_order[i]];
14427 sorted_regs[i] = regs[reg_order[i]];
14428 emit_move_insn (reg_rtxs[reg_order[i]], op);
14431 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14433 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14436 gcc_assert (base_reg_dies);
14442 gcc_assert (base_reg_dies);
14443 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14447 addr = plus_constant (Pmode, base_reg_rtx, offset);
14449 for (i = 0; i < nops; i++)
14451 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14452 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14455 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14456 write_back ? offset + i * 4 : 0));
14460 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14461 unaligned copies on processors which support unaligned semantics for those
14462 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14463 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14464 An interleave factor of 1 (the minimum) will perform no interleaving.
14465 Load/store multiple are used for aligned addresses where possible. */
14468 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14469 HOST_WIDE_INT length,
14470 unsigned int interleave_factor)
14472 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14473 int *regnos = XALLOCAVEC (int, interleave_factor);
14474 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14475 HOST_WIDE_INT i, j;
14476 HOST_WIDE_INT remaining = length, words;
14477 rtx halfword_tmp = NULL, byte_tmp = NULL;
14479 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14480 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14481 HOST_WIDE_INT srcoffset, dstoffset;
14482 HOST_WIDE_INT src_autoinc, dst_autoinc;
14485 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
14487 /* Use hard registers if we have aligned source or destination so we can use
14488 load/store multiple with contiguous registers. */
14489 if (dst_aligned || src_aligned)
14490 for (i = 0; i < interleave_factor; i++)
14491 regs[i] = gen_rtx_REG (SImode, i);
14493 for (i = 0; i < interleave_factor; i++)
14494 regs[i] = gen_reg_rtx (SImode);
14496 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14497 src = copy_addr_to_reg (XEXP (srcbase, 0));
14499 srcoffset = dstoffset = 0;
14501 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14502 For copying the last bytes we want to subtract this offset again. */
14503 src_autoinc = dst_autoinc = 0;
14505 for (i = 0; i < interleave_factor; i++)
14508 /* Copy BLOCK_SIZE_BYTES chunks. */
14510 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14513 if (src_aligned && interleave_factor > 1)
14515 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14516 TRUE, srcbase, &srcoffset));
14517 src_autoinc += UNITS_PER_WORD * interleave_factor;
14521 for (j = 0; j < interleave_factor; j++)
14523 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14525 mem = adjust_automodify_address (srcbase, SImode, addr,
14526 srcoffset + j * UNITS_PER_WORD);
14527 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14529 srcoffset += block_size_bytes;
14533 if (dst_aligned && interleave_factor > 1)
14535 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14536 TRUE, dstbase, &dstoffset));
14537 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14541 for (j = 0; j < interleave_factor; j++)
14543 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14545 mem = adjust_automodify_address (dstbase, SImode, addr,
14546 dstoffset + j * UNITS_PER_WORD);
14547 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14549 dstoffset += block_size_bytes;
14552 remaining -= block_size_bytes;
14555 /* Copy any whole words left (note these aren't interleaved with any
14556 subsequent halfword/byte load/stores in the interests of simplicity). */
14558 words = remaining / UNITS_PER_WORD;
14560 gcc_assert (words < interleave_factor);
14562 if (src_aligned && words > 1)
14564 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14566 src_autoinc += UNITS_PER_WORD * words;
14570 for (j = 0; j < words; j++)
14572 addr = plus_constant (Pmode, src,
14573 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14574 mem = adjust_automodify_address (srcbase, SImode, addr,
14575 srcoffset + j * UNITS_PER_WORD);
14577 emit_move_insn (regs[j], mem);
14579 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14581 srcoffset += words * UNITS_PER_WORD;
14584 if (dst_aligned && words > 1)
14586 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14588 dst_autoinc += words * UNITS_PER_WORD;
14592 for (j = 0; j < words; j++)
14594 addr = plus_constant (Pmode, dst,
14595 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14596 mem = adjust_automodify_address (dstbase, SImode, addr,
14597 dstoffset + j * UNITS_PER_WORD);
14599 emit_move_insn (mem, regs[j]);
14601 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14603 dstoffset += words * UNITS_PER_WORD;
14606 remaining -= words * UNITS_PER_WORD;
14608 gcc_assert (remaining < 4);
14610 /* Copy a halfword if necessary. */
14612 if (remaining >= 2)
14614 halfword_tmp = gen_reg_rtx (SImode);
14616 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14617 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14618 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14620 /* Either write out immediately, or delay until we've loaded the last
14621 byte, depending on interleave factor. */
14622 if (interleave_factor == 1)
14624 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14625 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14626 emit_insn (gen_unaligned_storehi (mem,
14627 gen_lowpart (HImode, halfword_tmp)));
14628 halfword_tmp = NULL;
14636 gcc_assert (remaining < 2);
14638 /* Copy last byte. */
14640 if ((remaining & 1) != 0)
14642 byte_tmp = gen_reg_rtx (SImode);
14644 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14645 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14646 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14648 if (interleave_factor == 1)
14650 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14651 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14652 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14661 /* Store last halfword if we haven't done so already. */
14665 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14666 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14667 emit_insn (gen_unaligned_storehi (mem,
14668 gen_lowpart (HImode, halfword_tmp)));
14672 /* Likewise for last byte. */
14676 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14677 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14678 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14682 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14685 /* From mips_adjust_block_mem:
14687 Helper function for doing a loop-based block operation on memory
14688 reference MEM. Each iteration of the loop will operate on LENGTH
14691 Create a new base register for use within the loop and point it to
14692 the start of MEM. Create a new memory reference that uses this
14693 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14696 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14699 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14701 /* Although the new mem does not refer to a known location,
14702 it does keep up to LENGTH bytes of alignment. */
14703 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14704 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14707 /* From mips_block_move_loop:
14709 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14710 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14711 the memory regions do not overlap. */
14714 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14715 unsigned int interleave_factor,
14716 HOST_WIDE_INT bytes_per_iter)
14718 rtx src_reg, dest_reg, final_src, test;
14719 HOST_WIDE_INT leftover;
14721 leftover = length % bytes_per_iter;
14722 length -= leftover;
14724 /* Create registers and memory references for use within the loop. */
14725 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14726 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14728 /* Calculate the value that SRC_REG should have after the last iteration of the loop.  */
14730 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14731 0, 0, OPTAB_WIDEN);
14733 /* Emit the start of the loop. */
14734 rtx_code_label *label = gen_label_rtx ();
14735 emit_label (label);
14737 /* Emit the loop body. */
14738 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14739 interleave_factor);
14741 /* Move on to the next block. */
14742 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14743 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14745 /* Emit the loop condition. */
14746 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14747 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14749 /* Mop up any left-over bytes. */
14751 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
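/* For example, copying 70 bytes with BYTES_PER_ITER == 16 runs the loop
   four times (64 bytes) and leaves LEFTOVER == 6 bytes for the
   straight-line copy above.  */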
14754 /* Emit a block move when either the source or destination is unaligned (not
14755 aligned to a four-byte boundary). This may need further tuning depending on
14756 core type, optimize_size setting, etc. */
14759 arm_cpymemqi_unaligned (rtx *operands)
14761 HOST_WIDE_INT length = INTVAL (operands[2]);
14765 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14766 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14767 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14768 the size of the code if optimizing for size.  We'll use ldm/stm if src_aligned
14769 or dst_aligned though: allow more interleaving in those cases since the
14770 resulting code can be smaller. */
14771 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14772 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14775 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14776 interleave_factor, bytes_per_iter);
14778 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14779 interleave_factor);
14783 /* Note that the loop created by arm_block_move_unaligned_loop may be
14784 subject to loop unrolling, which makes tuning this condition a little awkward.  */
14787 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14789 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14796 arm_gen_cpymemqi (rtx *operands)
14798 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14799 HOST_WIDE_INT srcoffset, dstoffset;
14800 rtx src, dst, srcbase, dstbase;
14801 rtx part_bytes_reg = NULL;
14804 if (!CONST_INT_P (operands[2])
14805 || !CONST_INT_P (operands[3])
14806 || INTVAL (operands[2]) > 64)
14809 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14810 return arm_cpymemqi_unaligned (operands);
14812 if (INTVAL (operands[3]) & 3)
14815 dstbase = operands[0];
14816 srcbase = operands[1];
14818 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14819 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14821 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14822 out_words_to_go = INTVAL (operands[2]) / 4;
14823 last_bytes = INTVAL (operands[2]) & 3;
14824 dstoffset = srcoffset = 0;
14826 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14827 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14829 while (in_words_to_go >= 2)
14831 if (in_words_to_go > 4)
14832 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14833 TRUE, srcbase, &srcoffset));
14835 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14836 src, FALSE, srcbase,
14839 if (out_words_to_go)
14841 if (out_words_to_go > 4)
14842 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14843 TRUE, dstbase, &dstoffset));
14844 else if (out_words_to_go != 1)
14845 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14846 out_words_to_go, dst,
14849 dstbase, &dstoffset));
14852 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14853 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14854 if (last_bytes != 0)
14856 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14862 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14863 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14866 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14867 if (out_words_to_go)
14871 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14872 sreg = copy_to_reg (mem);
14874 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14875 emit_move_insn (mem, sreg);
14878 gcc_assert (!in_words_to_go); /* Sanity check */
14881 if (in_words_to_go)
14883 gcc_assert (in_words_to_go > 0);
14885 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14886 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14889 gcc_assert (!last_bytes || part_bytes_reg);
14891 if (BYTES_BIG_ENDIAN && last_bytes)
14893 rtx tmp = gen_reg_rtx (SImode);
14895 /* The bytes we want are in the top end of the word. */
14896 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14897 GEN_INT (8 * (4 - last_bytes))));
14898 part_bytes_reg = tmp;
14902 mem = adjust_automodify_address (dstbase, QImode,
14903 plus_constant (Pmode, dst,
14905 dstoffset + last_bytes - 1);
14906 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14910 tmp = gen_reg_rtx (SImode);
14911 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14912 part_bytes_reg = tmp;
14919 if (last_bytes > 1)
14921 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14922 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14926 rtx tmp = gen_reg_rtx (SImode);
14927 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14928 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14929 part_bytes_reg = tmp;
14936 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14937 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14944 /* Helper for gen_cpymem_ldrd_strd. Increase the address of memory rtx by its mode size. */
14947 next_consecutive_mem (rtx mem)
14949 machine_mode mode = GET_MODE (mem);
14950 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14951 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14953 return adjust_automodify_address (mem, mode, addr, offset);
14956 /* Copy using LDRD/STRD instructions whenever possible.
14957 Returns true upon success. */
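/* Worked example (illustrative): a 14-byte copy between two
   sufficiently aligned buffers is emitted below as one LDRD/STRD
   pair (8 bytes), one LDR/STR word (4 bytes) and one LDRH/STRH
   halfword (2 bytes).  */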
14959 gen_cpymem_ldrd_strd (rtx *operands)
14961 unsigned HOST_WIDE_INT len;
14962 HOST_WIDE_INT align;
14963 rtx src, dst, base;
14965 bool src_aligned, dst_aligned;
14966 bool src_volatile, dst_volatile;
14968 gcc_assert (CONST_INT_P (operands[2]));
14969 gcc_assert (CONST_INT_P (operands[3]));
14971 len = UINTVAL (operands[2]);
14975 /* Maximum alignment we can assume for both src and dst buffers. */
14976 align = INTVAL (operands[3]);
14978 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14981 /* Place src and dst addresses in registers
14982 and update the corresponding mem rtx. */
14984 dst_volatile = MEM_VOLATILE_P (dst);
14985 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14986 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14987 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14990 src_volatile = MEM_VOLATILE_P (src);
14991 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14992 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14993 src = adjust_automodify_address (src, VOIDmode, base, 0);
14995 if (!unaligned_access && !(src_aligned && dst_aligned))
14998 if (src_volatile || dst_volatile)
15001 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15002 if (!(dst_aligned || src_aligned))
15003 return arm_gen_cpymemqi (operands);
15005 /* If either src or dst is unaligned we'll be accessing it as pairs
15006 of unaligned SImode accesses. Otherwise we can generate DImode
15007 ldrd/strd instructions. */
15008 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15009 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15014 reg0 = gen_reg_rtx (DImode);
15015 rtx low_reg = NULL_RTX;
15016 rtx hi_reg = NULL_RTX;
15018 if (!src_aligned || !dst_aligned)
15020 low_reg = gen_lowpart (SImode, reg0);
15021 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
15023 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15024 emit_move_insn (reg0, src);
15025 else if (src_aligned)
15026 emit_insn (gen_unaligned_loaddi (reg0, src));
15029 emit_insn (gen_unaligned_loadsi (low_reg, src));
15030 src = next_consecutive_mem (src);
15031 emit_insn (gen_unaligned_loadsi (hi_reg, src));
15034 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15035 emit_move_insn (dst, reg0);
15036 else if (dst_aligned)
15037 emit_insn (gen_unaligned_storedi (dst, reg0));
15040 emit_insn (gen_unaligned_storesi (dst, low_reg));
15041 dst = next_consecutive_mem (dst);
15042 emit_insn (gen_unaligned_storesi (dst, hi_reg));
15045 src = next_consecutive_mem (src);
15046 dst = next_consecutive_mem (dst);
15049 gcc_assert (len < 8);
15052 /* More than a word but less than a double-word to copy. Copy a word. */
15053 reg0 = gen_reg_rtx (SImode);
15054 src = adjust_address (src, SImode, 0);
15055 dst = adjust_address (dst, SImode, 0);
15057 emit_move_insn (reg0, src);
15059 emit_insn (gen_unaligned_loadsi (reg0, src));
15062 emit_move_insn (dst, reg0);
15064 emit_insn (gen_unaligned_storesi (dst, reg0));
15066 src = next_consecutive_mem (src);
15067 dst = next_consecutive_mem (dst);
15074 /* Copy the remaining bytes. */
15077 dst = adjust_address (dst, HImode, 0);
15078 src = adjust_address (src, HImode, 0);
15079 reg0 = gen_reg_rtx (SImode);
15081 emit_insn (gen_zero_extendhisi2 (reg0, src));
15083 emit_insn (gen_unaligned_loadhiu (reg0, src));
15086 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
15088 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15090 src = next_consecutive_mem (src);
15091 dst = next_consecutive_mem (dst);
15096 dst = adjust_address (dst, QImode, 0);
15097 src = adjust_address (src, QImode, 0);
15098 reg0 = gen_reg_rtx (QImode);
15099 emit_move_insn (reg0, src);
15100 emit_move_insn (dst, reg0);
15104 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15105 into its component 32-bit subregs. OP2 may be an immediate
15106 constant and we want to simplify it in that case. */
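/* Illustrative example: if OP2 is the DImode constant
   0x0000000100000002, then *LO_OP2 becomes (const_int 2) and
   *HI_OP2 becomes (const_int 1); register operands are simply split
   into their low and high SImode parts.  */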
15108 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
15109 rtx *lo_op2, rtx *hi_op2)
15111 *lo_op1 = gen_lowpart (SImode, op1);
15112 *hi_op1 = gen_highpart (SImode, op1);
15113 *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
15114 subreg_lowpart_offset (SImode, DImode));
15115 *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
15116 subreg_highpart_offset (SImode, DImode));
15119 /* Select a dominance comparison mode if possible for a test of the general
15120 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15121 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15122 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15123 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15124 In all cases OP will be either EQ or NE, but we don't need to know which
15125 here. If we are unable to support a dominance comparison we return
15126 CC mode. This will then fail to match for the RTL expressions that
15127 generate this call. */
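/* Illustrative example: combine may hand us
	(ne (ior (eq r0 (const_int 0)) (eq r1 (const_int 0)))
	    (const_int 0))
   with COND_OR == DOM_CC_X_OR_Y; both sub-comparisons are EQ, so a
   single dominance mode (CC_DEQmode) can describe the pair.  */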
15129 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15131 enum rtx_code cond1, cond2;
15134 /* Currently we will probably get the wrong result if the individual
15135 comparisons are not simple. This also ensures that it is safe to
15136 reverse a comparison if necessary. */
15137 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15139 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15143 /* The if_then_else variant of this tests the second condition if the
15144 first passes, but is true if the first fails. Reverse the first
15145 condition to get a true "inclusive-or" expression. */
15146 if (cond_or == DOM_CC_NX_OR_Y)
15147 cond1 = reverse_condition (cond1);
15149 /* If the comparisons are not equal, and one doesn't dominate the other,
15150 then we can't do this. */
15152 && !comparison_dominates_p (cond1, cond2)
15153 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15157 std::swap (cond1, cond2);
15162 if (cond_or == DOM_CC_X_AND_Y)
15167 case EQ: return CC_DEQmode;
15168 case LE: return CC_DLEmode;
15169 case LEU: return CC_DLEUmode;
15170 case GE: return CC_DGEmode;
15171 case GEU: return CC_DGEUmode;
15172 default: gcc_unreachable ();
15176 if (cond_or == DOM_CC_X_AND_Y)
15188 gcc_unreachable ();
15192 if (cond_or == DOM_CC_X_AND_Y)
15204 gcc_unreachable ();
15208 if (cond_or == DOM_CC_X_AND_Y)
15209 return CC_DLTUmode;
15214 return CC_DLTUmode;
15216 return CC_DLEUmode;
15220 gcc_unreachable ();
15224 if (cond_or == DOM_CC_X_AND_Y)
15225 return CC_DGTUmode;
15230 return CC_DGTUmode;
15232 return CC_DGEUmode;
15236 gcc_unreachable ();
15239 /* The remaining cases only occur when both comparisons are the same. */
15242 gcc_assert (cond1 == cond2);
15246 gcc_assert (cond1 == cond2);
15250 gcc_assert (cond1 == cond2);
15254 gcc_assert (cond1 == cond2);
15255 return CC_DLEUmode;
15258 gcc_assert (cond1 == cond2);
15259 return CC_DGEUmode;
15262 gcc_unreachable ();
15267 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15269 /* All floating point compares return CCFP if it is an equality
15270 comparison, and CCFPE otherwise. */
15271 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15294 gcc_unreachable ();
15298 /* A compare with a shifted operand. Because of canonicalization, the
15299 comparison will have to be swapped when we emit the assembler. */
15300 if (GET_MODE (y) == SImode
15301 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15302 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15303 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15304 || GET_CODE (x) == ROTATERT))
15307 /* A widened compare of the sum of a value plus a carry against a
15308 constant. This is a representation of RSC. We want to swap the
15309 result of the comparison at output. Not valid if the Z bit is needed. */
15311 if (GET_MODE (x) == DImode
15312 && GET_CODE (x) == PLUS
15313 && arm_borrow_operation (XEXP (x, 1), DImode)
15315 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
15316 && (op == LE || op == GT))
15317 || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
15318 && (op == LEU || op == GTU))))
15321 /* If X is a constant we want to use CC_RSBmode. This is
15322 non-canonical, but arm_gen_compare_reg uses this to generate the
15323 correct canonical form. */
15324 if (GET_MODE (y) == SImode
15325 && (REG_P (y) || GET_CODE (y) == SUBREG)
15326 && CONST_INT_P (x))
15329 /* This operation is performed swapped, but since we only rely on the Z
15330 flag we don't need an additional mode. */
15331 if (GET_MODE (y) == SImode
15332 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15333 && GET_CODE (x) == NEG
15334 && (op == EQ || op == NE))
15337 /* This is a special case that is used by combine to allow a
15338 comparison of a shifted byte load to be split into a zero-extend
15339 followed by a comparison of the shifted integer (only valid for
15340 equalities and unsigned inequalities). */
15341 if (GET_MODE (x) == SImode
15342 && GET_CODE (x) == ASHIFT
15343 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15344 && GET_CODE (XEXP (x, 0)) == SUBREG
15345 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15346 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15347 && (op == EQ || op == NE
15348 || op == GEU || op == GTU || op == LTU || op == LEU)
15349 && CONST_INT_P (y))
15352 /* A construct for a conditional compare, if the false arm contains
15353 0, then both conditions must be true, otherwise either condition
15354 must be true. Not all conditions are possible, so CCmode is
15355 returned if it can't be done. */
15356 if (GET_CODE (x) == IF_THEN_ELSE
15357 && (XEXP (x, 2) == const0_rtx
15358 || XEXP (x, 2) == const1_rtx)
15359 && COMPARISON_P (XEXP (x, 0))
15360 && COMPARISON_P (XEXP (x, 1)))
15361 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15362 INTVAL (XEXP (x, 2)));
15364 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15365 if (GET_CODE (x) == AND
15366 && (op == EQ || op == NE)
15367 && COMPARISON_P (XEXP (x, 0))
15368 && COMPARISON_P (XEXP (x, 1)))
15369 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15372 if (GET_CODE (x) == IOR
15373 && (op == EQ || op == NE)
15374 && COMPARISON_P (XEXP (x, 0))
15375 && COMPARISON_P (XEXP (x, 1)))
15376 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15379 /* An operation (on Thumb) where we want to test for a single bit.
15380 This is done by shifting that bit up into the top bit of a
15381 scratch register; we can then branch on the sign bit. */
15383 && GET_MODE (x) == SImode
15384 && (op == EQ || op == NE)
15385 && GET_CODE (x) == ZERO_EXTRACT
15386 && XEXP (x, 1) == const1_rtx)
15389 /* An operation that sets the condition codes as a side-effect, the
15390 V flag is not set correctly, so we can only use comparisons where
15391 this doesn't matter. (For LT and GE we can use "mi" and "pl" instead.) */
15393 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15394 if (GET_MODE (x) == SImode
15396 && (op == EQ || op == NE || op == LT || op == GE)
15397 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15398 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15399 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15400 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15401 || GET_CODE (x) == LSHIFTRT
15402 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15403 || GET_CODE (x) == ROTATERT
15404 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15407 /* A comparison of ~reg with a const is really a special
15408 canonicalization of compare (~const, reg), which is a reverse
15409 subtract operation. We may not get here if CONST is 0, but that
15410 doesn't matter because ~0 isn't a valid immediate for RSB. */
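/* Illustrative example: (compare (not r0) (const_int 5)) holds for
   equality iff r0 == ~5, which an RSBS can test directly by
   computing ~5 - r0 and inspecting the flags.  */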
15411 if (GET_MODE (x) == SImode
15412 && GET_CODE (x) == NOT
15413 && CONST_INT_P (y))
15416 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15419 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15420 && GET_CODE (x) == PLUS
15421 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15424 if (GET_MODE (x) == DImode
15425 && GET_CODE (x) == PLUS
15426 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
15428 && UINTVAL (y) == 0x800000000
15429 && (op == GEU || op == LTU))
15432 if (GET_MODE (x) == DImode
15433 && (op == GE || op == LT)
15434 && GET_CODE (x) == SIGN_EXTEND
15435 && ((GET_CODE (y) == PLUS
15436 && arm_borrow_operation (XEXP (y, 0), DImode))
15437 || arm_borrow_operation (y, DImode)))
15440 if (GET_MODE (x) == DImode
15441 && (op == GEU || op == LTU)
15442 && GET_CODE (x) == ZERO_EXTEND
15443 && ((GET_CODE (y) == PLUS
15444 && arm_borrow_operation (XEXP (y, 0), DImode))
15445 || arm_borrow_operation (y, DImode)))
15448 if (GET_MODE (x) == DImode
15449 && (op == EQ || op == NE)
15450 && (GET_CODE (x) == PLUS
15451 || GET_CODE (x) == MINUS)
15452 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
15453 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
15454 && GET_CODE (y) == SIGN_EXTEND
15455 && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
15458 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15459 return GET_MODE (x);
15464 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
15465 the sequence of instructions needed to generate a suitable condition
15466 code register. Return the CC register result. */
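/* As an illustration of the cases handled below: an EQ or NE test of
   a DImode value against zero needs no subtraction at all; ORRing
   the two halves together and testing the result for zero is
   sufficient.  */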
15468 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
15473 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
15474 gcc_assert (TARGET_32BIT);
15475 gcc_assert (!CONST_INT_P (x));
15477 rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
15478 subreg_lowpart_offset (SImode, DImode));
15479 rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
15480 subreg_highpart_offset (SImode, DImode));
15481 rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
15482 subreg_lowpart_offset (SImode, DImode));
15483 rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
15484 subreg_highpart_offset (SImode, DImode));
15490 if (y_lo == const0_rtx || y_hi == const0_rtx)
15492 if (y_lo != const0_rtx)
15494 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
15496 gcc_assert (y_hi == const0_rtx);
15497 y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
15498 if (!arm_add_operand (y_lo, SImode))
15499 y_lo = force_reg (SImode, y_lo);
15500 emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
15503 else if (y_hi != const0_rtx)
15505 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
15507 y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
15508 if (!arm_add_operand (y_hi, SImode))
15509 y_hi = force_reg (SImode, y_hi);
15510 emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
15516 gcc_assert (!reload_completed);
15517 scratch = gen_rtx_SCRATCH (SImode);
15520 rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15521 cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
15524 = gen_rtx_SET (cc_reg,
15525 gen_rtx_COMPARE (CC_NZmode,
15526 gen_rtx_IOR (SImode, x_lo, x_hi),
15528 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
15533 if (!arm_add_operand (y_lo, SImode))
15534 y_lo = force_reg (SImode, y_lo);
15536 if (!arm_add_operand (y_hi, SImode))
15537 y_hi = force_reg (SImode, y_hi);
15539 rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
15540 rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
15541 rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
15542 mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
15543 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15545 emit_insn (gen_rtx_SET (cc_reg,
15546 gen_rtx_COMPARE (VOIDmode, conjunction,
15554 if (y_lo == const0_rtx)
15556 /* If the low word of y is 0, then this is simply a normal
15557 compare of the upper words. */
15558 if (!arm_add_operand (y_hi, SImode))
15559 y_hi = force_reg (SImode, y_hi);
15561 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
15564 if (!arm_add_operand (y_lo, SImode))
15565 y_lo = force_reg (SImode, y_lo);
15568 = gen_rtx_LTU (DImode,
15569 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
15573 scratch = gen_rtx_SCRATCH (SImode);
15575 if (!arm_not_operand (y_hi, SImode))
15576 y_hi = force_reg (SImode, y_hi);
15579 if (y_hi == const0_rtx)
15580 insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
15582 else if (CONST_INT_P (y_hi))
15583 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
15586 insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
15588 return SET_DEST (single_set (insn));
15594 /* During expansion, we only expect to get here if y is a
15595 constant that we want to handle, otherwise we should have
15596 swapped the operands already. */
15597 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
15599 if (!const_ok_for_arm (INTVAL (y_lo)))
15600 y_lo = force_reg (SImode, y_lo);
15602 /* Perform a reverse subtract and compare. */
15604 = gen_rtx_LTU (DImode,
15605 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
15607 rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
15609 return SET_DEST (single_set (insn));
15615 if (y_lo == const0_rtx)
15617 /* If the low word of y is 0, then this is simply a normal
15618 compare of the upper words. */
15619 if (!arm_add_operand (y_hi, SImode))
15620 y_hi = force_reg (SImode, y_hi);
15622 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
15625 if (!arm_add_operand (y_lo, SImode))
15626 y_lo = force_reg (SImode, y_lo);
15629 = gen_rtx_LTU (DImode,
15630 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
15634 scratch = gen_rtx_SCRATCH (SImode);
15635 if (!arm_not_operand (y_hi, SImode))
15636 y_hi = force_reg (SImode, y_hi);
15639 if (y_hi == const0_rtx)
15640 insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
15642 else if (CONST_INT_P (y_hi))
15644 /* Constant is viewed as unsigned when zero-extended. */
15645 y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
15646 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
15650 insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
15652 return SET_DEST (single_set (insn));
15658 /* During expansion, we only expect to get here if y is a
15659 constant that we want to handle, otherwise we should have
15660 swapped the operands already. */
15661 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
15663 if (!const_ok_for_arm (INTVAL (y_lo)))
15664 y_lo = force_reg (SImode, y_lo);
15666 /* Perform a reverse subtract and compare. */
15668 = gen_rtx_LTU (DImode,
15669 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
15671 y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
15672 rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
15674 return SET_DEST (single_set (insn));
15678 gcc_unreachable ();
15682 /* X and Y are two things to compare using CODE. Emit the compare insn and
15683 return the rtx for register 0 in the proper mode. */
15685 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
15687 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15688 return arm_gen_dicompare_reg (code, x, y, scratch);
15690 machine_mode mode = SELECT_CC_MODE (code, x, y);
15691 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15692 if (mode == CC_RSBmode)
15695 scratch = gen_rtx_SCRATCH (SImode);
15696 emit_insn (gen_rsb_imm_compare_scratch (scratch,
15697 GEN_INT (~UINTVAL (x)), y));
15700 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15705 /* Generate a sequence of insns that will generate the correct return
15706 address mask depending on the physical architecture that the program is running on. */
15709 arm_gen_return_addr_mask (void)
15711 rtx reg = gen_reg_rtx (Pmode);
15713 emit_insn (gen_return_addr_mask (reg));
15718 arm_reload_in_hi (rtx *operands)
15720 rtx ref = operands[1];
15722 HOST_WIDE_INT offset = 0;
15724 if (GET_CODE (ref) == SUBREG)
15726 offset = SUBREG_BYTE (ref);
15727 ref = SUBREG_REG (ref);
15732 /* We have a pseudo which has been spilt onto the stack; there
15733 are two cases here: the first where there is a simple
15734 stack-slot replacement and a second where the stack-slot is
15735 out of range, or is used as a subreg. */
15736 if (reg_equiv_mem (REGNO (ref)))
15738 ref = reg_equiv_mem (REGNO (ref));
15739 base = find_replacement (&XEXP (ref, 0));
15742 /* The slot is out of range, or was dressed up in a SUBREG. */
15743 base = reg_equiv_address (REGNO (ref));
15745 /* PR 62554: If there is no equivalent memory location then just move
15746 the value as an SImode register move. This happens when the target
15747 architecture variant does not have an HImode register move. */
15750 gcc_assert (REG_P (operands[0]));
15751 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
15752 gen_rtx_SUBREG (SImode, ref, 0)));
15757 base = find_replacement (&XEXP (ref, 0));
15759 /* Handle the case where the address is too complex to be offset by 1. */
15760 if (GET_CODE (base) == MINUS
15761 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15763 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15765 emit_set_insn (base_plus, base);
15768 else if (GET_CODE (base) == PLUS)
15770 /* The addend must be CONST_INT, or we would have dealt with it above. */
15771 HOST_WIDE_INT hi, lo;
15773 offset += INTVAL (XEXP (base, 1));
15774 base = XEXP (base, 0);
15776 /* Rework the address into a legal sequence of insns. */
15777 /* Valid range for lo is -4095 -> 4095 */
15780 : -((-offset) & 0xfff));
15782 /* Corner case, if lo is the max offset then we would be out of range
15783 once we have added the additional 1 below, so bump the msb into the
15784 pre-loading insn(s). */
15788 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15789 ^ (HOST_WIDE_INT) 0x80000000)
15790 - (HOST_WIDE_INT) 0x80000000);
15792 gcc_assert (hi + lo == offset);
15796 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15798 /* Get the base address; addsi3 knows how to handle constants
15799 that require more than one insn. */
15800 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15806 /* Operands[2] may overlap operands[0] (though it won't overlap
15807 operands[1]), that's why we asked for a DImode reg -- so we can
15808 use the bit that does not overlap. */
15809 if (REGNO (operands[2]) == REGNO (operands[0]))
15810 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15812 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15814 emit_insn (gen_zero_extendqisi2 (scratch,
15815 gen_rtx_MEM (QImode,
15816 plus_constant (Pmode, base,
15818 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15819 gen_rtx_MEM (QImode,
15820 plus_constant (Pmode, base,
15822 if (!BYTES_BIG_ENDIAN)
15823 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15824 gen_rtx_IOR (SImode,
15827 gen_rtx_SUBREG (SImode, operands[0], 0),
15831 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15832 gen_rtx_IOR (SImode,
15833 gen_rtx_ASHIFT (SImode, scratch,
15835 gen_rtx_SUBREG (SImode, operands[0], 0)));
15838 /* Handle storing a half-word to memory during reload by synthesizing as two
15839 byte stores. Take care not to clobber the input values until after we
15840 have moved them somewhere safe. This code assumes that if the DImode
15841 scratch in operands[2] overlaps either the input value or output address
15842 in some way, then that value must die in this insn (we absolutely need
15843 two scratch registers for some corner cases). */
15845 arm_reload_out_hi (rtx *operands)
15847 rtx ref = operands[0];
15848 rtx outval = operands[1];
15850 HOST_WIDE_INT offset = 0;
15852 if (GET_CODE (ref) == SUBREG)
15854 offset = SUBREG_BYTE (ref);
15855 ref = SUBREG_REG (ref);
15860 /* We have a pseudo which has been spilt onto the stack; there
15861 are two cases here: the first where there is a simple
15862 stack-slot replacement and a second where the stack-slot is
15863 out of range, or is used as a subreg. */
15864 if (reg_equiv_mem (REGNO (ref)))
15866 ref = reg_equiv_mem (REGNO (ref));
15867 base = find_replacement (&XEXP (ref, 0));
15870 /* The slot is out of range, or was dressed up in a SUBREG. */
15871 base = reg_equiv_address (REGNO (ref));
15873 /* PR 62254: If there is no equivalent memory location then just move
15874 the value as an SImode register move. This happens when the target
15875 architecture variant does not have an HImode register move. */
15878 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15880 if (REG_P (outval))
15882 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15883 gen_rtx_SUBREG (SImode, outval, 0)));
15885 else /* SUBREG_P (outval) */
15887 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15888 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15889 SUBREG_REG (outval)));
15891 /* FIXME: Handle other cases ? */
15892 gcc_unreachable ();
15898 base = find_replacement (&XEXP (ref, 0));
15900 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15902 /* Handle the case where the address is too complex to be offset by 1. */
15903 if (GET_CODE (base) == MINUS
15904 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15906 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15908 /* Be careful not to destroy OUTVAL. */
15909 if (reg_overlap_mentioned_p (base_plus, outval))
15911 /* Updating base_plus might destroy outval, see if we can
15912 swap the scratch and base_plus. */
15913 if (!reg_overlap_mentioned_p (scratch, outval))
15914 std::swap (scratch, base_plus);
15917 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15919 /* Be conservative and copy OUTVAL into the scratch now,
15920 this should only be necessary if outval is a subreg
15921 of something larger than a word. */
15922 /* XXX Might this clobber base? I can't see how it can,
15923 since scratch is known to overlap with OUTVAL, and
15924 must be wider than a word. */
15925 emit_insn (gen_movhi (scratch_hi, outval));
15926 outval = scratch_hi;
15930 emit_set_insn (base_plus, base);
15933 else if (GET_CODE (base) == PLUS)
15935 /* The addend must be CONST_INT, or we would have dealt with it above. */
15936 HOST_WIDE_INT hi, lo;
15938 offset += INTVAL (XEXP (base, 1));
15939 base = XEXP (base, 0);
15941 /* Rework the address into a legal sequence of insns. */
15942 /* Valid range for lo is -4095 -> 4095 */
15945 : -((-offset) & 0xfff));
15947 /* Corner case, if lo is the max offset then we would be out of range
15948 once we have added the additional 1 below, so bump the msb into the
15949 pre-loading insn(s). */
15953 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15954 ^ (HOST_WIDE_INT) 0x80000000)
15955 - (HOST_WIDE_INT) 0x80000000);
15957 gcc_assert (hi + lo == offset);
15961 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15963 /* Be careful not to destroy OUTVAL. */
15964 if (reg_overlap_mentioned_p (base_plus, outval))
15966 /* Updating base_plus might destroy outval, see if we
15967 can swap the scratch and base_plus. */
15968 if (!reg_overlap_mentioned_p (scratch, outval))
15969 std::swap (scratch, base_plus);
15972 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15974 /* Be conservative and copy outval into scratch now,
15975 this should only be necessary if outval is a
15976 subreg of something larger than a word. */
15977 /* XXX Might this clobber base? I can't see how it
15978 can, since scratch is known to overlap with OUTVAL. */
15980 emit_insn (gen_movhi (scratch_hi, outval));
15981 outval = scratch_hi;
15985 /* Get the base address; addsi3 knows how to handle constants
15986 that require more than one insn. */
15987 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15993 if (BYTES_BIG_ENDIAN)
15995 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15996 plus_constant (Pmode, base,
15998 gen_lowpart (QImode, outval)));
15999 emit_insn (gen_lshrsi3 (scratch,
16000 gen_rtx_SUBREG (SImode, outval, 0),
16002 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16004 gen_lowpart (QImode, scratch)));
16008 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16010 gen_lowpart (QImode, outval)));
16011 emit_insn (gen_lshrsi3 (scratch,
16012 gen_rtx_SUBREG (SImode, outval, 0),
16014 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16015 plus_constant (Pmode, base,
16017 gen_lowpart (QImode, scratch)));
16021 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16022 (padded to the size of a word) should be passed in a register. */
16025 arm_must_pass_in_stack (const function_arg_info &arg)
16027 if (TARGET_AAPCS_BASED)
16028 return must_pass_in_stack_var_size (arg);
16030 return must_pass_in_stack_var_size_or_pad (arg);
16034 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16035 byte of a stack argument has useful data. For legacy APCS ABIs we use
16036 the default. For AAPCS based ABIs small aggregate types are placed
16037 in the lowest memory address. */
16039 static pad_direction
16040 arm_function_arg_padding (machine_mode mode, const_tree type)
16042 if (!TARGET_AAPCS_BASED)
16043 return default_function_arg_padding (mode, type);
16045 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16046 return PAD_DOWNWARD;
16052 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16053 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16054 register has useful data, and return the opposite if the most
16055 significant byte does. */
16058 arm_pad_reg_upward (machine_mode mode,
16059 tree type, int first ATTRIBUTE_UNUSED)
16061 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16063 /* For AAPCS, small aggregates, small fixed-point types,
16064 and small complex types are always padded upwards. */
16067 if ((AGGREGATE_TYPE_P (type)
16068 || TREE_CODE (type) == COMPLEX_TYPE
16069 || FIXED_POINT_TYPE_P (type))
16070 && int_size_in_bytes (type) <= 4)
16075 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16076 && GET_MODE_SIZE (mode) <= 4)
16081 /* Otherwise, use default padding. */
16082 return !BYTES_BIG_ENDIAN;
16085 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16086 assuming that the address in the base register is word aligned. */
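/* Worked example (from the ISA immediate fields, for reference):
   ARM-state LDRD/STRD take an 8-bit byte offset, so offsets in
   [-255, 255] are accepted; Thumb-2 takes an 8-bit offset scaled by
   four, giving [-1020, 1020] in multiples of 4.  */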
16088 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16090 HOST_WIDE_INT max_offset;
16092 /* Offset must be a multiple of 4 in Thumb mode. */
16093 if (TARGET_THUMB2 && ((offset & 3) != 0))
16098 else if (TARGET_ARM)
16103 return ((offset <= max_offset) && (offset >= -max_offset));
16106 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16107 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
16108 Assumes that the address in the base register RN is word aligned. Pattern
16109 guarantees that both memory accesses use the same base register,
16110 the offsets are constants within the range, and the gap between the offsets is 4.
16111 If reload has completed then check that the registers are legal. WBACK indicates whether
16112 address is updated. LOAD indicates whether memory access is load or store. */
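/* Illustrative example: in ARM state "ldrd r1, r2, [r0]" must be
   rejected because the first transfer register has to be even;
   "ldrd r2, r3, [r0]" is acceptable.  Thumb-2 has no even/odd
   restriction but forbids SP and PC as transfer registers.  */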
16114 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16115 bool wback, bool load)
16117 unsigned int t, t2, n;
16119 if (!reload_completed)
16122 if (!offset_ok_for_ldrd_strd (offset))
16129 if ((TARGET_THUMB2)
16130 && ((wback && (n == t || n == t2))
16131 || (t == SP_REGNUM)
16132 || (t == PC_REGNUM)
16133 || (t2 == SP_REGNUM)
16134 || (t2 == PC_REGNUM)
16135 || (!load && (n == PC_REGNUM))
16136 || (load && (t == t2))
16137 /* Triggers Cortex-M3 LDRD errata. */
16138 || (!wback && load && fix_cm3_ldrd && (n == t))))
16142 && ((wback && (n == t || n == t2))
16143 || (t2 == PC_REGNUM)
16144 || (t % 2 != 0) /* First destination register is not even. */
16146 /* PC can be used as base register (for offset addressing only),
16147 but it is deprecated. */
16148 || (n == PC_REGNUM)))
16154 /* Return true if a 64-bit access with alignment ALIGN and with a
16155 constant offset OFFSET from the base pointer is permitted on this architecture. */
16158 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
16160 return (unaligned_access
16161 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
16162 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
16165 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
16166 operand MEM's address contains an immediate offset from the base
16167 register and has no side effects, in which case it sets BASE,
16168 OFFSET and ALIGN accordingly. */
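/* Illustrative example: given (mem (plus (reg r4) (const_int 8)))
   this sets *BASE to the r4 rtx, *OFFSET to (const_int 8) and
   *ALIGN to the recorded MEM alignment; a bare (mem (reg r4))
   yields an offset of zero.  */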
16170 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
16174 gcc_assert (base != NULL && offset != NULL);
16176 /* TODO: Handle more general memory operand patterns, such as
16177 PRE_DEC and PRE_INC. */
16179 if (side_effects_p (mem))
16182 /* Can't deal with subregs. */
16183 if (GET_CODE (mem) == SUBREG)
16186 gcc_assert (MEM_P (mem));
16188 *offset = const0_rtx;
16189 *align = MEM_ALIGN (mem);
16191 addr = XEXP (mem, 0);
16193 /* If addr isn't valid for DImode, then we can't handle it. */
16194 if (!arm_legitimate_address_p (DImode, addr,
16195 reload_in_progress || reload_completed))
16203 else if (GET_CODE (addr) == PLUS)
16205 *base = XEXP (addr, 0);
16206 *offset = XEXP (addr, 1);
16207 return (REG_P (*base) && CONST_INT_P (*offset));
16213 /* Called from a peephole2 to replace two word-size accesses with a
16214 single LDRD/STRD instruction. Returns true iff we can generate a
16215 new instruction sequence. That is, both accesses use the same base
16216 register and the gap between constant offsets is 4. This function
16217 may reorder its operands to match ldrd/strd RTL templates.
16218 OPERANDS are the operands found by the peephole matcher;
16219 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
16220 corresponding memory operands. LOAD indicates whether the access
16221 is load or store. CONST_STORE indicates a store of constant
16222 integer values held in OPERANDS[4,5] and assumes that the pattern
16223 is four insns long, for the purpose of checking dead registers.
16224 COMMUTE indicates that register operands may be reordered. */
16226 gen_operands_ldrd_strd (rtx *operands, bool load,
16227 bool const_store, bool commute)
16230 HOST_WIDE_INT offsets[2], offset, align[2];
16231 rtx base = NULL_RTX;
16232 rtx cur_base, cur_offset, tmp;
16234 HARD_REG_SET regset;
16236 gcc_assert (!const_store || !load);
16237 /* Check that the memory references are immediate offsets from the
16238 same base register. Extract the base register, the destination
16239 registers, and the corresponding memory offsets. */
16240 for (i = 0; i < nops; i++)
16242 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
16248 else if (REGNO (base) != REGNO (cur_base))
16251 offsets[i] = INTVAL (cur_offset);
16252 if (GET_CODE (operands[i]) == SUBREG)
16254 tmp = SUBREG_REG (operands[i]);
16255 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
16260 /* Make sure there is no dependency between the individual loads. */
16261 if (load && REGNO (operands[0]) == REGNO (base))
16262 return false; /* RAW */
16264 if (load && REGNO (operands[0]) == REGNO (operands[1]))
16265 return false; /* WAW */
16267 /* If the same input register is used in both stores
16268 when storing different constants, try to find a free register.
16269 For example, the code
	mov r0, 0
	str r0, [r2]
	mov r0, 1
	str r0, [r2, #4]
16274 can be transformed into
	mov r1, 0
	mov r0, 1
	strd r1, r0, [r2]
16278 in Thumb mode assuming that r1 is free.
16279 For ARM mode do the same but only if the starting register
16280 can be made to be even. */
16282 && REGNO (operands[0]) == REGNO (operands[1])
16283 && INTVAL (operands[4]) != INTVAL (operands[5]))
16287 CLEAR_HARD_REG_SET (regset);
16288 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16289 if (tmp == NULL_RTX)
16292 /* Use the new register in the first load to ensure that
16293 if the original input register is not dead after peephole,
16294 then it will have the correct constant value. */
16297 else if (TARGET_ARM)
16299 int regno = REGNO (operands[0]);
16300 if (!peep2_reg_dead_p (4, operands[0]))
16302 /* When the input register is even and is not dead after the
16303 pattern, it has to hold the second constant but we cannot
16304 form a legal STRD in ARM mode with this register as the second register. */
16306 if (regno % 2 == 0)
16309 /* Is regno-1 free? */
16310 SET_HARD_REG_SET (regset);
16311 CLEAR_HARD_REG_BIT(regset, regno - 1);
16312 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16313 if (tmp == NULL_RTX)
16320 /* Find a DImode register. */
16321 CLEAR_HARD_REG_SET (regset);
16322 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16323 if (tmp != NULL_RTX)
16325 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16326 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16330 /* Can we use the input register to form a DI register? */
16331 SET_HARD_REG_SET (regset);
16332 CLEAR_HARD_REG_BIT(regset,
16333 regno % 2 == 0 ? regno + 1 : regno - 1);
16334 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16335 if (tmp == NULL_RTX)
16337 operands[regno % 2 == 1 ? 0 : 1] = tmp;
16341 gcc_assert (operands[0] != NULL_RTX);
16342 gcc_assert (operands[1] != NULL_RTX);
16343 gcc_assert (REGNO (operands[0]) % 2 == 0);
16344 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
16348 /* Make sure the instructions are ordered with lower memory access first. */
16349 if (offsets[0] > offsets[1])
16351 gap = offsets[0] - offsets[1];
16352 offset = offsets[1];
16354 /* Swap the instructions such that lower memory is accessed first. */
16355 std::swap (operands[0], operands[1]);
16356 std::swap (operands[2], operands[3]);
16357 std::swap (align[0], align[1]);
16359 std::swap (operands[4], operands[5]);
16363 gap = offsets[1] - offsets[0];
16364 offset = offsets[0];
16367 /* Make sure accesses are to consecutive memory locations. */
16368 if (gap != GET_MODE_SIZE (SImode))
16371 if (!align_ok_ldrd_strd (align[0], offset))
16374 /* Make sure we generate legal instructions. */
16375 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16379 /* In Thumb state, where registers are almost unconstrained, there
16380 is little hope of fixing it. */
16384 if (load && commute)
16386 /* Try reordering registers. */
16387 std::swap (operands[0], operands[1]);
16388 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16395 /* If input registers are dead after this pattern, they can be
16396 reordered or replaced by other registers that are free in the
16397 current pattern. */
16398 if (!peep2_reg_dead_p (4, operands[0])
16399 || !peep2_reg_dead_p (4, operands[1]))
16402 /* Try to reorder the input registers. */
16403 /* For example, the code
	mov r0, 0
	mov r1, 1
	str r1, [r2]
	str r0, [r2, #4]
16408 can be transformed into
	mov r1, 0
	mov r0, 1
	strd r0, [r2]
   */
16413 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
16416 std::swap (operands[0], operands[1]);
16420 /* Try to find a free DI register. */
16421 CLEAR_HARD_REG_SET (regset);
16422 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
16423 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
16426 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16427 if (tmp == NULL_RTX)
16430 /* DREG must be an even-numbered register in DImode.
16431 Split it into SI registers. */
16432 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16433 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16434 gcc_assert (operands[0] != NULL_RTX);
16435 gcc_assert (operands[1] != NULL_RTX);
16436 gcc_assert (REGNO (operands[0]) % 2 == 0);
16437 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16439 return (operands_ok_ldrd_strd (operands[0], operands[1],
16449 /* Return true if parallel execution of the two word-size accesses provided
16450 could be satisfied with a single LDRD/STRD instruction. Two word-size
16451 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
16452 register operands and OPERANDS[2,3] are the corresponding memory operands. */
16455 valid_operands_ldrd_strd (rtx *operands, bool load)
16458 HOST_WIDE_INT offsets[2], offset, align[2];
16459 rtx base = NULL_RTX;
16460 rtx cur_base, cur_offset;
16463 /* Check that the memory references are immediate offsets from the
16464 same base register. Extract the base register, the destination
16465 registers, and the corresponding memory offsets. */
16466 for (i = 0; i < nops; i++)
16468 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
16474 else if (REGNO (base) != REGNO (cur_base))
16477 offsets[i] = INTVAL (cur_offset);
16478 if (GET_CODE (operands[i]) == SUBREG)
16482 if (offsets[0] > offsets[1])
16485 gap = offsets[1] - offsets[0];
16486 offset = offsets[0];
16488 /* Make sure accesses are to consecutive memory locations. */
16489 if (gap != GET_MODE_SIZE (SImode))
16492 if (!align_ok_ldrd_strd (align[0], offset))
16495 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16500 /* Print a symbolic form of X to the debug file, F. */
16502 arm_print_value (FILE *f, rtx x)
16504 switch (GET_CODE (x))
16507 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16513 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16514 sizeof (fpstr), 0, 1);
16524 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16526 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16527 if (i < (CONST_VECTOR_NUNITS (x) - 1))
16535 fprintf (f, "\"%s\"", XSTR (x, 0));
16539 fprintf (f, "`%s'", XSTR (x, 0));
16543 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16547 arm_print_value (f, XEXP (x, 0));
16551 arm_print_value (f, XEXP (x, 0));
16553 arm_print_value (f, XEXP (x, 1));
16561 fprintf (f, "????");
16566 /* Routines for manipulation of the constant pool. */
16568 /* Arm instructions cannot load a large constant directly into a
16569 register; they have to come from a pc relative load. The constant
16570 must therefore be placed in the addressable range of the pc
16571 relative load. Depending on the precise pc relative load
16572 instruction the range is somewhere between 256 bytes and 4k. This
16573 means that we often have to dump a constant inside a function, and
16574 generate code to branch around it.
16576 It is important to minimize this, since the branches will slow
16577 things down and make the code larger.
16579 Normally we can hide the table after an existing unconditional
16580 branch so that there is no interruption of the flow, but in the
16581 worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...
16599 We fix this by performing a scan after scheduling, which notices
16600 which instructions need to have their operands fetched from the
16601 constant table and builds the table.
16603 The algorithm starts by building a table of all the constants that
16604 need fixing up and all the natural barriers in the function (places
16605 where a constant table can be dropped without breaking the flow).
16606 For each fixup we note how far the pc-relative replacement will be
16607 able to reach and the offset of the instruction into the function.
16609 Having built the table we then group the fixes together to form
16610 tables that are as large as possible (subject to addressing
16611 constraints) and emit each table of constants after the last
16612 barrier that is within range of all the instructions in the group.
16613 If a group does not contain a barrier, then we forcibly create one
16614 by inserting a jump instruction into the flow. Once the table has
16615 been inserted, the insns are then modified to reference the
16616 relevant entry in the pool.
16618 Possible enhancements to the algorithm (not implemented) are:
16620 1) For some processors and object formats, there may be benefit in
16621 aligning the pools to the start of cache lines; this alignment
16622 would need to be taken into account when calculating addressability of a pool. */
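/* For reference (approximate ISA ranges, not derived from this
   file): an ARM-state pc-relative LDR reaches about 4K in either
   direction, Thumb-2 about the same, while a Thumb-1 LDR literal
   reaches only about 1K and only forwards; VLDR is limited to about
   1K either way.  The forwards and backwards fields of a fix (see
   struct minipool_fixup below) encode the usable portions of these
   ranges.  */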
16625 /* These typedefs are located at the start of this file, so that
16626 they can be used in the prototypes there. This comment is to
16627 remind readers of that fact so that the following structures
16628 can be understood more easily.
16630 typedef struct minipool_node Mnode;
16631 typedef struct minipool_fixup Mfix; */
16633 struct minipool_node
16635 /* Doubly linked chain of entries. */
16638 /* The maximum offset into the code at which this entry can be placed. While
16639 pushing fixes for forward references, all entries are sorted in order
16640 of increasing max_address. */
16641 HOST_WIDE_INT max_address;
16642 /* Similarly for an entry inserted for a backwards ref. */
16643 HOST_WIDE_INT min_address;
16644 /* The number of fixes referencing this entry. This can become zero
16645 if we "unpush" an entry. In this case we ignore the entry when we
16646 come to emit the code. */
16648 /* The offset from the start of the minipool. */
16649 HOST_WIDE_INT offset;
16650 /* The value in the table. */
16652 /* The mode of the value. */
16654 /* The size of the value. With iWMMXt enabled,
16655 sizes > 4 also imply an alignment of 8 bytes. */
16659 struct minipool_fixup
16663 HOST_WIDE_INT address;
16669 HOST_WIDE_INT forwards;
16670 HOST_WIDE_INT backwards;
16673 /* Fixes less than a word need padding out to a word boundary. */
16674 #define MINIPOOL_FIX_SIZE(mode) \
16675 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
16677 static Mnode * minipool_vector_head;
16678 static Mnode * minipool_vector_tail;
16679 static rtx_code_label *minipool_vector_label;
16680 static int minipool_pad;
16682 /* The linked list of all minipool fixes required for this function. */
16683 Mfix * minipool_fix_head;
16684 Mfix * minipool_fix_tail;
16685 /* The fix entry for the current minipool, once it has been placed. */
16686 Mfix * minipool_barrier;
16688 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16689 #define JUMP_TABLES_IN_TEXT_SECTION 0
16692 static HOST_WIDE_INT
16693 get_jump_table_size (rtx_jump_table_data *insn)
16695 /* ADDR_VECs only take room if read-only data goes into the text section. */
16697 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16699 rtx body = PATTERN (insn);
16700 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16701 HOST_WIDE_INT size;
16702 HOST_WIDE_INT modesize;
16704 modesize = GET_MODE_SIZE (GET_MODE (body));
16705 size = modesize * XVECLEN (body, elt);
16709 /* Round up size of TBB table to a halfword boundary. */
16710 size = (size + 1) & ~HOST_WIDE_INT_1;
16713 /* No padding necessary for TBH. */
16716 /* Add two bytes for alignment on Thumb. */
16721 gcc_unreachable ();
16729 /* Emit insns to load the function address from FUNCDESC (an FDPIC
16730 function descriptor) into a register and the GOT address into the
16731 FDPIC register, returning an rtx for the register holding the
16732 function address. */
16735 arm_load_function_descriptor (rtx funcdesc)
16737 rtx fnaddr_reg = gen_reg_rtx (Pmode);
16738 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
16739 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
16740 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
16742 emit_move_insn (fnaddr_reg, fnaddr);
16744 /* The ABI requires the entry point address to be loaded first, but
16745 since we cannot support lazy binding for lack of atomic load of
16746 two 32-bit values, we do not need to bother to prevent the
16747 previous load from being moved after that of the GOT address. */
16748 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
16753 /* Return the maximum amount of padding that will be inserted before label LABEL. */
16755 static HOST_WIDE_INT
16756 get_label_padding (rtx label)
16758 HOST_WIDE_INT align, min_insn_size;
16760 align = 1 << label_to_alignment (label).levels[0].log;
16761 min_insn_size = TARGET_THUMB ? 2 : 4;
16762 return align > min_insn_size ? align - min_insn_size : 0;
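/* Worked example: a label aligned to 8 bytes in Thumb code (minimum
   insn size 2) can be preceded by at most 8 - 2 = 6 bytes of
   padding, which is what the expression above returns.  */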
16765 /* Move a minipool fix MP from its current location to before MAX_MP.
16766 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16767 constraints may need updating. */
16769 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16770 HOST_WIDE_INT max_address)
16772 /* The code below assumes these are different. */
16773 gcc_assert (mp != max_mp);
16775 if (max_mp == NULL)
16777 if (max_address < mp->max_address)
16778 mp->max_address = max_address;
16782 if (max_address > max_mp->max_address - mp->fix_size)
16783 mp->max_address = max_mp->max_address - mp->fix_size;
16785 mp->max_address = max_address;
16787 /* Unlink MP from its current position. Since max_mp is non-null,
16788 mp->prev must be non-null. */
16789 mp->prev->next = mp->next;
16790 if (mp->next != NULL)
16791 mp->next->prev = mp->prev;
16793 minipool_vector_tail = mp->prev;
16795 /* Re-insert it before MAX_MP. */
16797 mp->prev = max_mp->prev;
16800 if (mp->prev != NULL)
16801 mp->prev->next = mp;
16803 minipool_vector_head = mp;
16806 /* Save the new entry. */
16809 /* Scan over the preceding entries and adjust their addresses as required. */
16811 while (mp->prev != NULL
16812 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16814 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16821 /* Add a constant to the minipool for a forward reference. Returns the
16822 node added or NULL if the constant will not fit in this pool. */
16824 add_minipool_forward_ref (Mfix *fix)
16826 /* If set, max_mp is the first pool_entry that has a lower
16827 constraint than the one we are trying to add. */
16828 Mnode * max_mp = NULL;
16829 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16832 /* If the minipool starts before the end of FIX->INSN then this FIX
16833 cannot be placed into the current pool. Furthermore, adding the
16834 new constant pool entry may cause the pool to start FIX_SIZE bytes earlier. */
16836 if (minipool_vector_head &&
16837 (fix->address + get_attr_length (fix->insn)
16838 >= minipool_vector_head->max_address - fix->fix_size))
16841 /* Scan the pool to see if a constant with the same value has
16842 already been added. While we are doing this, also note the
16843 location where we must insert the constant if it doesn't already exist. */
16845 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16847 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16848 && fix->mode == mp->mode
16849 && (!LABEL_P (fix->value)
16850 || (CODE_LABEL_NUMBER (fix->value)
16851 == CODE_LABEL_NUMBER (mp->value)))
16852 && rtx_equal_p (fix->value, mp->value))
16854 /* More than one fix references this entry. */
16856 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16859 /* Note the insertion point if necessary. */
16861 && mp->max_address > max_address)
16864 /* If we are inserting an 8-byte aligned quantity and
16865 we have not already found an insertion point, then
16866 make sure that all such 8-byte aligned quantities are
16867 placed at the start of the pool. */
16868 if (ARM_DOUBLEWORD_ALIGN
16870 && fix->fix_size >= 8
16871 && mp->fix_size < 8)
16874 max_address = mp->max_address;
16878 /* The value is not currently in the minipool, so we need to create
16879 a new entry for it. If MAX_MP is NULL, the entry will be put on
16880 the end of the list since the placement is less constrained than
16881 any existing entry. Otherwise, we insert the new fix before
16882 MAX_MP and, if necessary, adjust the constraints on the other entries. */
16885 mp->fix_size = fix->fix_size;
16886 mp->mode = fix->mode;
16887 mp->value = fix->value;
16889 /* Not yet required for a backwards ref. */
16890 mp->min_address = -65536;
16892 if (max_mp == NULL)
16894 mp->max_address = max_address;
16896 mp->prev = minipool_vector_tail;
16898 if (mp->prev == NULL)
16900 minipool_vector_head = mp;
16901 minipool_vector_label = gen_label_rtx ();
16904 mp->prev->next = mp;
16906 minipool_vector_tail = mp;
16910 if (max_address > max_mp->max_address - mp->fix_size)
16911 mp->max_address = max_mp->max_address - mp->fix_size;
16913 mp->max_address = max_address;
16916 mp->prev = max_mp->prev;
16918 if (mp->prev != NULL)
16919 mp->prev->next = mp;
16921 minipool_vector_head = mp;
16924 /* Save the new entry. */
16927 /* Scan over the preceding entries and adjust their addresses as required. */
16929 while (mp->prev != NULL
16930 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16932 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16940 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16941 HOST_WIDE_INT min_address)
16943 HOST_WIDE_INT offset;
16945 /* The code below assumes these are different. */
16946 gcc_assert (mp != min_mp);
16948 if (min_mp == NULL)
16950 if (min_address > mp->min_address)
16951 mp->min_address = min_address;
16955 /* We will adjust this below if it is too loose. */
16956 mp->min_address = min_address;
16958 /* Unlink MP from its current position. Since min_mp is non-null,
16959 mp->next must be non-null. */
16960 mp->next->prev = mp->prev;
16961 if (mp->prev != NULL)
16962 mp->prev->next = mp->next;
16964 minipool_vector_head = mp->next;
16966 /* Reinsert it after MIN_MP. */
16968 mp->next = min_mp->next;
16970 if (mp->next != NULL)
16971 mp->next->prev = mp;
16973 minipool_vector_tail = mp;
16979 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16981 mp->offset = offset;
16982 if (mp->refcount > 0)
16983 offset += mp->fix_size;
16985 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16986 mp->next->min_address = mp->min_address + mp->fix_size;
16992 /* Add a constant to the minipool for a backward reference. Returns the
16993 node added or NULL if the constant will not fit in this pool.
16995 Note that the code for insertion for a backwards reference can be
16996 somewhat confusing because the calculated offsets for each fix do
16997 not take into account the size of the pool (which is still under construction). */
17000 add_minipool_backward_ref (Mfix *fix)
17002 /* If set, min_mp is the last pool_entry that has a lower constraint
17003 than the one we are trying to add. */
17004 Mnode *min_mp = NULL;
17005 /* This can be negative, since it is only a constraint. */
17006 HOST_WIDE_INT min_address = fix->address - fix->backwards;
17009 /* If we can't reach the current pool from this insn, or if we can't
17010 insert this entry at the end of the pool without pushing other
17011 fixes out of range, then we don't try. This ensures that we
17012 can't fail later on. */
17013 if (min_address >= minipool_barrier->address
17014 || (minipool_vector_tail->min_address + fix->fix_size
17015 >= minipool_barrier->address))
17018 /* Scan the pool to see if a constant with the same value has
17019 already been added. While we are doing this, also note the
17020 location where we must insert the constant if it doesn't already exist. */
17022 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17024 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17025 && fix->mode == mp->mode
17026 && (!LABEL_P (fix->value)
17027 || (CODE_LABEL_NUMBER (fix->value)
17028 == CODE_LABEL_NUMBER (mp->value)))
17029 && rtx_equal_p (fix->value, mp->value)
17030 /* Check that there is enough slack to move this entry to the
17031 end of the table (this is conservative). */
17032 && (mp->max_address
17033 > (minipool_barrier->address
17034 + minipool_vector_tail->offset
17035 + minipool_vector_tail->fix_size)))
17038 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17041 if (min_mp != NULL)
17042 mp->min_address += fix->fix_size;
17045 /* Note the insertion point if necessary. */
17046 if (mp->min_address < min_address)
17048 /* For now, we do not allow the insertion of nodes requiring 8-byte
17049 alignment anywhere but at the start of the pool. */
17050 if (ARM_DOUBLEWORD_ALIGN
17051 && fix->fix_size >= 8 && mp->fix_size < 8)
17056 else if (mp->max_address
17057 < minipool_barrier->address + mp->offset + fix->fix_size)
17059 /* Inserting before this entry would push the fix beyond
17060 its maximum address (which can happen if we have
17061 re-located a forwards fix); force the new fix to come after it. */
17063 if (ARM_DOUBLEWORD_ALIGN
17064 && fix->fix_size >= 8 && mp->fix_size < 8)
17069 min_address = mp->min_address + fix->fix_size;
17072 /* Do not insert a non-8-byte aligned quantity before 8-byte
17073 aligned quantities. */
17074 else if (ARM_DOUBLEWORD_ALIGN
17075 && fix->fix_size < 8
17076 && mp->fix_size >= 8)
17079 min_address = mp->min_address + fix->fix_size;
17084 /* We need to create a new entry. */
17086 mp->fix_size = fix->fix_size;
17087 mp->mode = fix->mode;
17088 mp->value = fix->value;
17090 mp->max_address = minipool_barrier->address + 65536;
17092 mp->min_address = min_address;
17094 if (min_mp == NULL)
17097 mp->next = minipool_vector_head;
17099 if (mp->next == NULL)
17101 minipool_vector_tail = mp;
17102 minipool_vector_label = gen_label_rtx ();
17105 mp->next->prev = mp;
17107 minipool_vector_head = mp;
17111 mp->next = min_mp->next;
17115 if (mp->next != NULL)
17116 mp->next->prev = mp;
17118 minipool_vector_tail = mp;
17121 /* Save the new entry. */
17129 /* Scan over the following entries and adjust their offsets. */
17130 while (mp->next != NULL)
17132 if (mp->next->min_address < mp->min_address + mp->fix_size)
17133 mp->next->min_address = mp->min_address + mp->fix_size;
17136 mp->next->offset = mp->offset + mp->fix_size;
17138 mp->next->offset = mp->offset;
17147 assign_minipool_offsets (Mfix *barrier)
17149 HOST_WIDE_INT offset = 0;
17152 minipool_barrier = barrier;
17154 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17156 mp->offset = offset;
17158 if (mp->refcount > 0)
17159 offset += mp->fix_size;
17163 /* Output the literal table. */
17165 dump_minipool (rtx_insn *scan)
17171 if (ARM_DOUBLEWORD_ALIGN)
17172 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17173 if (mp->refcount > 0 && mp->fix_size >= 8)
17180 fprintf (dump_file,
17181 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
17182 INSN_UID (scan), (long) minipool_barrier->address, align64 ? 8 : 4);
17184 scan = emit_label_after (gen_label_rtx (), scan);
17185 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
17186 scan = emit_label_after (minipool_vector_label, scan);
17188 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
17190 if (mp->refcount > 0)
17194 fprintf (dump_file,
17195 ";; Offset %u, min %ld, max %ld ",
17196 (unsigned) mp->offset, (long) mp->min_address,
17197 (long) mp->max_address);
17198 arm_print_value (dump_file, mp->value);
17199 fputc ('\n', dump_file);
17202 rtx val = copy_rtx (mp->value);
17204 switch (GET_MODE_SIZE (mp->mode))
17206 #ifdef HAVE_consttable_1
17208 scan = emit_insn_after (gen_consttable_1 (val), scan);
17212 #ifdef HAVE_consttable_2
17214 scan = emit_insn_after (gen_consttable_2 (val), scan);
17218 #ifdef HAVE_consttable_4
17220 scan = emit_insn_after (gen_consttable_4 (val), scan);
17224 #ifdef HAVE_consttable_8
17226 scan = emit_insn_after (gen_consttable_8 (val), scan);
17230 #ifdef HAVE_consttable_16
17232 scan = emit_insn_after (gen_consttable_16 (val), scan);
17237 gcc_unreachable ();
17245 minipool_vector_head = minipool_vector_tail = NULL;
17246 scan = emit_insn_after (gen_consttable_end (), scan);
17247 scan = emit_barrier_after (scan);
17250 /* Return the cost of forcibly inserting a barrier after INSN. */
17252 arm_barrier_cost (rtx_insn *insn)
17254 /* Basing the location of the pool on the loop depth is preferable,
17255 but at the moment, the basic block information seems to be
17256 corrupt by this stage of the compilation. */
17257 int base_cost = 50;
17258 rtx_insn *next = next_nonnote_insn (insn);
17260 if (next != NULL && LABEL_P (next))
17263 switch (GET_CODE (insn))
17266 /* It will always be better to place the table before the label, rather than after it. */
17275 return base_cost - 10;
17278 return base_cost + 10;
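/* Worked example of the cost scheme above: with base_cost == 50, forcing
   a barrier after a jump insn costs base_cost - 10 == 40 (the cheapest
   choice, since control never falls through there), while the default
   case costs base_cost + 10 == 60.  create_fix_barrier below keeps the
   lowest-cost location seen while scanning.  */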
17282 /* Find the best place in the insn stream in the range
17283 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
17284 Create the barrier by inserting a jump and add a new fix entry for
17287 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
17289 HOST_WIDE_INT count = 0;
17290 rtx_barrier *barrier;
17291 rtx_insn *from = fix->insn;
17292 /* The instruction after which we will insert the jump. */
17293 rtx_insn *selected = NULL;
17295 /* The address at which the jump instruction will be placed. */
17296 HOST_WIDE_INT selected_address;
17298 HOST_WIDE_INT max_count = max_address - fix->address;
17299 rtx_code_label *label = gen_label_rtx ();
17301 selected_cost = arm_barrier_cost (from);
17302 selected_address = fix->address;
17304 while (from && count < max_count)
17306 rtx_jump_table_data *tmp;
17309 /* This code shouldn't have been called if there was a natural barrier within range. */
17311 gcc_assert (!BARRIER_P (from));
17313 /* Count the length of this insn. This must stay in sync with the
17314 code that pushes minipool fixes. */
17315 if (LABEL_P (from))
17316 count += get_label_padding (from);
17318 count += get_attr_length (from);
17320 /* If there is a jump table, add its length. */
17321 if (tablejump_p (from, NULL, &tmp))
17323 count += get_jump_table_size (tmp);
17325 /* Jump tables aren't in a basic block, so base the cost on
17326 the dispatch insn. If we select this location, we will
17327 still put the pool after the table. */
17328 new_cost = arm_barrier_cost (from);
17330 if (count < max_count
17331 && (!selected || new_cost <= selected_cost))
17334 selected_cost = new_cost;
17335 selected_address = fix->address + count;
17338 /* Continue after the dispatch table. */
17339 from = NEXT_INSN (tmp);
17343 new_cost = arm_barrier_cost (from);
17345 if (count < max_count
17346 && (!selected || new_cost <= selected_cost))
17349 selected_cost = new_cost;
17350 selected_address = fix->address + count;
17353 from = NEXT_INSN (from);
17356 /* Make sure that we found a place to insert the jump. */
17357 gcc_assert (selected);
17359 /* Create a new JUMP_INSN that branches around a barrier. */
17360 from = emit_jump_insn_after (gen_jump (label), selected);
17361 JUMP_LABEL (from) = label;
17362 barrier = emit_barrier_after (from);
17363 emit_label_after (label, barrier);
17365 /* Create a minipool barrier entry for the new barrier. */
17366 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
17367 new_fix->insn = barrier;
17368 new_fix->address = selected_address;
17369 new_fix->next = fix->next;
17370 fix->next = new_fix;
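/* The net effect on the insn stream is roughly (label number
   illustrative):

	b	.L99		@ the new jump insn emitted above
	@ barrier; dump_minipool will emit the constant table here
   .L99:

   so execution branches around the table that will be placed after the
   barrier.  */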
17375 /* Record that there is a natural barrier in the insn stream at ADDRESS. */
17378 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
17380 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17383 fix->address = address;
17386 if (minipool_fix_head != NULL)
17387 minipool_fix_tail->next = fix;
17389 minipool_fix_head = fix;
17391 minipool_fix_tail = fix;
17394 /* Record INSN, which will need fixing up to load a value from the
17395 minipool. ADDRESS is the offset of the insn since the start of the
17396 function; LOC is a pointer to the part of the insn which requires
17397 fixing; VALUE is the constant that must be loaded, which is of type MODE. */
17400 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
17401 machine_mode mode, rtx value)
17403 gcc_assert (!arm_disable_literal_pool);
17404 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17407 fix->address = address;
17410 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
17411 fix->value = value;
17412 fix->forwards = get_attr_pool_range (insn);
17413 fix->backwards = get_attr_neg_pool_range (insn);
17414 fix->minipool = NULL;
17416 /* If an insn doesn't have a range defined for it, then it isn't
17417 expecting to be reworked by this code. Better to stop now than
17418 to generate duff assembly code. */
17419 gcc_assert (fix->forwards || fix->backwards);
17421 /* If an entry requires 8-byte alignment then assume all constant pools
17422 require 4 bytes of padding. Trying to do this later on a per-pool
17423 basis is awkward because existing pool entries have to be modified. */
17424 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
17429 fprintf (dump_file,
17430 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
17431 GET_MODE_NAME (mode),
17432 INSN_UID (insn), (unsigned long) address,
17433 -1 * (long)fix->backwards, (long)fix->forwards);
17434 arm_print_value (dump_file, fix->value);
17435 fprintf (dump_file, "\n");
17438 /* Add it to the chain of fixes. */
17441 if (minipool_fix_head != NULL)
17442 minipool_fix_tail->next = fix;
17444 minipool_fix_head = fix;
17446 minipool_fix_tail = fix;
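/* For example, an ARM-mode literal load such as "ldr r0, .LCP" has a
   forward pool range of roughly 4 KB and a comparable backward range
   (the exact values come from the pool_range and neg_pool_range insn
   attributes in the machine description), so the fix recorded above can
   be satisfied by any pool placed within that window around the insn.  */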
17449 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
17450 Returns the number of insns needed, or 99 if we always want to synthesize the value in memory. */
17453 arm_max_const_double_inline_cost ()
17455 return ((optimize_size || arm_ld_sched) ? 3 : 4);
17458 /* Return the cost of synthesizing a 64-bit constant VAL inline.
17459 Returns the number of insns needed, or 99 if we don't know how to do it. */
17462 arm_const_double_inline_cost (rtx val)
17464 rtx lowpart, highpart;
17467 mode = GET_MODE (val);
17469 if (mode == VOIDmode)
17472 gcc_assert (GET_MODE_SIZE (mode) == 8);
17474 lowpart = gen_lowpart (SImode, val);
17475 highpart = gen_highpart_mode (SImode, mode, val);
17477 gcc_assert (CONST_INT_P (lowpart));
17478 gcc_assert (CONST_INT_P (highpart));
17480 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
17481 NULL_RTX, NULL_RTX, 0, 0)
17482 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
17483 NULL_RTX, NULL_RTX, 0, 0));
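/* A minimal worked example: for the 64-bit constant 0x0000000100000001
   each 32-bit half is the immediate 1, buildable with one insn apiece,
   so the cost computed above is 1 + 1 = 2, which is within the limit
   returned by arm_max_const_double_inline_cost.  */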
17486 /* Cost of loading a SImode constant. */
17488 arm_const_inline_cost (enum rtx_code code, rtx val)
17490 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
17491 NULL_RTX, NULL_RTX, 1, 0);
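/* For reference when reading the two predicates below: const_ok_for_arm
   accepts exactly the immediates an ARM data-processing insn can encode,
   i.e. an 8-bit value rotated right by an even amount.  Thus 0xFF000000
   (0xFF rotated right by 8) is encodable while 0x12345678 is not, and a
   64-bit constant whose halves are both encodable, directly or
   inverted, is cheap to build by parts.  */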
17494 /* Return true if it is worthwhile to split a 64-bit constant into two
17495 32-bit operations. This is the case if optimizing for size, or
17496 if we have load delay slots, or if one 32-bit part can be done with
17497 a single data operation. */
17499 arm_const_double_by_parts (rtx val)
17501 machine_mode mode = GET_MODE (val);
17504 if (optimize_size || arm_ld_sched)
17507 if (mode == VOIDmode)
17510 part = gen_highpart_mode (SImode, mode, val);
17512 gcc_assert (CONST_INT_P (part));
17514 if (const_ok_for_arm (INTVAL (part))
17515 || const_ok_for_arm (~INTVAL (part)))
17518 part = gen_lowpart (SImode, val);
17520 gcc_assert (CONST_INT_P (part));
17522 if (const_ok_for_arm (INTVAL (part))
17523 || const_ok_for_arm (~INTVAL (part)))
17529 /* Return true if it is possible to inline both the high and low parts
17530 of a 64-bit constant into 32-bit data processing instructions. */
17532 arm_const_double_by_immediates (rtx val)
17534 machine_mode mode = GET_MODE (val);
17537 if (mode == VOIDmode)
17540 part = gen_highpart_mode (SImode, mode, val);
17542 gcc_assert (CONST_INT_P (part));
17544 if (!const_ok_for_arm (INTVAL (part)))
17547 part = gen_lowpart (SImode, val);
17549 gcc_assert (CONST_INT_P (part));
17551 if (!const_ok_for_arm (INTVAL (part)))
17557 /* Scan INSN and note any of its operands that need fixing.
17558 If DO_PUSHES is false we do not actually push any of the fixups needed. */
17561 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17565 extract_constrain_insn (insn);
17567 if (recog_data.n_alternatives == 0)
17570 /* Fill in recog_op_alt with information about the constraints of
17572 preprocess_constraints (insn);
17574 const operand_alternative *op_alt = which_op_alt ();
17575 for (opno = 0; opno < recog_data.n_operands; opno++)
17577 /* Things we need to fix can only occur in inputs. */
17578 if (recog_data.operand_type[opno] != OP_IN)
17581 /* If this alternative is a memory reference, then any mention
17582 of constants in this alternative is really to fool reload
17583 into allowing us to accept one there. We need to fix them up
17584 now so that we output the right code. */
17585 if (op_alt[opno].memory_ok)
17587 rtx op = recog_data.operand[opno];
17589 if (CONSTANT_P (op))
17592 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17593 recog_data.operand_mode[opno], op);
17595 else if (MEM_P (op)
17596 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17597 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17601 rtx cop = avoid_constant_pool_reference (op);
17603 /* Casting the address of something to a mode narrower
17604 than a word can cause avoid_constant_pool_reference()
17605 to return the pool reference itself. That's no good to
17606 us here. Let's just hope that we can use the
17607 constant pool value directly. */
17609 cop = get_pool_constant (XEXP (op, 0));
17611 push_minipool_fix (insn, address,
17612 recog_data.operand_loc[opno],
17613 recog_data.operand_mode[opno], cop);
17623 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
17624 and unions in the context of ARMv8-M Security Extensions. It is used as a
17625 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
17626 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
17627 or four masks, depending on whether it is being computed for a
17628 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
17629 respectively. The tree for the type of the argument or a field within an
17630 argument is passed in ARG_TYPE, the current register this argument or field
17631 starts in is kept in the pointer REGNO and updated accordingly, the bit this
17632 argument or field starts at is passed in STARTING_BIT and the last used bit
17633 is kept in LAST_USED_BIT which is also updated accordingly. */
17635 static unsigned HOST_WIDE_INT
17636 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
17637 uint32_t * padding_bits_to_clear,
17638 unsigned starting_bit, int * last_used_bit)
17641 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
17643 if (TREE_CODE (arg_type) == RECORD_TYPE)
17645 unsigned current_bit = starting_bit;
17647 long int offset, size;
17650 field = TYPE_FIELDS (arg_type);
17653 /* The offset within a structure is always an offset from
17654 the start of that structure. Make sure we take that into account
17655 in the calculation of the register-based offset that we use here. */
17656 offset = starting_bit;
17657 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
17660 /* This is the actual size of the field, for bitfields this is the
17661 bitfield width and not the container size. */
17662 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
17664 if (*last_used_bit != offset)
17666 if (offset < *last_used_bit)
17668 /* This field's offset is before the 'last_used_bit'; that
17669 means this field goes in the next register. So we need to
17670 pad the rest of the current register and increase the
17671 register number. */
17673 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
17676 padding_bits_to_clear[*regno] |= mask;
17677 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
17682 /* Otherwise we pad the bits between the last field's end and
17683 the start of the new field. */
17686 mask = ((uint32_t)-1) >> (32 - offset);
17687 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
17688 padding_bits_to_clear[*regno] |= mask;
17690 current_bit = offset;
17693 /* Calculate further padding bits for inner structs/unions too. */
17694 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
17696 *last_used_bit = current_bit;
17697 not_to_clear_reg_mask
17698 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
17699 padding_bits_to_clear, offset,
17704 /* Update 'current_bit' with this field's size. If the
17705 'current_bit' lies in a subsequent register, update 'regno' and
17706 reset 'current_bit' to point to the current bit in that new register. */
17708 current_bit += size;
17709 while (current_bit >= 32)
17712 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
17715 *last_used_bit = current_bit;
17718 field = TREE_CHAIN (field);
17720 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
17722 else if (TREE_CODE (arg_type) == UNION_TYPE)
17724 tree field, field_t;
17725 int i, regno_t, field_size;
17729 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
17730 = {-1, -1, -1, -1};
17732 /* To compute the padding bits in a union we only consider bits as
17733 padding bits if they are always either a padding bit or fall outside a
17734 field's size for all fields in the union. */
17735 field = TYPE_FIELDS (arg_type);
17738 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
17739 = {0U, 0U, 0U, 0U};
17740 int last_used_bit_t = *last_used_bit;
17742 field_t = TREE_TYPE (field);
17744 /* If the field's type is either a record or a union make sure to
17745 compute their padding bits too. */
17746 if (RECORD_OR_UNION_TYPE_P (field_t))
17747 not_to_clear_reg_mask
17748 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
17749 &padding_bits_to_clear_t[0],
17750 starting_bit, &last_used_bit_t);
17753 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
17754 regno_t = (field_size / 32) + *regno;
17755 last_used_bit_t = (starting_bit + field_size) % 32;
17758 for (i = *regno; i < regno_t; i++)
17760 /* For all but the last register used by this field only keep the
17761 padding bits that were padding bits in this field. */
17762 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
17765 /* For the last register, keep all padding bits that were padding
17766 bits in this field and any padding bits that are still valid
17767 as padding bits but fall outside of this field's size. */
17768 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
17769 padding_bits_to_clear_res[regno_t]
17770 &= padding_bits_to_clear_t[regno_t] | mask;
17772 /* Update the maximum size of the fields in terms of registers used
17773 ('max_reg') and the 'last_used_bit' in said register. */
17774 if (max_reg < regno_t)
17777 max_bit = last_used_bit_t;
17779 else if (max_reg == regno_t && max_bit < last_used_bit_t)
17780 max_bit = last_used_bit_t;
17782 field = TREE_CHAIN (field);
17785 /* Update the current padding_bits_to_clear using the intersection of the
17786 padding bits of all the fields. */
17787 for (i = *regno; i < max_reg; i++)
17788 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
17790 /* Do not keep trailing padding bits; we do not know yet whether this
17791 is the end of the argument. */
17792 mask = ((uint32_t) 1 << max_bit) - 1;
17793 padding_bits_to_clear[max_reg]
17794 |= padding_bits_to_clear_res[max_reg] & mask;
17797 *last_used_bit = max_bit;
17800 /* This function should only be used for structs and unions. */
17801 gcc_unreachable ();
17803 return not_to_clear_reg_mask;
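/* A worked example, for a hypothetical argument type
   struct { uint8_t c; uint32_t i; } starting in r0: 'c' occupies bits
   0-7 of r0 and 'i' is aligned up to r1, so bits 8-31 of r0 are padding;
   padding_bits_to_clear[0] ends up as 0xffffff00 and both r0 and r1 are
   flagged in the returned not_to_clear_reg_mask.  */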
17806 /* In the context of ARMv8-M Security Extensions, this function is used for both
17807 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
17808 registers are used when returning or passing arguments, which is then
17809 returned as a mask. It will also compute a mask to indicate padding/unused
17810 bits for each of these registers, and pass this back through the
17811 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
17812 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
17813 the starting register used to pass this argument or return value is passed
17814 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
17815 for struct and union types. */
17817 static unsigned HOST_WIDE_INT
17818 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
17819 uint32_t * padding_bits_to_clear)
17822 int last_used_bit = 0;
17823 unsigned HOST_WIDE_INT not_to_clear_mask;
17825 if (RECORD_OR_UNION_TYPE_P (arg_type))
17828 = comp_not_to_clear_mask_str_un (arg_type, &regno,
17829 padding_bits_to_clear, 0,
17833 /* If the 'last_used_bit' is not zero, that means we are still using a
17834 part of the last 'regno'. In such cases we must clear the trailing
17835 bits. Otherwise we are not using regno and we should mark it as to clear. */
17837 if (last_used_bit != 0)
17838 padding_bits_to_clear[regno]
17839 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
17841 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
17845 not_to_clear_mask = 0;
17846 /* We are not dealing with structs or unions. So these arguments may be
17847 passed in floating point registers too. In some cases a BLKmode is
17848 used when returning or passing arguments in multiple VFP registers. */
17849 if (GET_MODE (arg_rtx) == BLKmode)
17854 /* This should really only occur when dealing with the hard-float ABI. */
17856 gcc_assert (TARGET_HARD_FLOAT_ABI);
17858 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
17860 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
17861 gcc_assert (REG_P (reg));
17863 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
17865 /* If we are dealing with DF mode, make sure we don't
17866 clear either of the registers it addresses. */
17867 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
17870 unsigned HOST_WIDE_INT mask;
17871 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
17872 mask -= HOST_WIDE_INT_1U << REGNO (reg);
17873 not_to_clear_mask |= mask;
17879 /* Otherwise we can rely on the MODE to determine how many registers
17880 are being used by this argument. */
17881 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
17882 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17885 unsigned HOST_WIDE_INT
17886 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
17887 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17888 not_to_clear_mask |= mask;
17893 return not_to_clear_mask;
17896 /* Clear secret registers before doing a cmse_nonsecure_call or returning from
17897 a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which registers
17898 are to be fully cleared, using the value in register CLEARING_REG if more
17899 efficient. The PADDING_BITS_TO_CLEAR array, which has PADDING_BITS_LEN
17900 entries, gives the bits that need to be cleared in caller-saved core
17901 registers, with SCRATCH_REG used as a scratch register for that clearing.
17903 NOTE: one of three following assertions must hold:
17904 - SCRATCH_REG is a low register
17905 - CLEARING_REG is in the set of registers fully cleared (i.e. its bit is set
17906 in TO_CLEAR_BITMAP)
17907 - CLEARING_REG is a low register. */
17910 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
17911 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
17913 bool saved_clearing = false;
17914 rtx saved_clearing_reg = NULL_RTX;
17915 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
17917 gcc_assert (arm_arch_cmse);
17919 if (!bitmap_empty_p (to_clear_bitmap))
17921 minregno = bitmap_first_set_bit (to_clear_bitmap);
17922 maxregno = bitmap_last_set_bit (to_clear_bitmap);
17924 clearing_regno = REGNO (clearing_reg);
17926 /* Clear padding bits. */
17927 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
17928 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
17931 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
17933 if (padding_bits_to_clear[i] == 0)
17936 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
17937 CLEARING_REG as scratch. */
17939 && REGNO (scratch_reg) > LAST_LO_REGNUM)
17941 /* clearing_reg is not to be cleared, copy its value into scratch_reg
17942 such that we can use clearing_reg to clear the unused bits in the arguments. */
17944 if ((clearing_regno > maxregno
17945 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
17946 && !saved_clearing)
17948 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
17949 emit_move_insn (scratch_reg, clearing_reg);
17950 saved_clearing = true;
17951 saved_clearing_reg = scratch_reg;
17953 scratch_reg = clearing_reg;
17956 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
17957 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
17958 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
17960 /* Fill the top half of the negated padding_bits_to_clear[i]. */
17961 mask = (~padding_bits_to_clear[i]) >> 16;
17962 rtx16 = gen_int_mode (16, SImode);
17963 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
17965 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
17967 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
17969 if (saved_clearing)
17970 emit_move_insn (clearing_reg, saved_clearing_reg);
17973 /* Clear full registers. */
17975 /* If not marked for clearing, clearing_reg already does not contain any secret. */
17977 if (clearing_regno <= maxregno
17978 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
17980 emit_move_insn (clearing_reg, const0_rtx);
17981 emit_use (clearing_reg);
17982 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
17985 for (regno = minregno; regno <= maxregno; regno++)
17987 if (!bitmap_bit_p (to_clear_bitmap, regno))
17990 if (IS_VFP_REGNUM (regno))
17992 /* If regno is an even vfp register and its successor is also to
17993 be cleared, use vmov. */
17994 if (TARGET_VFP_DOUBLE
17995 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17996 && bitmap_bit_p (to_clear_bitmap, regno + 1))
17998 emit_move_insn (gen_rtx_REG (DFmode, regno),
17999 CONST1_RTX (DFmode));
18000 emit_use (gen_rtx_REG (DFmode, regno));
18005 emit_move_insn (gen_rtx_REG (SFmode, regno),
18006 CONST1_RTX (SFmode));
18007 emit_use (gen_rtx_REG (SFmode, regno));
18012 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
18013 emit_use (gen_rtx_REG (SImode, regno));
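/* Illustrative output for a hypothetical case where r4 is CLEARING_REG
   and r1-r3 must be fully cleared:

	mov	r4, #0
	mov	r1, r4
	mov	r2, r4
	mov	r3, r4

   VFP registers selected in TO_CLEAR_BITMAP are instead overwritten
   with 1.0 by the CONST1_RTX moves above.  */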
18018 /* Clear caller-saved registers not used to pass arguments before a
18019 cmse_nonsecure_call. Saving, clearing and restoring of callee-saved
18020 registers is done in the __gnu_cmse_nonsecure_call libcall.
18021 See libgcc/config/arm/cmse_nonsecure_call.S. */
18024 cmse_nonsecure_call_clear_caller_saved (void)
18028 FOR_EACH_BB_FN (bb, cfun)
18032 FOR_BB_INSNS (bb, insn)
18034 unsigned address_regnum, regno, maxregno =
18035 TARGET_HARD_FLOAT_ABI ? D7_VFP_REGNUM : NUM_ARG_REGS - 1;
18036 auto_sbitmap to_clear_bitmap (maxregno + 1);
18038 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
18040 CUMULATIVE_ARGS args_so_far_v;
18041 cumulative_args_t args_so_far;
18042 tree arg_type, fntype;
18043 bool first_param = true;
18044 function_args_iterator args_iter;
18045 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
18047 if (!NONDEBUG_INSN_P (insn))
18050 if (!CALL_P (insn))
18053 pat = PATTERN (insn);
18054 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
18055 call = XVECEXP (pat, 0, 0);
18057 /* Get the real call RTX if the insn sets a value, i.e. returns. */
18058 if (GET_CODE (call) == SET)
18059 call = SET_SRC (call);
18061 /* Check if it is a cmse_nonsecure_call. */
18062 unspec = XEXP (call, 0);
18063 if (GET_CODE (unspec) != UNSPEC
18064 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
18067 /* Determine the caller-saved registers we need to clear. */
18068 bitmap_clear (to_clear_bitmap);
18069 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
18071 /* Only look at the caller-saved floating point registers in case of
18072 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
18073 lazy store and loads which clear both caller- and callee-saved registers. */
18075 if (TARGET_HARD_FLOAT_ABI)
18077 auto_sbitmap float_bitmap (maxregno + 1);
18079 bitmap_clear (float_bitmap);
18080 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
18081 D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1);
18082 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
18085 /* Make sure the register used to hold the function address is not cleared. */
18087 address = RTVEC_ELT (XVEC (unspec, 0), 0);
18088 gcc_assert (MEM_P (address));
18089 gcc_assert (REG_P (XEXP (address, 0)));
18090 address_regnum = REGNO (XEXP (address, 0));
18091 if (address_regnum < R0_REGNUM + NUM_ARG_REGS)
18092 bitmap_clear_bit (to_clear_bitmap, address_regnum);
18094 /* Set basic block of call insn so that df rescan is performed on
18095 insns inserted here. */
18096 set_block_for_insn (insn, bb);
18097 df_set_flags (DF_DEFER_INSN_RESCAN);
18100 /* Make sure the scheduler doesn't schedule other insns beyond here. */
18102 emit_insn (gen_blockage ());
18104 /* Walk through all arguments and clear registers appropriately. */
18106 fntype = TREE_TYPE (MEM_EXPR (address));
18107 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
18109 args_so_far = pack_cumulative_args (&args_so_far_v);
18110 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
18113 uint64_t to_clear_args_mask;
18115 if (VOID_TYPE_P (arg_type))
18118 function_arg_info arg (arg_type, /*named=*/true);
18120 /* ??? We should advance after processing the argument and pass
18121 the argument we're advancing past. */
18122 arm_function_arg_advance (args_so_far, arg);
18124 arg_rtx = arm_function_arg (args_so_far, arg);
18125 gcc_assert (REG_P (arg_rtx));
18127 = compute_not_to_clear_mask (arg_type, arg_rtx,
18129 &padding_bits_to_clear[0]);
18130 if (to_clear_args_mask)
18132 for (regno = R0_REGNUM; regno <= maxregno; regno++)
18134 if (to_clear_args_mask & (1ULL << regno))
18135 bitmap_clear_bit (to_clear_bitmap, regno);
18139 first_param = false;
18142 /* We use right shift and left shift to clear the LSB of the address
18143 we jump to instead of using bic, to avoid having to use an extra
18144 register on Thumb-1. */
18145 clearing_reg = XEXP (address, 0);
18146 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
18147 emit_insn (gen_rtx_SET (clearing_reg, shift));
18148 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
18149 emit_insn (gen_rtx_SET (clearing_reg, shift));
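/* For example, with the call address in r4 the two sets above assemble
   to

	lsrs	r4, r4, #1
	lsls	r4, r4, #1

   clearing bit 0 (the Thumb bit) without the extra register that a
   bic-with-immediate would need on Thumb-1.  */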
18151 /* Clear caller-saved registers that leak before doing a non-secure call. */
18153 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
18154 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
18155 NUM_ARG_REGS, ip_reg, clearing_reg);
18157 seq = get_insns ();
18159 emit_insn_before (seq, insn);
18164 /* Rewrite move insn into subtract of 0 if the condition codes will
18165 be useful in the next conditional jump insn. */
18168 thumb1_reorg (void)
18172 FOR_EACH_BB_FN (bb, cfun)
18175 rtx cmp, op0, op1, set = NULL;
18176 rtx_insn *prev, *insn = BB_END (bb);
18177 bool insn_clobbered = false;
18179 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
18180 insn = PREV_INSN (insn);
18182 /* Find the last cbranchsi4_insn in basic block BB. */
18183 if (insn == BB_HEAD (bb)
18184 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
18187 /* Get the register with which we are comparing. */
18188 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
18189 op0 = XEXP (cmp, 0);
18190 op1 = XEXP (cmp, 1);
18192 /* Check that comparison is against ZERO. */
18193 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
18196 /* Find the first flag setting insn before INSN in basic block BB. */
18197 gcc_assert (insn != BB_HEAD (bb));
18198 for (prev = PREV_INSN (insn);
18200 && prev != BB_HEAD (bb)
18202 || DEBUG_INSN_P (prev)
18203 || ((set = single_set (prev)) != NULL
18204 && get_attr_conds (prev) == CONDS_NOCOND)));
18205 prev = PREV_INSN (prev))
18207 if (reg_set_p (op0, prev))
18208 insn_clobbered = true;
18211 /* Skip if op0 is clobbered by an insn other than prev. */
18212 if (insn_clobbered)
18218 dest = SET_DEST (set);
18219 src = SET_SRC (set);
18220 if (!low_register_operand (dest, SImode)
18221 || !low_register_operand (src, SImode))
18224 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
18225 in INSN. Both src and dest of the move insn are checked. */
18226 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
18228 dest = copy_rtx (dest);
18229 src = copy_rtx (src);
18230 src = gen_rtx_MINUS (SImode, src, const0_rtx);
18231 PATTERN (prev) = gen_rtx_SET (dest, src);
18232 INSN_CODE (prev) = -1;
18233 /* Set test register in INSN to dest. */
18234 XEXP (cmp, 0) = copy_rtx (dest);
18235 INSN_CODE (insn) = -1;
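/* A rough example of this rewrite: in

	mov	r2, r0
	cmp	r2, #0
	beq	.L1

   the move becomes "subs r2, r0, #0", which sets the condition codes
   itself, so the compare against zero can later be removed.  */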
18240 /* Convert instructions to their cc-clobbering variant if possible, since
18241 that allows us to use smaller encodings. */
18244 thumb2_reorg (void)
18249 INIT_REG_SET (&live);
18251 /* We are freeing block_for_insn in the toplev to keep compatibility
18252 with old MDEP_REORGS that are not CFG based. Recompute it now. */
18253 compute_bb_for_insn ();
18256 enum Convert_Action {SKIP, CONV, SWAP_CONV};
18258 FOR_EACH_BB_FN (bb, cfun)
18260 if ((current_tune->disparage_flag_setting_t16_encodings
18261 == tune_params::DISPARAGE_FLAGS_ALL)
18262 && optimize_bb_for_speed_p (bb))
18266 Convert_Action action = SKIP;
18267 Convert_Action action_for_partial_flag_setting
18268 = ((current_tune->disparage_flag_setting_t16_encodings
18269 != tune_params::DISPARAGE_FLAGS_NEITHER)
18270 && optimize_bb_for_speed_p (bb))
18273 COPY_REG_SET (&live, DF_LR_OUT (bb));
18274 df_simulate_initialize_backwards (bb, &live);
18275 FOR_BB_INSNS_REVERSE (bb, insn)
18277 if (NONJUMP_INSN_P (insn)
18278 && !REGNO_REG_SET_P (&live, CC_REGNUM)
18279 && GET_CODE (PATTERN (insn)) == SET)
18282 rtx pat = PATTERN (insn);
18283 rtx dst = XEXP (pat, 0);
18284 rtx src = XEXP (pat, 1);
18285 rtx op0 = NULL_RTX, op1 = NULL_RTX;
18287 if (UNARY_P (src) || BINARY_P (src))
18288 op0 = XEXP (src, 0);
18290 if (BINARY_P (src))
18291 op1 = XEXP (src, 1);
18293 if (low_register_operand (dst, SImode))
18295 switch (GET_CODE (src))
18298 /* Adding two registers and storing the result
18299 in the first source is already a 16-bit operation. */
18301 if (rtx_equal_p (dst, op0)
18302 && register_operand (op1, SImode))
18305 if (low_register_operand (op0, SImode))
18307 /* ADDS <Rd>,<Rn>,<Rm> */
18308 if (low_register_operand (op1, SImode))
18310 /* ADDS <Rdn>,#<imm8> */
18311 /* SUBS <Rdn>,#<imm8> */
18312 else if (rtx_equal_p (dst, op0)
18313 && CONST_INT_P (op1)
18314 && IN_RANGE (INTVAL (op1), -255, 255))
18316 /* ADDS <Rd>,<Rn>,#<imm3> */
18317 /* SUBS <Rd>,<Rn>,#<imm3> */
18318 else if (CONST_INT_P (op1)
18319 && IN_RANGE (INTVAL (op1), -7, 7))
18322 /* ADCS <Rd>, <Rn> */
18323 else if (GET_CODE (XEXP (src, 0)) == PLUS
18324 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
18325 && low_register_operand (XEXP (XEXP (src, 0), 1),
18327 && COMPARISON_P (op1)
18328 && cc_register (XEXP (op1, 0), VOIDmode)
18329 && maybe_get_arm_condition_code (op1) == ARM_CS
18330 && XEXP (op1, 1) == const0_rtx)
18335 /* RSBS <Rd>,<Rn>,#0
18336 Not handled here: see NEG below. */
18337 /* SUBS <Rd>,<Rn>,#<imm3>
18339 Not handled here: see PLUS above. */
18340 /* SUBS <Rd>,<Rn>,<Rm> */
18341 if (low_register_operand (op0, SImode)
18342 && low_register_operand (op1, SImode))
18347 /* MULS <Rdm>,<Rn>,<Rdm>
18348 As an exception to the rule, this is only used
18349 when optimizing for size since MULS is slow on all
18350 known implementations. We do not even want to use
18351 MULS in cold code, if optimizing for speed, so we
18352 test the global flag here. */
18353 if (!optimize_size)
18355 /* Fall through. */
18359 /* ANDS <Rdn>,<Rm> */
18360 if (rtx_equal_p (dst, op0)
18361 && low_register_operand (op1, SImode))
18362 action = action_for_partial_flag_setting;
18363 else if (rtx_equal_p (dst, op1)
18364 && low_register_operand (op0, SImode))
18365 action = action_for_partial_flag_setting == SKIP
18366 ? SKIP : SWAP_CONV;
18372 /* ASRS <Rdn>,<Rm> */
18373 /* LSRS <Rdn>,<Rm> */
18374 /* LSLS <Rdn>,<Rm> */
18375 if (rtx_equal_p (dst, op0)
18376 && low_register_operand (op1, SImode))
18377 action = action_for_partial_flag_setting;
18378 /* ASRS <Rd>,<Rm>,#<imm5> */
18379 /* LSRS <Rd>,<Rm>,#<imm5> */
18380 /* LSLS <Rd>,<Rm>,#<imm5> */
18381 else if (low_register_operand (op0, SImode)
18382 && CONST_INT_P (op1)
18383 && IN_RANGE (INTVAL (op1), 0, 31))
18384 action = action_for_partial_flag_setting;
18388 /* RORS <Rdn>,<Rm> */
18389 if (rtx_equal_p (dst, op0)
18390 && low_register_operand (op1, SImode))
18391 action = action_for_partial_flag_setting;
18395 /* MVNS <Rd>,<Rm> */
18396 if (low_register_operand (op0, SImode))
18397 action = action_for_partial_flag_setting;
18401 /* NEGS <Rd>,<Rm> (a.k.a. RSBS) */
18402 if (low_register_operand (op0, SImode))
18407 /* MOVS <Rd>,#<imm8> */
18408 if (CONST_INT_P (src)
18409 && IN_RANGE (INTVAL (src), 0, 255))
18410 action = action_for_partial_flag_setting;
18414 /* MOVS and MOV<c> with registers have different
18415 encodings, so are not relevant here. */
18423 if (action != SKIP)
18425 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18426 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18429 if (action == SWAP_CONV)
18431 src = copy_rtx (src);
18432 XEXP (src, 0) = op1;
18433 XEXP (src, 1) = op0;
18434 pat = gen_rtx_SET (dst, src);
18435 vec = gen_rtvec (2, pat, clobber);
18437 else /* action == CONV */
18438 vec = gen_rtvec (2, pat, clobber);
18440 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
18441 INSN_CODE (insn) = -1;
18445 if (NONDEBUG_INSN_P (insn))
18446 df_simulate_one_insn_backwards (bb, insn, &live);
18450 CLEAR_REG_SET (&live);
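/* For instance, when the backwards df scan above shows CC dead,
   "add r0, r0, r1" can be rewritten as the flag-setting
   "adds r0, r0, r1", which has a 16-bit Thumb-2 encoding.  */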
18453 /* GCC puts the pool in the wrong place for ARM, since we can only
18454 load addresses a limited distance around the pc. We do some
18455 special munging to move the constant pool values to the correct
18456 point in the code. */
18461 HOST_WIDE_INT address = 0;
18465 cmse_nonsecure_call_clear_caller_saved ();
18467 /* We cannot run the Thumb passes for thunks because there is no CFG. */
18468 if (cfun->is_thunk)
18470 else if (TARGET_THUMB1)
18472 else if (TARGET_THUMB2)
18475 /* Ensure all insns that must be split have been split at this point.
18476 Otherwise, the pool placement code below may compute incorrect
18477 insn lengths. Note that when optimizing, all insns have already
18478 been split at this point. */
18480 split_all_insns_noflow ();
18482 /* Make sure we do not attempt to create a literal pool even though it should
18483 no longer be necessary to create any. */
18484 if (arm_disable_literal_pool)
18487 minipool_fix_head = minipool_fix_tail = NULL;
18489 /* The first insn must always be a note, or the code below won't
18490 scan it properly. */
18491 insn = get_insns ();
18492 gcc_assert (NOTE_P (insn));
18495 /* Scan all the insns and record the operands that will need fixing. */
18496 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
18498 if (BARRIER_P (insn))
18499 push_minipool_barrier (insn, address);
18500 else if (INSN_P (insn))
18502 rtx_jump_table_data *table;
18504 note_invalid_constants (insn, address, true);
18505 address += get_attr_length (insn);
18507 /* If the insn is a vector jump, add the size of the table
18508 and skip the table. */
18509 if (tablejump_p (insn, NULL, &table))
18511 address += get_jump_table_size (table);
18515 else if (LABEL_P (insn))
18516 /* Add the worst-case padding due to alignment. We don't add
18517 the _current_ padding because the minipool insertions
18518 themselves might change it. */
18519 address += get_label_padding (insn);
18522 fix = minipool_fix_head;
18524 /* Now scan the fixups and perform the required changes. */
18529 Mfix * last_added_fix;
18530 Mfix * last_barrier = NULL;
18533 /* Skip any further barriers before the next fix. */
18534 while (fix && BARRIER_P (fix->insn))
18537 /* No more fixes. */
18541 last_added_fix = NULL;
18543 for (ftmp = fix; ftmp; ftmp = ftmp->next)
18545 if (BARRIER_P (ftmp->insn))
18547 if (ftmp->address >= minipool_vector_head->max_address)
18550 last_barrier = ftmp;
18552 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
18555 last_added_fix = ftmp; /* Keep track of the last fix added. */
18558 /* If we found a barrier, drop back to that; any fixes that we
18559 could have reached but come after the barrier will now go in
18560 the next mini-pool. */
18561 if (last_barrier != NULL)
18563 /* Reduce the refcount for those fixes that won't go into this pool after all. */
18565 for (fdel = last_barrier->next;
18566 fdel && fdel != ftmp;
18569 fdel->minipool->refcount--;
18570 fdel->minipool = NULL;
18573 ftmp = last_barrier;
18577 /* ftmp is the first fix that we can't fit into this pool and
18578 there are no natural barriers that we could use. Insert a
18579 new barrier in the code somewhere between the previous
18580 fix and this one, and arrange to jump around it. */
18581 HOST_WIDE_INT max_address;
18583 /* The last item on the list of fixes must be a barrier, so
18584 we can never run off the end of the list of fixes without
18585 last_barrier being set. */
18588 max_address = minipool_vector_head->max_address;
18589 /* Check that there isn't another fix that is in range that
18590 we couldn't fit into this pool because the pool was
18591 already too large: we need to put the pool before such an
18592 instruction. The pool itself may come just after the
18593 fix because create_fix_barrier also allows space for a
18594 jump instruction. */
18595 if (ftmp->address < max_address)
18596 max_address = ftmp->address + 1;
18598 last_barrier = create_fix_barrier (last_added_fix, max_address);
18601 assign_minipool_offsets (last_barrier);
18605 if (!BARRIER_P (ftmp->insn)
18606 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
18613 /* Scan over the fixes we have identified for this pool, fixing them
18614 up and adding the constants to the pool itself. */
18615 for (this_fix = fix; this_fix && ftmp != this_fix;
18616 this_fix = this_fix->next)
18617 if (!BARRIER_P (this_fix->insn))
18620 = plus_constant (Pmode,
18621 gen_rtx_LABEL_REF (VOIDmode,
18622 minipool_vector_label),
18623 this_fix->minipool->offset);
18624 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
18627 dump_minipool (last_barrier->insn);
18631 /* From now on we must synthesize any constants that we can't handle
18632 directly. This can happen if the RTL gets split during final
18633 instruction generation. */
18634 cfun->machine->after_arm_reorg = 1;
18636 /* Free the minipool memory. */
18637 obstack_free (&minipool_obstack, minipool_startobj);
18640 /* Routines to output assembly language. */
18642 /* Return the string representation of the passed-in real value. */
18643 static const char *
18644 fp_const_from_val (REAL_VALUE_TYPE *r)
18646 if (!fp_consts_inited)
18649 gcc_assert (real_equal (r, &value_fp0));
18653 /* OPERANDS[0] is the entire list of insns that constitute the pop,
18654 OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
18655 is in the list, UPDATE is true iff the list contains an explicit
18656 update of the base register. */
18658 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
18664 const char *conditional;
18665 int num_saves = XVECLEN (operands[0], 0);
18666 unsigned int regno;
18667 unsigned int regno_base = REGNO (operands[1]);
18668 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
18671 offset += update ? 1 : 0;
18672 offset += return_pc ? 1 : 0;
18674 /* Is the base register in the list? */
18675 for (i = offset; i < num_saves; i++)
18677 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
18678 /* If SP is in the list, then the base register must be SP. */
18679 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
18680 /* If base register is in the list, there must be no explicit update. */
18681 if (regno == regno_base)
18682 gcc_assert (!update);
18685 conditional = reverse ? "%?%D0" : "%?%d0";
18686 /* Can't use POP if returning from an interrupt. */
18687 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
18688 sprintf (pattern, "pop%s\t{", conditional);
18691 /* Output ldmfd when the base register is SP, otherwise output ldmia.
18692 It's just a convention; their semantics are identical. */
18693 if (regno_base == SP_REGNUM)
18694 sprintf (pattern, "ldmfd%s\t", conditional);
18696 sprintf (pattern, "ldmia%s\t", conditional);
18698 sprintf (pattern, "ldm%s\t", conditional);
18700 strcat (pattern, reg_names[regno_base]);
18702 strcat (pattern, "!, {");
18704 strcat (pattern, ", {");
18707 /* Output the first destination register. */
18709 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
18711 /* Output the rest of the destination registers. */
18712 for (i = offset + 1; i < num_saves; i++)
18714 strcat (pattern, ", ");
18716 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
18719 strcat (pattern, "}");
18721 if (interrupt_p && return_pc)
18722 strcat (pattern, "^");
18724 output_asm_insn (pattern, &cond);
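/* For example (register choices illustrative): popping r4, r5 and the
   return address with an SP base and writeback prints
   "pop {r4, r5, pc}", while a non-SP base such as r7 without an
   explicit update prints "ldm r7, {r4, r5, pc}".  */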
18728 /* Output the assembly for a store multiple. */
18731 vfp_output_vstmd (rtx * operands)
18737 rtx addr_reg = REG_P (XEXP (operands[0], 0))
18738 ? XEXP (operands[0], 0)
18739 : XEXP (XEXP (operands[0], 0), 0);
18740 bool push_p = REGNO (addr_reg) == SP_REGNUM;
18743 strcpy (pattern, "vpush%?.64\t{%P1");
18745 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
18747 p = strlen (pattern);
18749 gcc_assert (REG_P (operands[1]));
18751 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
18752 for (i = 1; i < XVECLEN (operands[2], 0); i++)
18754 p += sprintf (&pattern[p], ", d%d", base + i);
18756 strcpy (&pattern[p], "}");
18758 output_asm_insn (pattern, operands);
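/* E.g. storing d8-d10 below the stack pointer prints
   "vpush.64 {d8, d9, d10}"; with a non-SP base register the equivalent
   "vstmdb rN!, {d8, d9, d10}" form is used (rN illustrative).  */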
18763 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
18764 number of bytes pushed. */
18767 vfp_emit_fstmd (int base_reg, int count)
18774 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
18775 register pairs are stored by a store multiple insn. We avoid this
18776 by pushing an extra pair. */
18777 if (count == 2 && !arm_arch6)
18779 if (base_reg == LAST_VFP_REGNUM - 3)
18784 /* FSTMD may not store more than 16 doubleword registers at once. Split
18785 larger stores into multiple parts (up to a maximum of two, in practice). */
18790 /* NOTE: base_reg is an internal register number, so each D register counts as 2. */
18792 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
18793 saved += vfp_emit_fstmd (base_reg, 16);
18797 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
18798 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
18800 reg = gen_rtx_REG (DFmode, base_reg);
18803 XVECEXP (par, 0, 0)
18804 = gen_rtx_SET (gen_frame_mem
18806 gen_rtx_PRE_MODIFY (Pmode,
18809 (Pmode, stack_pointer_rtx,
18812 gen_rtx_UNSPEC (BLKmode,
18813 gen_rtvec (1, reg),
18814 UNSPEC_PUSH_MULT));
18816 tmp = gen_rtx_SET (stack_pointer_rtx,
18817 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
18818 RTX_FRAME_RELATED_P (tmp) = 1;
18819 XVECEXP (dwarf, 0, 0) = tmp;
18821 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
18822 RTX_FRAME_RELATED_P (tmp) = 1;
18823 XVECEXP (dwarf, 0, 1) = tmp;
18825 for (i = 1; i < count; i++)
18827 reg = gen_rtx_REG (DFmode, base_reg);
18829 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
18831 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
18832 plus_constant (Pmode,
18836 RTX_FRAME_RELATED_P (tmp) = 1;
18837 XVECEXP (dwarf, 0, i + 1) = tmp;
18840 par = emit_insn (par);
18841 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
18842 RTX_FRAME_RELATED_P (par) = 1;
18847 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
18848 has the cmse_nonsecure_call attribute; return false otherwise. */
18851 detect_cmse_nonsecure_call (tree addr)
18856 tree fntype = TREE_TYPE (addr);
18857 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
18858 TYPE_ATTRIBUTES (fntype)))
18864 /* Emit a call instruction with pattern PAT. ADDR is the address of
18865 the call target. */
18868 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
18872 insn = emit_call_insn (pat);
18874 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18875 If the call might use such an entry, add a use of the PIC register
18876 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18877 if (TARGET_VXWORKS_RTP
18880 && GET_CODE (addr) == SYMBOL_REF
18881 && (SYMBOL_REF_DECL (addr)
18882 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
18883 : !SYMBOL_REF_LOCAL_P (addr)))
18885 require_pic_register (NULL_RTX, false /*compute_now*/);
18886 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
18891 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
18892 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
18895 if (TARGET_AAPCS_BASED)
18897 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18898 linker. We need to add an IP clobber to allow setting
18899 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18900 is not needed since it's a fixed register. */
18901 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
18902 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
18906 /* Output a 'call' insn. */
18908 output_call (rtx *operands)
18910 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
18912 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
18913 if (REGNO (operands[0]) == LR_REGNUM)
18915 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
18916 output_asm_insn ("mov%?\t%0, %|lr", operands);
18919 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
18921 if (TARGET_INTERWORK || arm_arch4t)
18922 output_asm_insn ("bx%?\t%0", operands);
18924 output_asm_insn ("mov%?\t%|pc, %0", operands);
18929 /* Output a move from ARM registers to ARM registers of a long double.
18930 OPERANDS[0] is the destination.
18931 OPERANDS[1] is the source. */
18933 output_mov_long_double_arm_from_arm (rtx *operands)
18935 /* We have to be careful here because the two might overlap. */
18936 int dest_start = REGNO (operands[0]);
18937 int src_start = REGNO (operands[1]);
18941 if (dest_start < src_start)
18943 for (i = 0; i < 3; i++)
18945 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18946 ops[1] = gen_rtx_REG (SImode, src_start + i);
18947 output_asm_insn ("mov%?\t%0, %1", ops);
18952 for (i = 2; i >= 0; i--)
18954 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18955 ops[1] = gen_rtx_REG (SImode, src_start + i);
18956 output_asm_insn ("mov%?\t%0, %1", ops);
18964 arm_emit_movpair (rtx dest, rtx src)
18966 /* If the src is an immediate, simplify it. */
18967 if (CONST_INT_P (src))
18969 HOST_WIDE_INT val = INTVAL (src);
18970 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
18971 if ((val >> 16) & 0x0000ffff)
18973 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18975 GEN_INT ((val >> 16) & 0x0000ffff));
18976 rtx_insn *insn = get_last_insn ();
18977 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18981 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18982 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18983 rtx_insn *insn = get_last_insn ();
18984 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
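/* A short worked example, assuming MOVW/MOVT are available: loading
   0x12345678 into a register rD emits

	movw	rD, #0x5678	@ dest = val & 0xffff
	movt	rD, #0x1234	@ the zero_extract fills the top half

   and the second insn is omitted when the upper 16 bits are zero.  */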
18987 /* Output a move between double words. It must be REG<-MEM or MEM<-REG. */
18990 output_move_double (rtx *operands, bool emit, int *count)
18992 enum rtx_code code0 = GET_CODE (operands[0]);
18993 enum rtx_code code1 = GET_CODE (operands[1]);
18998 /* The only case when this might happen is when
18999 you are looking at the length of a DImode instruction
19000 that has an invalid constant in it. */
19001 if (code0 == REG && code1 != MEM)
19003 gcc_assert (!emit);
19010 unsigned int reg0 = REGNO (operands[0]);
19012 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
19014 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
19016 switch (GET_CODE (XEXP (operands[1], 0)))
19023 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
19024 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
19026 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
19031 gcc_assert (TARGET_LDRD);
19033 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
19040 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
19042 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
19050 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
19052 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
19057 gcc_assert (TARGET_LDRD);
19059 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
19064 /* Autoincrement addressing modes should never have overlapping
19065 base and destination registers, and overlapping index registers
19066 are already prohibited, so this doesn't need to worry about
19068 otherops[0] = operands[0];
19069 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
19070 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
19072 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
19074 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
19076 /* Registers overlap so split out the increment. */
19079 output_asm_insn ("add%?\t%1, %1, %2", otherops);
19080 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
19087 /* Use a single insn if we can.
19088 FIXME: IWMMXT allows offsets larger than ldrd can
19089 handle, fix these up with a pair of ldr. */
19091 || !CONST_INT_P (otherops[2])
19092 || (INTVAL (otherops[2]) > -256
19093 && INTVAL (otherops[2]) < 256))
19096 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
19102 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
19103 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
19113 /* Use a single insn if we can.
19114 FIXME: IWMMXT allows offsets larger than ldrd can handle,
19115 fix these up with a pair of ldr. */
19117 || !CONST_INT_P (otherops[2])
19118 || (INTVAL (otherops[2]) > -256
19119 && INTVAL (otherops[2]) < 256))
19122 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
19128 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
19129 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
19139 /* We might be able to use ldrd %0, %1 here. However the range is
19140 different to ldr/adr, and it is broken on some ARMv7-M
19141 implementations. */
19142 /* Use the second register of the pair to avoid problematic conditions. */
19144 otherops[1] = operands[1];
19146 output_asm_insn ("adr%?\t%0, %1", otherops);
19147 operands[1] = otherops[0];
19151 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
19153 output_asm_insn ("ldmia%?\t%1, %M0", operands);
19160 /* ??? This needs checking for thumb2. */
19162 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
19163 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
19165 otherops[0] = operands[0];
19166 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
19167 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
19169 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
19171 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
19173 switch ((int) INTVAL (otherops[2]))
19177 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
19183 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
19189 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
19193 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
19194 operands[1] = otherops[0];
19196 && (REG_P (otherops[2])
19198 || (CONST_INT_P (otherops[2])
19199 && INTVAL (otherops[2]) > -256
19200 && INTVAL (otherops[2]) < 256)))
19202 if (reg_overlap_mentioned_p (operands[0],
19205 /* Swap base and index registers over to
19206 avoid a conflict. */
19207 std::swap (otherops[1], otherops[2]);
19209 /* If both registers conflict, it will usually
19210 have been fixed by a splitter. */
19211 if (reg_overlap_mentioned_p (operands[0], otherops[2])
19212 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
19216 output_asm_insn ("add%?\t%0, %1, %2", otherops);
19217 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
19224 otherops[0] = operands[0];
19226 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
19231 if (CONST_INT_P (otherops[2]))
19235 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
19236 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
19238 output_asm_insn ("add%?\t%0, %1, %2", otherops);
19244 output_asm_insn ("add%?\t%0, %1, %2", otherops);
19250 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
19257 return "ldrd%?\t%0, [%1]";
19259 return "ldmia%?\t%1, %M0";
19263 otherops[1] = adjust_address (operands[1], SImode, 4);
19264 /* Take care of overlapping base/data reg. */
19265 if (reg_mentioned_p (operands[0], operands[1]))
19269 output_asm_insn ("ldr%?\t%0, %1", otherops);
19270 output_asm_insn ("ldr%?\t%0, %1", operands);
19280 output_asm_insn ("ldr%?\t%0, %1", operands);
19281 output_asm_insn ("ldr%?\t%0, %1", otherops);
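/* For instance (illustrative): loading the pair {r0, r1} from [r0, #8]
   must load the high word first,
       ldr r1, [r0, #12]
       ldr r0, [r0, #8]
   so that the base register is not clobbered before the second load
   uses it.  */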
19291 /* Constraints should ensure this. */
19292 gcc_assert (code0 == MEM && code1 == REG);
19293 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
19294 || (TARGET_ARM && TARGET_LDRD));
19296 /* For TARGET_ARM the first source register of an STRD
19297 must be even. This is usually the case for double-word
19298 values but user assembly constraints can force an odd
19299 starting register. */
19300 bool allow_strd = TARGET_LDRD
19301 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
19302 switch (GET_CODE (XEXP (operands[0], 0)))
19308 output_asm_insn ("strd%?\t%1, [%m0]", operands);
19310 output_asm_insn ("stm%?\t%m0, %M1", operands);
19315 gcc_assert (allow_strd);
19317 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
19324 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
19326 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
19334 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
19336 output_asm_insn ("stm%?\t%m0!, %M1", operands);
19341 gcc_assert (allow_strd);
19343 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
19348 otherops[0] = operands[1];
19349 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
19350 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
19352 /* IWMMXT allows offsets larger than strd can handle,
19353 fix these up with a pair of str. */
19355 && CONST_INT_P (otherops[2])
19356 && (INTVAL (otherops[2]) <= -256
19357 || INTVAL (otherops[2]) >= 256))
19359 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
19363 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
19364 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
19373 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
19374 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
19380 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
19383 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
19388 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
19393 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
19394 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
19396 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
19400 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
19407 output_asm_insn ("stmda%?\t%m0, %M1", operands);
19414 output_asm_insn ("stmib%?\t%m0, %M1", operands);
19419 && (REG_P (otherops[2])
19421 || (CONST_INT_P (otherops[2])
19422 && INTVAL (otherops[2]) > -256
19423 && INTVAL (otherops[2]) < 256)))
19425 otherops[0] = operands[1];
19426 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
19428 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
19434 otherops[0] = adjust_address (operands[0], SImode, 4);
19435 otherops[1] = operands[1];
19438 output_asm_insn ("str%?\t%1, %0", operands);
19439 output_asm_insn ("str%?\t%H1, %0", otherops);
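/* As a concrete illustration (a sketch, not an exhaustive list of cases):
   a simple double-word load (set (reg:DI r0) (mem:DI (reg:SI r2))) comes
   out above as "ldrd r0, [r2]" when TARGET_LDRD, and as
   "ldmia r2, {r0, r1}" otherwise; the store cases mirror this with
   strd/stm.  */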
19449 /* Output a move, load or store for quad-word vectors in ARM registers. Only
19450 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
19453 output_move_quad (rtx *operands)
19455 if (REG_P (operands[0]))
19457 /* Load, or reg->reg move. */
19459 if (MEM_P (operands[1]))
19461 switch (GET_CODE (XEXP (operands[1], 0)))
19464 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
19469 output_asm_insn ("adr%?\t%0, %1", operands);
19470 output_asm_insn ("ldmia%?\t%0, %M0", operands);
19474 gcc_unreachable ();
19482 gcc_assert (REG_P (operands[1]));
19484 dest = REGNO (operands[0]);
19485 src = REGNO (operands[1]);
19487 /* This seems pretty dumb, but hopefully GCC won't try to do it very often.  */
if (dest < src)
19490 for (i = 0; i < 4; i++)
19492 ops[0] = gen_rtx_REG (SImode, dest + i);
19493 ops[1] = gen_rtx_REG (SImode, src + i);
19494 output_asm_insn ("mov%?\t%0, %1", ops);
else
19497 for (i = 3; i >= 0; i--)
19499 ops[0] = gen_rtx_REG (SImode, dest + i);
19500 ops[1] = gen_rtx_REG (SImode, src + i);
19501 output_asm_insn ("mov%?\t%0, %1", ops);
19507 gcc_assert (MEM_P (operands[0]));
19508 gcc_assert (REG_P (operands[1]));
19509 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
19511 switch (GET_CODE (XEXP (operands[0], 0)))
19514 output_asm_insn ("stm%?\t%m0, %M1", operands);
19518 gcc_unreachable ();
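/* Illustration of the move direction above: copying {r2..r5} <- {r4..r7}
   (dest < src) must run upwards, r2 <- r4 first, while copying
   {r4..r7} <- {r2..r5} must run downwards, r7 <- r5 first, so that no
   source register is clobbered before it has been read.  */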
19525 /* Output a VFP load or store instruction. */
19528 output_move_vfp (rtx *operands)
19530 rtx reg, mem, addr, ops[2];
19531 int load = REG_P (operands[0]);
19532 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
19533 int sp = (!TARGET_VFP_FP16INST
19534 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
19535 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
19540 reg = operands[!load];
19541 mem = operands[load];
19543 mode = GET_MODE (reg);
19545 gcc_assert (REG_P (reg));
19546 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
19547 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
19553 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
19554 gcc_assert (MEM_P (mem));
19556 addr = XEXP (mem, 0);
19558 switch (GET_CODE (addr))
19561 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
19562 ops[0] = XEXP (addr, 0);
19567 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
19568 ops[0] = XEXP (addr, 0);
19573 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
19579 sprintf (buff, templ,
19580 load ? "ld" : "st",
19581 dp ? "64" : sp ? "32" : "16",
19583 integer_p ? "\t%@ int" : "");
19584 output_asm_insn (buff, ops);
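/* Sketch of the expansion (illustrative, unconditional case): a
   double-precision load from a POST_INC of r4 selects the "v%smia"
   template and prints as "vldmia.64 r4!, {d7}", while a plain SFmode
   load uses the vldr form, "vldr.32 s0, [r4]".  */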
19589 /* Output a Neon double-word or quad-word load or store, or a load
19590 or store for larger structure modes.
19592 WARNING: The ordering of elements is weird in big-endian mode,
19593 because the EABI requires that vectors stored in memory appear
19594 as though they were stored by a VSTM instruction.
19595 GCC RTL defines element ordering based on in-memory order.
19596 This can be different from the architectural ordering of elements
19597 within a NEON register. The intrinsics defined in arm_neon.h use the
19598 NEON register element ordering, not the GCC RTL element ordering.
19600 For example, the in-memory ordering of a big-endian quadword
19601 vector with 16-bit elements when stored from register pair {d0,d1}
19602 will be (lowest address first, d0[N] is NEON register element N):
19604 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
19606 When necessary, quadword registers (dN, dN+1) are moved to ARM
19607 registers starting at rN, in the order:
19609 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
19611 So that STM/LDM can be used on vectors in ARM registers, and the
19612 same memory layout will result as if VSTM/VLDM were used.
19614 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
19615 possible, which allows use of appropriate alignment tags.
19616 Note that the choice of "64" is independent of the actual vector
19617 element size; this size simply ensures that the behavior is
19618 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
19620 Due to limitations of those instructions, use of VST1.64/VLD1.64
19621 is not possible if:
19622 - the address contains PRE_DEC, or
19623 - the mode refers to more than 4 double-word registers
19625 In those cases, it would be possible to replace VSTM/VLDM by a
19626 sequence of instructions; this is not currently implemented since
19627 this is not certain to actually improve performance. */
19630 output_move_neon (rtx *operands)
19632 rtx reg, mem, addr, ops[2];
19633 int regno, nregs, load = REG_P (operands[0]);
19638 reg = operands[!load];
19639 mem = operands[load];
19641 mode = GET_MODE (reg);
19643 gcc_assert (REG_P (reg));
19644 regno = REGNO (reg);
19645 nregs = REG_NREGS (reg) / 2;
19646 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
19647 || NEON_REGNO_OK_FOR_QUAD (regno));
19648 gcc_assert (VALID_NEON_DREG_MODE (mode)
19649 || VALID_NEON_QREG_MODE (mode)
19650 || VALID_NEON_STRUCT_MODE (mode));
19651 gcc_assert (MEM_P (mem));
19653 addr = XEXP (mem, 0);
19655 /* Strip off const from addresses like (const (plus (...))). */
19656 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
19657 addr = XEXP (addr, 0);
19659 switch (GET_CODE (addr))
19662 /* We have to use vldm / vstm for too-large modes. */
19665 templ = "v%smia%%?\t%%0!, %%h1";
19666 ops[0] = XEXP (addr, 0);
19670 templ = "v%s1.64\t%%h1, %%A0";
19677 /* We have to use vldm / vstm in this case, since there is no
19678 pre-decrement form of the vld1 / vst1 instructions. */
19679 templ = "v%smdb%%?\t%%0!, %%h1";
19680 ops[0] = XEXP (addr, 0);
19685 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
19686 gcc_unreachable ();
19689 /* We have to use vldm / vstm for too-large modes. */
19693 templ = "v%smia%%?\t%%m0, %%h1";
19695 templ = "v%s1.64\t%%h1, %%A0";
19701 /* Fall through. */
19707 for (i = 0; i < nregs; i++)
19709 /* We're only using DImode here because it's a convenient size. */
19710 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
19711 ops[1] = adjust_address (mem, DImode, 8 * i);
19712 if (reg_overlap_mentioned_p (ops[0], mem))
19714 gcc_assert (overlap == -1);
19719 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
19720 output_asm_insn (buff, ops);
19725 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
19726 ops[1] = adjust_address (mem, SImode, 8 * overlap);
19727 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
19728 output_asm_insn (buff, ops);
19735 gcc_unreachable ();
19738 sprintf (buff, templ, load ? "ld" : "st");
19739 output_asm_insn (buff, ops);
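/* Typical output (illustrative): a quad-word load from a POST_INC
   address prints roughly as "vld1.64 {d0, d1}, [r0]!", while a
   structure mode covering more than 4 D-registers falls back to the
   vldmia form with the full register list.  */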
19744 /* Compute and return the length of neon_mov<mode>, where <mode> is
19745 one of VSTRUCT modes: EI, OI, CI or XI. */
19747 arm_attr_length_move_neon (rtx_insn *insn)
19749 rtx reg, mem, addr;
19753 extract_insn_cached (insn);
19755 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
19757 mode = GET_MODE (recog_data.operand[0]);
19768 gcc_unreachable ();
19772 load = REG_P (recog_data.operand[0]);
19773 reg = recog_data.operand[!load];
19774 mem = recog_data.operand[load];
19776 gcc_assert (MEM_P (mem));
19778 addr = XEXP (mem, 0);
19780 /* Strip off const from addresses like (const (plus (...))). */
19781 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
19782 addr = XEXP (addr, 0);
19784 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
19786 int insns = REG_NREGS (reg) / 2;
19793 /* Return nonzero if the offset in the address is an immediate. Otherwise, return zero. */
19797 arm_address_offset_is_imm (rtx_insn *insn)
19801 extract_insn_cached (insn);
19803 if (REG_P (recog_data.operand[0]))
19806 mem = recog_data.operand[0];
19808 gcc_assert (MEM_P (mem));
19810 addr = XEXP (mem, 0);
19813 || (GET_CODE (addr) == PLUS
19814 && REG_P (XEXP (addr, 0))
19815 && CONST_INT_P (XEXP (addr, 1))))
19821 /* Output an ADD r, s, #n where n may be too big for one instruction.
19822 If adding zero to one register, output nothing. */
19824 output_add_immediate (rtx *operands)
19826 HOST_WIDE_INT n = INTVAL (operands[2]);
19828 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
19831 output_multi_immediate (operands,
19832 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
19835 output_multi_immediate (operands,
19836 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
19843 /* Output a multiple immediate operation.
19844 OPERANDS is the vector of operands referred to in the output patterns.
19845 INSTR1 is the output pattern to use for the first constant.
19846 INSTR2 is the output pattern to use for subsequent constants.
19847 IMMED_OP is the index of the constant slot in OPERANDS.
19848 N is the constant value. */
19849 static const char *
19850 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
19851 int immed_op, HOST_WIDE_INT n)
19853 #if HOST_BITS_PER_WIDE_INT > 32
19859 /* Quick and easy output. */
19860 operands[immed_op] = const0_rtx;
19861 output_asm_insn (instr1, operands);
19866 const char * instr = instr1;
19868 /* Note that n is never zero here (which would give no output). */
19869 for (i = 0; i < 32; i += 2)
19873 operands[immed_op] = GEN_INT (n & (255 << i));
19874 output_asm_insn (instr, operands);
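/* Worked example (illustrative): for n = 0x10004 the loop above emits
   one instruction per non-zero byte chunk of the constant, e.g. for
   an add
       add r0, r1, #4
       add r0, r0, #65536  */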
19884 /* Return the name of a shifter operation. */
19885 static const char *
19886 arm_shift_nmem (enum rtx_code code)
19891 return ARM_LSL_NAME;
19907 /* Return the appropriate ARM instruction for the operation code.
19908 The returned result should not be overwritten. OP is the rtx of the
19909 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator was shifted. */
19912 arithmetic_instr (rtx op, int shift_first_arg)
19914 switch (GET_CODE (op))
19920 return shift_first_arg ? "rsb" : "sub";
19935 return arm_shift_nmem (GET_CODE (op));
19938 gcc_unreachable ();
19942 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19943 for the operation code. The returned result should not be overwritten.
19944 OP is the rtx code of the shift.
19945 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant shift amount otherwise. */
19947 static const char *
19948 shift_op (rtx op, HOST_WIDE_INT *amountp)
19951 enum rtx_code code = GET_CODE (op);
19956 if (!CONST_INT_P (XEXP (op, 1)))
19958 output_operand_lossage ("invalid shift operand");
19963 *amountp = 32 - INTVAL (XEXP (op, 1));
19971 mnem = arm_shift_nmem (code);
19972 if (CONST_INT_P (XEXP (op, 1)))
19974 *amountp = INTVAL (XEXP (op, 1));
19976 else if (REG_P (XEXP (op, 1)))
19983 output_operand_lossage ("invalid shift operand");
19989 /* We never have to worry about the amount being other than a
19990 power of 2, since this case can never be reloaded from a reg. */
19991 if (!CONST_INT_P (XEXP (op, 1)))
19993 output_operand_lossage ("invalid shift operand");
19997 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19999 /* Amount must be a power of two. */
20000 if (*amountp & (*amountp - 1))
20002 output_operand_lossage ("invalid shift operand");
20006 *amountp = exact_log2 (*amountp);
20007 gcc_assert (IN_RANGE (*amountp, 0, 31));
20008 return ARM_LSL_NAME;
20011 output_operand_lossage ("invalid shift operand");
20015 /* This is not 100% correct, but follows from the desire to merge
20016 multiplication by a power of 2 with the recognizer for a
20017 shift. >=32 is not a valid shift for "lsl", so we must try and
20018 output a shift that produces the correct arithmetical result.
20019 Using lsr #32 is identical except for the fact that the carry bit
20020 is not set correctly if we set the flags; but we never use the
20021 carry bit from such an operation, so we can ignore that. */
20022 if (code == ROTATERT)
20023 /* Rotate is just modulo 32. */
20025 else if (*amountp != (*amountp & 31))
20027 if (code == ASHIFT)
20032 /* Shifts of 0 are no-ops. */
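/* Examples (illustrative): (ashift x (const_int 3)) yields "lsl" with
   *amountp = 3; (mult x (const_int 8)) also yields ARM_LSL_NAME with
   *amountp = exact_log2 (8) = 3; and a rotate amount is simply reduced
   modulo 32.  */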
20039 /* Output a .ascii pseudo-op, keeping track of lengths. This is
20040 because /bin/as is horribly restrictive. The judgement about
20041 whether or not each character is 'printable' (and can be output as
20042 is) or not (and must be printed with an octal escape) must be made
20043 with reference to the *host* character set -- the situation is
20044 similar to that discussed in the comments above pp_c_char in
20045 c-pretty-print.c. */
20047 #define MAX_ASCII_LEN 51
20050 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
20053 int len_so_far = 0;
20055 fputs ("\t.ascii\t\"", stream);
20057 for (i = 0; i < len; i++)
20061 if (len_so_far >= MAX_ASCII_LEN)
20063 fputs ("\"\n\t.ascii\t\"", stream);
20069 if (c == '\\' || c == '\"')
20071 putc ('\\', stream);
20079 fprintf (stream, "\\%03o", c);
20084 fputs ("\"\n", stream);
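/* Example (illustrative): given the bytes  a " b \n  this emits
       .ascii "a\"b\012"
   starting a fresh .ascii directive whenever a line reaches
   MAX_ASCII_LEN characters.  */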
20088 /* Compute the register save mask for registers 0 through 12
20089 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
20091 static unsigned long
20092 arm_compute_save_reg0_reg12_mask (void)
20094 unsigned long func_type = arm_current_func_type ();
20095 unsigned long save_reg_mask = 0;
20098 if (IS_INTERRUPT (func_type))
20100 unsigned int max_reg;
20101 /* Interrupt functions must not corrupt any registers,
20102 even call clobbered ones. If this is a leaf function
20103 we can just examine the registers used by the RTL, but
20104 otherwise we have to assume that whatever function is
20105 called might clobber anything, and so we have to save
20106 all the call-clobbered registers as well. */
20107 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
20108 /* FIQ handlers have registers r8 - r12 banked, so
20109 we only need to check r0 - r7. Normal ISRs only
20110 bank r14 and r15, so we must check up to r12.
20111 r13 is the stack pointer which is always preserved,
20112 so we do not need to consider it here. */
20117 for (reg = 0; reg <= max_reg; reg++)
20118 if (df_regs_ever_live_p (reg)
20119 || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg)))
20120 save_reg_mask |= (1 << reg);
20122 /* Also save the pic base register if necessary. */
20123 if (PIC_REGISTER_MAY_NEED_SAVING
20124 && crtl->uses_pic_offset_table)
20125 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
20127 else if (IS_VOLATILE (func_type))
20129 /* For noreturn functions we historically omitted register saves
20130 altogether. However this really messes up debugging. As a
20131 compromise save just the frame pointers. Combined with the link
20132 register saved elsewhere this should be sufficient to get a backtrace. */
20134 if (frame_pointer_needed)
20135 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
20136 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
20137 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
20138 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
20139 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
20143 /* In the normal case we only need to save those registers
20144 which are call saved and which are used by this function. */
20145 for (reg = 0; reg <= 11; reg++)
20146 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
20147 save_reg_mask |= (1 << reg);
20149 /* Handle the frame pointer as a special case. */
20150 if (frame_pointer_needed)
20151 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
20153 /* If we aren't loading the PIC register,
20154 don't stack it even though it may be live. */
20155 if (PIC_REGISTER_MAY_NEED_SAVING
20156 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
20157 || crtl->uses_pic_offset_table))
20158 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
20160 /* The prologue will copy SP into R0, so save it. */
20161 if (IS_STACKALIGN (func_type))
20162 save_reg_mask |= 1;
20165 /* Save registers so the exception handler can modify them. */
20166 if (crtl->calls_eh_return)
20172 reg = EH_RETURN_DATA_REGNO (i);
20173 if (reg == INVALID_REGNUM)
20175 save_reg_mask |= 1 << reg;
20179 return save_reg_mask;
20182 /* Return true if r3 is live at the start of the function. */
20185 arm_r3_live_at_start_p (void)
20187 /* Just look at cfg info, which is still close enough to correct at this
20188 point. This gives false positives for broken functions that might use
20189 uninitialized data that happens to be allocated in r3, but who cares? */
20190 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
20193 /* Compute the number of bytes used to store the static chain register on the
20194 stack, above the stack frame. We need to know this accurately to get the
20195 alignment of the rest of the stack frame correct. */
20198 arm_compute_static_chain_stack_bytes (void)
20200 /* Once the value is updated from the init value of -1, do not recompute it. */
20202 if (cfun->machine->static_chain_stack_bytes != -1)
20203 return cfun->machine->static_chain_stack_bytes;
20205 /* See the defining assertion in arm_expand_prologue. */
20206 if (IS_NESTED (arm_current_func_type ())
20207 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20208 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
20209 || flag_stack_clash_protection)
20210 && !df_regs_ever_live_p (LR_REGNUM)))
20211 && arm_r3_live_at_start_p ()
20212 && crtl->args.pretend_args_size == 0)
20218 /* Compute a bit mask of which core registers need to be
20219 saved on the stack for the current function.
20220 This is used by arm_compute_frame_layout, which may add extra registers. */
20222 static unsigned long
20223 arm_compute_save_core_reg_mask (void)
20225 unsigned int save_reg_mask = 0;
20226 unsigned long func_type = arm_current_func_type ();
20229 if (IS_NAKED (func_type))
20230 /* This should never really happen. */
20233 /* If we are creating a stack frame, then we must save the frame pointer,
20234 IP (which will hold the old stack pointer), LR and the PC. */
20235 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20237 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
20240 | (1 << PC_REGNUM);
20242 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
20244 /* Decide if we need to save the link register.
20245 Interrupt routines have their own banked link register,
20246 so they never need to save it.
20247 Otherwise if we do not use the link register we do not need to save
20248 it. If we are pushing other registers onto the stack however, we
20249 can save an instruction in the epilogue by pushing the link register
20250 now and then popping it back into the PC. This incurs extra memory
20251 accesses though, so we only do it when optimizing for size, and only
20252 if we know that we will not need a fancy return sequence. */
20253 if (df_regs_ever_live_p (LR_REGNUM)
20256 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
20257 && !crtl->tail_call_emit
20258 && !crtl->calls_eh_return))
20259 save_reg_mask |= 1 << LR_REGNUM;
20261 if (cfun->machine->lr_save_eliminated)
20262 save_reg_mask &= ~ (1 << LR_REGNUM);
20264 if (TARGET_REALLY_IWMMXT
20265 && ((bit_count (save_reg_mask)
20266 + ARM_NUM_INTS (crtl->args.pretend_args_size +
20267 arm_compute_static_chain_stack_bytes ())
20270 /* The total number of registers that are going to be pushed
20271 onto the stack is odd. We need to ensure that the stack
20272 is 64-bit aligned before we start to save iWMMXt registers,
20273 and also before we start to create locals. (A local variable
20274 might be a double or long long which we will load/store using
20275 an iWMMXt instruction). Therefore we need to push another
20276 ARM register, so that the stack will be 64-bit aligned. We
20277 try to avoid using the arg registers (r0 - r3) as they might be
20278 used to pass values in a tail call. */
20279 for (reg = 4; reg <= 12; reg++)
20280 if ((save_reg_mask & (1 << reg)) == 0)
20284 save_reg_mask |= (1 << reg);
20287 cfun->machine->sibcall_blocked = 1;
20288 save_reg_mask |= (1 << 3);
20292 /* We may need to push an additional register for use in initializing the
20293 PIC base register. */
20294 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
20295 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
20297 reg = thumb_find_work_register (1 << 4);
20298 if (!call_used_or_fixed_reg_p (reg))
20299 save_reg_mask |= (1 << reg);
20302 return save_reg_mask;
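/* Illustration of the LR heuristic above (a sketch): when optimizing for
   size, a normal function that already saves r4 may push LR as well,
   giving "push {r4, lr}" ... "pop {r4, pc}" in place of a separate
   return instruction, at the cost of the extra memory accesses.  */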
20305 /* Compute a bit mask of which core registers need to be
20306 saved on the stack for the current function. */
20307 static unsigned long
20308 thumb1_compute_save_core_reg_mask (void)
20310 unsigned long mask;
20314 for (reg = 0; reg < 12; reg ++)
20315 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
20318 /* Handle the frame pointer as a special case. */
20319 if (frame_pointer_needed)
20320 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
20323 && !TARGET_SINGLE_PIC_BASE
20324 && arm_pic_register != INVALID_REGNUM
20325 && crtl->uses_pic_offset_table)
20326 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
20328 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
20329 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
20330 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
20332 /* LR will also be pushed if any lo regs are pushed. */
20333 if (mask & 0xff || thumb_force_lr_save ())
20334 mask |= (1 << LR_REGNUM);
20336 bool call_clobbered_scratch
20337 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
20338 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
20340 /* Make sure we have a low work register if we need one. We will
20341 need one if we are going to push a high register, but we are not
20342 currently intending to push a low register. However if both the
20343 prologue and epilogue have a spare call-clobbered low register,
20344 then we won't need to find an additional work register. It does
20345 not need to be the same register in the prologue and epilogue. */
20347 if ((mask & 0xff) == 0
20348 && !call_clobbered_scratch
20349 && ((mask & 0x0f00) || TARGET_BACKTRACE))
20351 /* Use thumb_find_work_register to choose which register
20352 we will use. If the register is live then we will
20353 have to push it. Use LAST_LO_REGNUM as our fallback
20354 choice for the register to select. */
20355 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
20356 /* Make sure the register returned by thumb_find_work_register is
20357 not part of the return value. */
20358 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
20359 reg = LAST_LO_REGNUM;
20361 if (callee_saved_reg_p (reg))
20365 /* The 504 below is 8 bytes less than 512 because there are two possible
20366 alignment words. We can't tell here if they will be present or not so we
20367 have to play it safe and assume that they are. */
20368 if ((CALLER_INTERWORKING_SLOT_SIZE +
20369 ROUND_UP_WORD (get_frame_size ()) +
20370 crtl->outgoing_args_size) >= 504)
20372 /* This is the same as the code in thumb1_expand_prologue() which
20373 determines which register to use for stack decrement. */
20374 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
20375 if (mask & (1 << reg))
20378 if (reg > LAST_LO_REGNUM)
20380 /* Make sure we have a register available for stack decrement. */
20381 mask |= 1 << LAST_LO_REGNUM;
20389 /* Return the number of bytes required to save VFP registers. */
20391 arm_get_vfp_saved_size (void)
20393 unsigned int regno;
20398 /* Space for saved VFP registers. */
20399 if (TARGET_HARD_FLOAT)
20402 for (regno = FIRST_VFP_REGNUM;
20403 regno < LAST_VFP_REGNUM;
20406 if ((!df_regs_ever_live_p (regno)
20407 || call_used_or_fixed_reg_p (regno))
20408 && (!df_regs_ever_live_p (regno + 1)
20409 || call_used_or_fixed_reg_p (regno + 1)))
20413 /* Workaround ARM10 VFPr1 bug. */
20414 if (count == 2 && !arm_arch6)
20416 saved += count * 8;
20425 if (count == 2 && !arm_arch6)
20427 saved += count * 8;
20434 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
20435 everything bar the final return instruction. If SIMPLE_RETURN is true,
20436 then do not output the epilogue, because it has already been emitted in RTL.
20438 Note: do not forget to update the length attribute of the corresponding insn
20439 pattern when changing assembly output (e.g. the length attribute of
20440 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
20441 register clearing sequences). */
20443 output_return_instruction (rtx operand, bool really_return, bool reverse,
20444 bool simple_return)
20446 char conditional[10];
20449 unsigned long live_regs_mask;
20450 unsigned long func_type;
20451 arm_stack_offsets *offsets;
20453 func_type = arm_current_func_type ();
20455 if (IS_NAKED (func_type))
20458 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
20460 /* If this function was declared non-returning, and we have
20461 found a tail call, then we have to trust that the called
20462 function won't return. */
20467 /* Otherwise, trap an attempted return by aborting. */
20469 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
20471 assemble_external_libcall (ops[1]);
20472 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
20478 gcc_assert (!cfun->calls_alloca || really_return);
20480 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
20482 cfun->machine->return_used_this_function = 1;
20484 offsets = arm_get_frame_offsets ();
20485 live_regs_mask = offsets->saved_regs_mask;
20487 if (!simple_return && live_regs_mask)
20489 const char * return_reg;
20491 /* If we do not have any special requirements for function exit
20492 (e.g. interworking) then we can load the return address
20493 directly into the PC. Otherwise we must load it into LR. */
20495 && !IS_CMSE_ENTRY (func_type)
20496 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
20497 return_reg = reg_names[PC_REGNUM];
20499 return_reg = reg_names[LR_REGNUM];
20501 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
20503 /* There are three possible reasons for the IP register
20504 being saved: 1) a stack frame was created, in which case
20505 IP contains the old stack pointer, or 2) an ISR routine
20506 corrupted it, or 3) it was saved to align the stack on
20507 iWMMXt. In case 1, restore IP into SP, otherwise just ignore it. */
20509 if (frame_pointer_needed)
20511 live_regs_mask &= ~ (1 << IP_REGNUM);
20512 live_regs_mask |= (1 << SP_REGNUM);
20515 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
20518 /* On some ARM architectures it is faster to use LDR rather than
20519 LDM to load a single register. On other architectures, the
20520 cost is the same. In 26 bit mode, or for exception handlers,
20521 we have to use LDM to load the PC so that the CPSR is also restored. */
20523 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
20524 if (live_regs_mask == (1U << reg))
20527 if (reg <= LAST_ARM_REGNUM
20528 && (reg != LR_REGNUM
20530 || ! IS_INTERRUPT (func_type)))
20532 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
20533 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
20540 /* Generate the load multiple instruction to restore the
20541 registers. Note we can get here, even if
20542 frame_pointer_needed is true, but only if sp already
20543 points to the base of the saved core registers. */
20544 if (live_regs_mask & (1 << SP_REGNUM))
20546 unsigned HOST_WIDE_INT stack_adjust;
20548 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
20549 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
20551 if (stack_adjust && arm_arch5t && TARGET_ARM)
20552 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
20555 /* If we can't use ldmib (SA110 bug),
20556 then try to pop r3 instead. */
20558 live_regs_mask |= 1 << 3;
20560 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
20563 /* For interrupt returns we have to use an LDM rather than
20564 a POP so that we can use the exception return variant. */
20565 else if (IS_INTERRUPT (func_type))
20566 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
20568 sprintf (instr, "pop%s\t{", conditional);
20570 p = instr + strlen (instr);
20572 for (reg = 0; reg <= SP_REGNUM; reg++)
20573 if (live_regs_mask & (1 << reg))
20575 int l = strlen (reg_names[reg]);
20581 memcpy (p, ", ", 2);
20585 memcpy (p, "%|", 2);
20586 memcpy (p + 2, reg_names[reg], l);
20590 if (live_regs_mask & (1 << LR_REGNUM))
20592 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
20593 /* If returning from an interrupt, restore the CPSR. */
20594 if (IS_INTERRUPT (func_type))
20601 output_asm_insn (instr, & operand);
20603 /* See if we need to generate an extra instruction to
20604 perform the actual function return. */
20606 && func_type != ARM_FT_INTERWORKED
20607 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
20609 /* The return has already been handled
20610 by loading the LR into the PC. */
20617 switch ((int) ARM_FUNC_TYPE (func_type))
20621 /* ??? This is wrong for unified assembly syntax. */
20622 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
20625 case ARM_FT_INTERWORKED:
20626 gcc_assert (arm_arch5t || arm_arch4t);
20627 sprintf (instr, "bx%s\t%%|lr", conditional);
20630 case ARM_FT_EXCEPTION:
20631 /* ??? This is wrong for unified assembly syntax. */
20632 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
20636 if (IS_CMSE_ENTRY (func_type))
20638 /* Check if we have to clear the 'GE' bits, which are only used if
20639 parallel add and subtract instructions are available. */
20640 if (TARGET_INT_SIMD)
20641 snprintf (instr, sizeof (instr),
20642 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
20644 snprintf (instr, sizeof (instr),
20645 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
20647 output_asm_insn (instr, & operand);
20648 if (TARGET_HARD_FLOAT)
20650 /* Clear the cumulative exception-status bits (0-4,7) and the
20651 condition code bits (28-31) of the FPSCR. We need to
20652 remember to clear the first scratch register used (IP) and
20653 save and restore the second (r4). */
20654 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
20655 output_asm_insn (instr, & operand);
20656 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
20657 output_asm_insn (instr, & operand);
20658 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
20659 output_asm_insn (instr, & operand);
20660 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
20661 output_asm_insn (instr, & operand);
20662 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
20663 output_asm_insn (instr, & operand);
20664 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
20665 output_asm_insn (instr, & operand);
20666 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
20667 output_asm_insn (instr, & operand);
20668 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
20669 output_asm_insn (instr, & operand);
20671 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
20673 /* Use bx if it's available. */
20674 else if (arm_arch5t || arm_arch4t)
20675 sprintf (instr, "bx%s\t%%|lr", conditional);
20677 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
20681 output_asm_insn (instr, & operand);
20687 /* Output in FILE asm statements needed to declare the NAME of the function
20688 defined by its DECL node. */
20691 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
20693 size_t cmse_name_len;
20694 char *cmse_name = 0;
20695 char cmse_prefix[] = "__acle_se_";
20697 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
20698 extra function label for each function with the 'cmse_nonsecure_entry'
20699 attribute. This extra function label should be prepended with
20700 '__acle_se_', telling the linker that it needs to create secure gateway
20701 veneers for this function. */
20702 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
20703 DECL_ATTRIBUTES (decl)))
20705 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
20706 cmse_name = XALLOCAVEC (char, cmse_name_len);
20707 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
20708 targetm.asm_out.globalize_label (file, cmse_name);
20710 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
20711 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
20714 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
20715 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20716 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20717 ASM_OUTPUT_LABEL (file, name);
20720 ASM_OUTPUT_LABEL (file, cmse_name);
20722 ARM_OUTPUT_FN_UNWIND (file, TRUE);
20725 /* Write the function name into the code section, directly preceding
20726 the function prologue.
20728 Code will be output similar to this:
20730 t0: .ascii "arm_poke_function_name", 0
.align
20733 t1: .word 0xff000000 + (t1 - t0)
20734 arm_poke_function_name
mov ip, sp
20736 stmfd sp!, {fp, ip, lr, pc}
sub fp, ip, #4
20739 When performing a stack backtrace, code can inspect the value
20740 of 'pc' stored at 'fp' + 0. If the trace function then looks
20741 at location pc - 12 and the top 8 bits are set, then we know
20742 that there is a function name embedded immediately preceding this
20743 location, whose length is (pc[-3] & 0x00ffffff).
20745 We assume that pc is declared as a pointer to an unsigned long.
20747 It is of no benefit to output the function name if we are assembling
20748 a leaf function. These function types will not contain a stack
20749 backtrace structure, therefore it is not possible to determine the function name. */
20752 arm_poke_function_name (FILE *stream, const char *name)
20754 unsigned long alignlength;
20755 unsigned long length;
20758 length = strlen (name) + 1;
20759 alignlength = ROUND_UP_WORD (length);
20761 ASM_OUTPUT_ASCII (stream, name, length);
20762 ASM_OUTPUT_ALIGN (stream, 2);
20763 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
20764 assemble_aligned_integer (UNITS_PER_WORD, x);
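/* For illustration only (not part of GCC): given the layout above, a
   backtrace routine holding the saved 'pc' as an unsigned long * could
   recover the name roughly as follows:

     if ((pc[-3] & 0xff000000) == 0xff000000)
       {
         unsigned long len = pc[-3] & 0x00ffffff;
         const char *name = (const char *) pc - 12 - len;
       }
*/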
20767 /* Place some comments into the assembler stream
20768 describing the current function. */
20770 arm_output_function_prologue (FILE *f)
20772 unsigned long func_type;
20774 /* Sanity check. */
20775 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
20777 func_type = arm_current_func_type ();
20779 switch ((int) ARM_FUNC_TYPE (func_type))
20782 case ARM_FT_NORMAL:
20784 case ARM_FT_INTERWORKED:
20785 asm_fprintf (f, "\t%@ Function supports interworking.\n");
20788 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
20791 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
20793 case ARM_FT_EXCEPTION:
20794 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
20798 if (IS_NAKED (func_type))
20799 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
20801 if (IS_VOLATILE (func_type))
20802 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
20804 if (IS_NESTED (func_type))
20805 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
20806 if (IS_STACKALIGN (func_type))
20807 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
20808 if (IS_CMSE_ENTRY (func_type))
20809 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
20811 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
20812 (HOST_WIDE_INT) crtl->args.size,
20813 crtl->args.pretend_args_size,
20814 (HOST_WIDE_INT) get_frame_size ());
20816 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
20817 frame_pointer_needed,
20818 cfun->machine->uses_anonymous_args);
20820 if (cfun->machine->lr_save_eliminated)
20821 asm_fprintf (f, "\t%@ link register save eliminated.\n");
20823 if (crtl->calls_eh_return)
20824 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
20829 arm_output_function_epilogue (FILE *)
20831 arm_stack_offsets *offsets;
20837 /* Emit any call-via-reg trampolines that are needed for v4t support
20838 of call_reg and call_value_reg type insns. */
20839 for (regno = 0; regno < LR_REGNUM; regno++)
20841 rtx label = cfun->machine->call_via[regno];
20845 switch_to_section (function_section (current_function_decl));
20846 targetm.asm_out.internal_label (asm_out_file, "L",
20847 CODE_LABEL_NUMBER (label));
20848 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20852 /* ??? Probably not safe to set this here, since it assumes that a
20853 function will be emitted as assembly immediately after we generate
20854 RTL for it. This does not happen for inline functions. */
20855 cfun->machine->return_used_this_function = 0;
20857 else /* TARGET_32BIT */
20859 /* We need to take into account any stack-frame rounding. */
20860 offsets = arm_get_frame_offsets ();
20862 gcc_assert (!use_return_insn (FALSE, NULL)
20863 || (cfun->machine->return_used_this_function != 0)
20864 || offsets->saved_regs == offsets->outgoing_args
20865 || frame_pointer_needed);
20869 /* Generate and emit a sequence of insns equivalent to PUSH, but using
20870 STR and STRD. If an even number of registers is being pushed, one
20871 STRD pattern is created for each register pair. If an
20872 odd number of registers is pushed, emit an initial STR followed by
20873 as many STRD instructions as are needed. This works best when the
20874 stack is initially 64-bit aligned (the normal case), since it
20875 ensures that each STRD is also 64-bit aligned. */
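/* For example (a sketch, not verbatim compiler output): pushing
   {r4, r5, r6} emits the odd single store first, with writeback
   allocating all the space,
       str  r4, [sp, #-12]!
       strd r5, r6, [sp, #4]
   which keeps the strd slot doubleword-aligned.  */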
20877 thumb2_emit_strd_push (unsigned long saved_regs_mask)
20882 rtx par = NULL_RTX;
20883 rtx dwarf = NULL_RTX;
20887 num_regs = bit_count (saved_regs_mask);
20889 /* Must be at least one register to save, and can't save SP or PC. */
20890 gcc_assert (num_regs > 0 && num_regs <= 14);
20891 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20892 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20894 /* Create sequence for DWARF info. All the frame-related data for
20895 debugging is held in this wrapper. */
20896 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20898 /* Describe the stack adjustment. */
20899 tmp = gen_rtx_SET (stack_pointer_rtx,
20900 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20901 RTX_FRAME_RELATED_P (tmp) = 1;
20902 XVECEXP (dwarf, 0, 0) = tmp;
20904 /* Find the first register. */
20905 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
20910 /* If there's an odd number of registers to push, start off by
20911 pushing a single register. This ensures that subsequent strd
20912 operations are dword aligned (assuming that SP was originally
20913 64-bit aligned). */
20914 if ((num_regs & 1) != 0)
20916 rtx reg, mem, insn;
20918 reg = gen_rtx_REG (SImode, regno);
20920 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
20921 stack_pointer_rtx));
20923 mem = gen_frame_mem (Pmode,
20925 (Pmode, stack_pointer_rtx,
20926 plus_constant (Pmode, stack_pointer_rtx,
20929 tmp = gen_rtx_SET (mem, reg);
20930 RTX_FRAME_RELATED_P (tmp) = 1;
20931 insn = emit_insn (tmp);
20932 RTX_FRAME_RELATED_P (insn) = 1;
20933 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20934 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
20935 RTX_FRAME_RELATED_P (tmp) = 1;
20938 XVECEXP (dwarf, 0, i) = tmp;
20942 while (i < num_regs)
20943 if (saved_regs_mask & (1 << regno))
20945 rtx reg1, reg2, mem1, mem2;
20946 rtx tmp0, tmp1, tmp2;
20949 /* Find the register to pair with this one. */
20950 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
20954 reg1 = gen_rtx_REG (SImode, regno);
20955 reg2 = gen_rtx_REG (SImode, regno2);
20962 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20965 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20967 -4 * (num_regs - 1)));
20968 tmp0 = gen_rtx_SET (stack_pointer_rtx,
20969 plus_constant (Pmode, stack_pointer_rtx,
20971 tmp1 = gen_rtx_SET (mem1, reg1);
20972 tmp2 = gen_rtx_SET (mem2, reg2);
20973 RTX_FRAME_RELATED_P (tmp0) = 1;
20974 RTX_FRAME_RELATED_P (tmp1) = 1;
20975 RTX_FRAME_RELATED_P (tmp2) = 1;
20976 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
20977 XVECEXP (par, 0, 0) = tmp0;
20978 XVECEXP (par, 0, 1) = tmp1;
20979 XVECEXP (par, 0, 2) = tmp2;
20980 insn = emit_insn (par);
20981 RTX_FRAME_RELATED_P (insn) = 1;
20982 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20986 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20989 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20992 tmp1 = gen_rtx_SET (mem1, reg1);
20993 tmp2 = gen_rtx_SET (mem2, reg2);
20994 RTX_FRAME_RELATED_P (tmp1) = 1;
20995 RTX_FRAME_RELATED_P (tmp2) = 1;
20996 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20997 XVECEXP (par, 0, 0) = tmp1;
20998 XVECEXP (par, 0, 1) = tmp2;
21002 /* Create unwind information. This is an approximation. */
21003 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
21004 plus_constant (Pmode,
21008 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
21009 plus_constant (Pmode,
21014 RTX_FRAME_RELATED_P (tmp1) = 1;
21015 RTX_FRAME_RELATED_P (tmp2) = 1;
21016 XVECEXP (dwarf, 0, i + 1) = tmp1;
21017 XVECEXP (dwarf, 0, i + 2) = tmp2;
21019 regno = regno2 + 1;
21027 /* STRD in ARM mode requires consecutive registers. This function emits STRD
21028 whenever possible, otherwise it emits single-word stores. The first store
21029 also allocates stack space for all saved registers, using writeback with
21030 pre-indexed addressing. All other stores use offset addressing. If no STRD
21031 can be emitted, this function emits a sequence of single-word stores,
21032 and not an STM as before, because single-word stores give the scheduler
21033 more freedom and can be turned into an STM by peephole optimizations. */
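/* Sketch (assuming the usual downward-growing frame): for the mask
   {r4, r5, r7} this emits
       strd r4, r5, [sp, #-12]!
       str  r7, [sp, #8]
   whereas a non-consecutive pair such as {r4, r6} must fall back to
   two single-word str instructions.  */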
21035 arm_emit_strd_push (unsigned long saved_regs_mask)
21038 int i, j, dwarf_index = 0;
21040 rtx dwarf = NULL_RTX;
21041 rtx insn = NULL_RTX;
21044 /* TODO: More efficient code can be emitted by changing the
21045 layout, e.g., first push all pairs that can use STRD to keep the
21046 stack aligned, and then push all other registers. */
21047 for (i = 0; i <= LAST_ARM_REGNUM; i++)
21048 if (saved_regs_mask & (1 << i))
21051 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21052 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21053 gcc_assert (num_regs > 0);
21055 /* Create sequence for DWARF info. */
21056 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21058 /* For dwarf info, we generate explicit stack update. */
21059 tmp = gen_rtx_SET (stack_pointer_rtx,
21060 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21061 RTX_FRAME_RELATED_P (tmp) = 1;
21062 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21064 /* Save registers. */
21065 offset = - 4 * num_regs;
21067 while (j <= LAST_ARM_REGNUM)
21068 if (saved_regs_mask & (1 << j))
21071 && (saved_regs_mask & (1 << (j + 1))))
21073 /* The current register and the next register form a register pair
21074 for which STRD can be generated. */
21077 /* Allocate stack space for all saved registers. */
21078 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
21079 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21080 mem = gen_frame_mem (DImode, tmp);
21083 else if (offset > 0)
21084 mem = gen_frame_mem (DImode,
21085 plus_constant (Pmode,
21089 mem = gen_frame_mem (DImode, stack_pointer_rtx);
21091 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
21092 RTX_FRAME_RELATED_P (tmp) = 1;
21093 tmp = emit_insn (tmp);
21095 /* Record the first store insn. */
21096 if (dwarf_index == 1)
21099 /* Generate dwarf info. */
21100 mem = gen_frame_mem (SImode,
21101 plus_constant (Pmode,
21104 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21105 RTX_FRAME_RELATED_P (tmp) = 1;
21106 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21108 mem = gen_frame_mem (SImode,
21109 plus_constant (Pmode,
21112 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
21113 RTX_FRAME_RELATED_P (tmp) = 1;
21114 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21121 /* Emit a single word store. */
21124 /* Allocate stack space for all saved registers. */
21125 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
21126 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21127 mem = gen_frame_mem (SImode, tmp);
21130 else if (offset > 0)
21131 mem = gen_frame_mem (SImode,
21132 plus_constant (Pmode,
21136 mem = gen_frame_mem (SImode, stack_pointer_rtx);
21138 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21139 RTX_FRAME_RELATED_P (tmp) = 1;
21140 tmp = emit_insn (tmp);
21142 /* Record the first store insn. */
21143 if (dwarf_index == 1)
21146 /* Generate dwarf info. */
21147 mem = gen_frame_mem (SImode,
21148 plus_constant (Pmode,
21151 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21152 RTX_FRAME_RELATED_P (tmp) = 1;
21153 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21162 /* Attach dwarf info to the first insn we generate. */
21163 gcc_assert (insn != NULL_RTX);
21164 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21165 RTX_FRAME_RELATED_P (insn) = 1;
21168 /* Generate and emit an insn that we will recognize as a push_multi.
21169 Unfortunately, since this insn does not reflect very well the actual
21170 semantics of the operation, we need to annotate the insn for the benefit
21171 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
21172 MASK for registers that should be annotated for DWARF2 frame unwind
21175 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
21178 int num_dwarf_regs = 0;
21182 int dwarf_par_index;
21185 /* We don't record the PC in the dwarf frame information. */
21186 dwarf_regs_mask &= ~(1 << PC_REGNUM);
21188 for (i = 0; i <= LAST_ARM_REGNUM; i++)
21190 if (mask & (1 << i))
21192 if (dwarf_regs_mask & (1 << i))
21196 gcc_assert (num_regs && num_regs <= 16);
21197 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
21199 /* For the body of the insn we are going to generate an UNSPEC in
21200 parallel with several USEs. This allows the insn to be recognized
21201 by the push_multi pattern in the arm.md file.
21203 The body of the insn looks something like this:
21206 (set (mem:BLK (pre_modify:SI (reg:SI sp)
21207 (const_int:SI <num>)))
21208 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
21214 For the frame note however, we try to be more explicit and actually
21215 show each register being stored into the stack frame, plus a (single)
21216 decrement of the stack pointer. We do it this way in order to be
21217 friendly to the stack unwinding code, which only wants to see a single
21218 stack decrement per instruction. The RTL we generate for the note looks
21219 something like this:
21222 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
21223 (set (mem:SI (reg:SI sp)) (reg:SI r4))
21224 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
21225 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
21229 FIXME: In an ideal world the PRE_MODIFY would not exist and
21230 instead we'd have a parallel expression detailing all
21231 the stores to the various memory addresses so that debug
21232 information is more up-to-date. Remember, however, when rewriting
21233 this to take care of the constraints of the push instruction.
21235 Note also that this has to be taken care of for the VFP registers.
21237 For more see PR43399. */
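/* Concrete instance (illustrative): for MASK = {r4, lr} the insn body is

     (parallel [
        (set (mem:BLK (pre_modify:SI (reg:SI sp)
                                     (plus:SI (reg:SI sp) (const_int -8))))
             (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
        (use (reg:SI lr))])

   while the attached note records the sp decrement plus one SET per
   register; the insn itself is output as "push {r4, lr}".  */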
21239 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
21240 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
21241 dwarf_par_index = 1;
21243 for (i = 0; i <= LAST_ARM_REGNUM; i++)
21245 if (mask & (1 << i))
21247 reg = gen_rtx_REG (SImode, i);
21249 XVECEXP (par, 0, 0)
21250 = gen_rtx_SET (gen_frame_mem
21252 gen_rtx_PRE_MODIFY (Pmode,
21255 (Pmode, stack_pointer_rtx,
21258 gen_rtx_UNSPEC (BLKmode,
21259 gen_rtvec (1, reg),
21260 UNSPEC_PUSH_MULT));
21262 if (dwarf_regs_mask & (1 << i))
21264 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
21266 RTX_FRAME_RELATED_P (tmp) = 1;
21267 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
21274 for (j = 1, i++; j < num_regs; i++)
21276 if (mask & (1 << i))
21278 reg = gen_rtx_REG (SImode, i);
21280 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
21282 if (dwarf_regs_mask & (1 << i))
21285 = gen_rtx_SET (gen_frame_mem
21287 plus_constant (Pmode, stack_pointer_rtx,
21290 RTX_FRAME_RELATED_P (tmp) = 1;
21291 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
21298 par = emit_insn (par);
21300 tmp = gen_rtx_SET (stack_pointer_rtx,
21301 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21302 RTX_FRAME_RELATED_P (tmp) = 1;
21303 XVECEXP (dwarf, 0, 0) = tmp;
21305 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
21310 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
21311 SIZE is the offset to be adjusted.
21312 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
21314 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
21318 RTX_FRAME_RELATED_P (insn) = 1;
21319 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
21320 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
21323 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
21324 SAVED_REGS_MASK shows which registers need to be restored.
21326 Unfortunately, since this insn does not reflect very well the actual
21327 semantics of the operation, we need to annotate the insn for the benefit
21328 of DWARF2 frame unwind information. */
21330 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
21335 rtx dwarf = NULL_RTX;
21337 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
21341 offset_adj = return_in_pc ? 1 : 0;
21342 for (i = 0; i <= LAST_ARM_REGNUM; i++)
21343 if (saved_regs_mask & (1 << i))
21346 gcc_assert (num_regs && num_regs <= 16);
21348 /* If SP is in the reglist, then we don't emit an SP update insn. */
21349 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
21351 /* The parallel needs to hold num_regs SETs
21352 and one SET for the stack update. */
21353 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
21356 XVECEXP (par, 0, 0) = ret_rtx;
21360 /* Increment the stack pointer, based on there being
21361 num_regs 4-byte registers to restore. */
21362 tmp = gen_rtx_SET (stack_pointer_rtx,
21363 plus_constant (Pmode,
21366 RTX_FRAME_RELATED_P (tmp) = 1;
21367 XVECEXP (par, 0, offset_adj) = tmp;
21370 /* Now restore every reg, which may include PC. */
21371 for (j = 0, i = 0; j < num_regs; i++)
21372 if (saved_regs_mask & (1 << i))
21374 reg = gen_rtx_REG (SImode, i);
21375 if ((num_regs == 1) && emit_update && !return_in_pc)
21377 /* Emit single load with writeback. */
21378 tmp = gen_frame_mem (SImode,
21379 gen_rtx_POST_INC (Pmode,
21380 stack_pointer_rtx));
21381 tmp = emit_insn (gen_rtx_SET (reg, tmp));
21382 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
21386 tmp = gen_rtx_SET (reg,
21389 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
21390 RTX_FRAME_RELATED_P (tmp) = 1;
21391 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
21393 /* We need to maintain a sequence for DWARF info too. As the dwarf
21394 info should not include the PC, skip it. */
21395 if (i != PC_REGNUM)
21396 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
21402 par = emit_jump_insn (par);
21404 par = emit_insn (par);
21406 REG_NOTES (par) = dwarf;
21408 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
21409 stack_pointer_rtx, stack_pointer_rtx);
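/* Illustration: for SAVED_REGS_MASK = {r4, r5, pc} the emitted parallel
   is roughly

     (parallel [(return)
                (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 12)))
                (set (reg:SI r4) (mem:SI (reg:SI sp)))
                (set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))
                (set (reg:SI pc) (mem:SI (plus:SI (reg:SI sp) (const_int 8))))])

   i.e. a "pop {r4, r5, pc}".  */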
21412 /* Generate and emit an insn pattern that we will recognize as a pop_multi
21413 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
21415 Unfortunately, since this insn does not reflect very well the actual
21416 semantics of the operation, we need to annotate the insn for the benefit
21417 of DWARF2 frame unwind information. */
21419 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
21423 rtx dwarf = NULL_RTX;
21426 gcc_assert (num_regs && num_regs <= 32);
21428 /* Workaround ARM10 VFPr1 bug. */
21429 if (num_regs == 2 && !arm_arch6)
21431 if (first_reg == 15)
21437 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
21438 there could be up to 32 D-registers to restore.
21439 If there are more than 16 D-registers, make two recursive calls,
21440 each of which emits one pop_multi instruction. */
21443 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
21444 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
21448 /* The parallel needs to hold num_regs SETs
21449 and one SET for the stack update. */
21450 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
21452 /* Increment the stack pointer, based on there being
21453 num_regs 8-byte registers to restore. */
21454 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
21455 RTX_FRAME_RELATED_P (tmp) = 1;
21456 XVECEXP (par, 0, 0) = tmp;
21458 /* Now show every reg that will be restored, using a SET for each. */
21459 for (j = 0, i=first_reg; j < num_regs; i += 2)
21461 reg = gen_rtx_REG (DFmode, i);
21463 tmp = gen_rtx_SET (reg,
21466 plus_constant (Pmode, base_reg, 8 * j)));
21467 RTX_FRAME_RELATED_P (tmp) = 1;
21468 XVECEXP (par, 0, j + 1) = tmp;
21470 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
21475 par = emit_insn (par);
21476 REG_NOTES (par) = dwarf;
21478 /* Make sure the CFA doesn't remain based on IP_REGNUM, to allow unwinding from FP. */
21479 if (REGNO (base_reg) == IP_REGNUM)
21481 RTX_FRAME_RELATED_P (par) = 1;
21482 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
21485 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
21486 base_reg, base_reg);
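/* For instance (a sketch): restoring d8-d27 (20 registers) recurses into
   two pops, roughly "vldmia sp!, {d8-d23}" followed by
   "vldmia sp!, {d24-d27}", since a single pop_multi handles at most 16
   D-registers.  */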
21489 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
21490 even number of registers is being popped, multiple LDRD patterns are created
21491 for all register pairs. If an odd number of registers is popped, the last
21492 register is loaded using an LDR pattern. */
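/* Sketch: for saved_regs_mask = {r4, r5, pc}, num_regs drops from 3 to 2,
   the pair r4/r5 is loaded with one LDRD pattern, sp is advanced by 8,
   and PC is then popped by the ldr-with-return path below.  */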
21494 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
21498 rtx par = NULL_RTX;
21499 rtx dwarf = NULL_RTX;
21500 rtx tmp, reg, tmp1;
21501 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
21503 for (i = 0; i <= LAST_ARM_REGNUM; i++)
21504 if (saved_regs_mask & (1 << i))
21507 gcc_assert (num_regs && num_regs <= 16);
21509 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
21510 to be popped. So if num_regs was even it now becomes odd, and
21511 a pop with PC can be generated; if num_regs was odd it is now
21512 even, and an ldr with return can be generated for PC. */
21516 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21518 /* Var j iterates over all the registers to gather all the registers in
21519 saved_regs_mask. Var i gives index of saved registers in stack frame.
21520 A PARALLEL RTX of register-pair is created here, so that pattern for
21521 LDRD can be matched. As PC is always last register to be popped, and
21522 we have already decremented num_regs if PC, we don't have to worry
21523 about PC in this loop. */
21524 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
21525 if (saved_regs_mask & (1 << j))
21527 /* Create RTX for memory load. */
21528 reg = gen_rtx_REG (SImode, j);
21529 tmp = gen_rtx_SET (reg,
21530 gen_frame_mem (SImode,
21531 plus_constant (Pmode,
21532 stack_pointer_rtx, 4 * i)));
21533 RTX_FRAME_RELATED_P (tmp) = 1;
/* When the saved-register index (i) is even, the RTX to be emitted is
yet to be created. Hence create it first. The LDRD pattern we
are generating is:
21540 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
21541 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
21542 where target registers need not be consecutive. */
21543 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
/* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
added as the 0th element; if i is odd, reg_i is added as the 1st element
of the LDRD pattern shown above. */
21550 XVECEXP (par, 0, (i % 2)) = tmp;
21551 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
/* When the saved-register index (i) is odd, the RTXs for both registers
to be loaded have been generated in the LDRD pattern given above, and
the pattern can be emitted now. */
21558 par = emit_insn (par);
21559 REG_NOTES (par) = dwarf;
21560 RTX_FRAME_RELATED_P (par) = 1;
/* If the number of registers popped is odd and return_in_pc is false, or
the number of registers is even and return_in_pc is true, the last
register is popped using LDR. It can be the PC as well. Hence, adjust
the stack first and then emit an LDR with post-increment. */
21571 /* Increment the stack pointer, based on there being
21572 num_regs 4-byte registers to restore. */
21573 tmp = gen_rtx_SET (stack_pointer_rtx,
21574 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
21575 RTX_FRAME_RELATED_P (tmp) = 1;
21576 tmp = emit_insn (tmp);
21579 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
21580 stack_pointer_rtx, stack_pointer_rtx);
21585 if (((num_regs % 2) == 1 && !return_in_pc)
21586 || ((num_regs % 2) == 0 && return_in_pc))
21588 /* Scan for the single register to be popped. Skip until the saved
21589 register is found. */
21590 for (; (saved_regs_mask & (1 << j)) == 0; j++);
21592 /* Gen LDR with post increment here. */
21593 tmp1 = gen_rtx_MEM (SImode,
21594 gen_rtx_POST_INC (SImode,
21595 stack_pointer_rtx));
21596 set_mem_alias_set (tmp1, get_frame_alias_set ());
21598 reg = gen_rtx_REG (SImode, j);
21599 tmp = gen_rtx_SET (reg, tmp1);
21600 RTX_FRAME_RELATED_P (tmp) = 1;
21601 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
if (return_in_pc)
{
/* If return_in_pc, j must be PC_REGNUM. */
gcc_assert (j == PC_REGNUM);
par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
XVECEXP (par, 0, 0) = ret_rtx;
XVECEXP (par, 0, 1) = tmp;
par = emit_jump_insn (par);
}
else
{
par = emit_insn (tmp);
REG_NOTES (par) = dwarf;
arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
stack_pointer_rtx, stack_pointer_rtx);
}
21621 else if ((num_regs % 2) == 1 && return_in_pc)
21623 /* There are 2 registers to be popped. So, generate the pattern
21624 pop_multiple_with_stack_update_and_return to pop in PC. */
21625 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
/* LDRD in ARM mode needs consecutive registers as operands. This function
emits LDRD whenever possible, otherwise it emits single-word loads. It uses
offset addressing and then generates one separate stack update. This provides
more scheduling freedom, compared to writeback on every load. However,
if the function returns using a load into the PC directly
(i.e., if the PC is in SAVED_REGS_MASK), the stack needs to be updated
before the last load. TODO: Add a peephole optimization to recognize
the new epilogue sequence as an LDM instruction whenever possible.
TODO: Add a peephole optimization to merge the load at stack-offset zero
with the stack-update instruction, using a load with writeback
in post-index addressing mode. */
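/* Illustrative example (an assumption, not part of the original comment):
   popping {r4, r5, r6, r8} in ARM mode could emit roughly

       ldrd	r4, r5, [sp]
       ldr	r6, [sp, #8]
       ldr	r8, [sp, #12]
       add	sp, sp, #16

   since only the r4/r5 pair is consecutive and therefore eligible for
   LDRD; the trailing ADD is the single separate stack update. */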
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
21647 rtx par = NULL_RTX;
21648 rtx dwarf = NULL_RTX;
21651 /* Restore saved registers. */
21652 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
21654 while (j <= LAST_ARM_REGNUM)
21655 if (saved_regs_mask & (1 << j))
21658 && (saved_regs_mask & (1 << (j + 1)))
21659 && (j + 1) != PC_REGNUM)
21661 /* Current register and next register form register pair for which
21662 LDRD can be generated. PC is always the last register popped, and
21663 we handle it separately. */
21665 mem = gen_frame_mem (DImode,
21666 plus_constant (Pmode,
21670 mem = gen_frame_mem (DImode, stack_pointer_rtx);
21672 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
21673 tmp = emit_insn (tmp);
21674 RTX_FRAME_RELATED_P (tmp) = 1;
21676 /* Generate dwarf info. */
21678 dwarf = alloc_reg_note (REG_CFA_RESTORE,
21679 gen_rtx_REG (SImode, j),
21681 dwarf = alloc_reg_note (REG_CFA_RESTORE,
21682 gen_rtx_REG (SImode, j + 1),
21685 REG_NOTES (tmp) = dwarf;
21690 else if (j != PC_REGNUM)
21692 /* Emit a single word load. */
21694 mem = gen_frame_mem (SImode,
21695 plus_constant (Pmode,
21699 mem = gen_frame_mem (SImode, stack_pointer_rtx);
21701 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
21702 tmp = emit_insn (tmp);
21703 RTX_FRAME_RELATED_P (tmp) = 1;
21705 /* Generate dwarf info. */
21706 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
21707 gen_rtx_REG (SImode, j),
21713 else /* j == PC_REGNUM */
21719 /* Update the stack. */
21722 tmp = gen_rtx_SET (stack_pointer_rtx,
21723 plus_constant (Pmode,
21726 tmp = emit_insn (tmp);
21727 arm_add_cfa_adjust_cfa_note (tmp, offset,
21728 stack_pointer_rtx, stack_pointer_rtx);
21732 if (saved_regs_mask & (1 << PC_REGNUM))
21734 /* Only PC is to be popped. */
21735 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
21736 XVECEXP (par, 0, 0) = ret_rtx;
21737 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
21738 gen_frame_mem (SImode,
21739 gen_rtx_POST_INC (SImode,
21740 stack_pointer_rtx)));
21741 RTX_FRAME_RELATED_P (tmp) = 1;
21742 XVECEXP (par, 0, 1) = tmp;
21743 par = emit_jump_insn (par);
21745 /* Generate dwarf info. */
21746 dwarf = alloc_reg_note (REG_CFA_RESTORE,
21747 gen_rtx_REG (SImode, PC_REGNUM),
21749 REG_NOTES (par) = dwarf;
21750 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
21751 stack_pointer_rtx, stack_pointer_rtx);
21755 /* Calculate the size of the return value that is passed in registers. */
static unsigned
arm_size_return_regs (void)
{
machine_mode mode;
if (crtl->return_rtx != 0)
mode = GET_MODE (crtl->return_rtx);
else
mode = DECL_MODE (DECL_RESULT (current_function_decl));
return GET_MODE_SIZE (mode);
}
21769 /* Return true if the current function needs to save/restore LR. */
static bool
thumb_force_lr_save (void)
{
return !cfun->machine->lr_save_eliminated
&& (!crtl->is_leaf
|| thumb_far_jump_used_p ()
|| df_regs_ever_live_p (LR_REGNUM));
}
/* We do not know if r3 will be available, because we have an indirect
tail call happening in this particular case. */
static bool
is_indirect_tailcall_p (rtx call)
21785 rtx pat = PATTERN (call);
21787 /* Indirect tail call. */
21788 pat = XVECEXP (pat, 0, 0);
21789 if (GET_CODE (pat) == SET)
21790 pat = SET_SRC (pat);
21792 pat = XEXP (XEXP (pat, 0), 0);
21793 return REG_P (pat);
21796 /* Return true if r3 is used by any of the tail call insns in the
21797 current function. */
static bool
any_sibcall_could_use_r3 (void)
21804 if (!crtl->tail_call_emit)
21806 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
21807 if (e->flags & EDGE_SIBCALL)
21809 rtx_insn *call = BB_END (e->src);
21810 if (!CALL_P (call))
21811 call = prev_nonnote_nondebug_insn (call);
21812 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
21813 if (find_regno_fusage (call, USE, 3)
21814 || is_indirect_tailcall_p (call))
21821 /* Compute the distance from register FROM to register TO.
21822 These can be the arg pointer (26), the soft frame pointer (25),
21823 the stack pointer (13) or the hard frame pointer (11).
21824 In thumb mode r7 is used as the soft frame pointer, if needed.
21825 Typical stack layout looks like this:
old stack pointer -> |    |
                      ----
                     |    | \
                     |    |   saved arguments for
                     |    |   vararg functions
                     |    | /
                       --
hard FP & arg pointer -> |    | \
                     |    |   stack
                     |    |   frame
                     |    | /
                       --
                     |    | \
                     |    |   call saved
                     |    |   registers
soft frame pointer -> |    | /
                       --
                     |    | \
                     |    |   local
                     |    |   variables
locals base pointer -> |    | /
                       --
                     |    | \
                     |    |   outgoing
                     |    |   arguments
current stack pointer -> |    | /
21855 For a given function some or all of these stack components
21856 may not be needed, giving rise to the possibility of
21857 eliminating some of the registers.
21859 The values returned by this function must reflect the behavior
21860 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
21862 The sign of the number returned reflects the direction of stack
21863 growth, so the values are positive for all eliminations except
21864 from the soft frame pointer to the hard frame pointer.
SFP may point just inside the local variables block to ensure correct
alignment. */
21870 /* Return cached stack offsets. */
21872 static arm_stack_offsets *
21873 arm_get_frame_offsets (void)
struct arm_stack_offsets *offsets;
offsets = &cfun->machine->stack_offsets;
return offsets;
}
21883 /* Calculate stack offsets. These are used to calculate register elimination
21884 offsets and in prologue/epilogue code. Also calculates which registers
21885 should be saved. */
static void
arm_compute_frame_layout (void)
21890 struct arm_stack_offsets *offsets;
21891 unsigned long func_type;
21894 HOST_WIDE_INT frame_size;
21897 offsets = &cfun->machine->stack_offsets;
/* Initially this is the size of the local variables. It will be translated
into an offset once we have determined the size of preceding data. */
21901 frame_size = ROUND_UP_WORD (get_frame_size ());
21903 /* Space for variadic functions. */
21904 offsets->saved_args = crtl->args.pretend_args_size;
/* In Thumb mode this is incorrect, but never used. */
offsets->frame
= (offsets->saved_args
21909 + arm_compute_static_chain_stack_bytes ()
21910 + (frame_pointer_needed ? 4 : 0));
21914 unsigned int regno;
21916 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
21917 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21918 saved = core_saved;
21920 /* We know that SP will be doubleword aligned on entry, and we must
21921 preserve that condition at any subroutine call. We also require the
21922 soft frame pointer to be doubleword aligned. */
21924 if (TARGET_REALLY_IWMMXT)
21926 /* Check for the call-saved iWMMXt registers. */
21927 for (regno = FIRST_IWMMXT_REGNUM;
21928 regno <= LAST_IWMMXT_REGNUM;
21930 if (df_regs_ever_live_p (regno)
21931 && !call_used_or_fixed_reg_p (regno))
21935 func_type = arm_current_func_type ();
21936 /* Space for saved VFP registers. */
21937 if (! IS_VOLATILE (func_type)
21938 && TARGET_HARD_FLOAT)
21939 saved += arm_get_vfp_saved_size ();
21941 else /* TARGET_THUMB1 */
21943 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
21944 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21945 saved = core_saved;
21946 if (TARGET_BACKTRACE)
21950 /* Saved registers include the stack frame. */
21951 offsets->saved_regs
21952 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
21953 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
/* A leaf function does not need any stack alignment if it has nothing
on the stack. */
21957 if (crtl->is_leaf && frame_size == 0
21958 /* However if it calls alloca(), we have a dynamically allocated
21959 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
21960 && ! cfun->calls_alloca)
21962 offsets->outgoing_args = offsets->soft_frame;
21963 offsets->locals_base = offsets->soft_frame;
21967 /* Ensure SFP has the correct alignment. */
21968 if (ARM_DOUBLEWORD_ALIGN
21969 && (offsets->soft_frame & 7))
21971 offsets->soft_frame += 4;
21972 /* Try to align stack by pushing an extra reg. Don't bother doing this
21973 when there is a stack frame as the alignment will be rolled into
21974 the normal stack adjustment. */
21975 if (frame_size + crtl->outgoing_args_size == 0)
21979 /* Register r3 is caller-saved. Normally it does not need to be
21980 saved on entry by the prologue. However if we choose to save
21981 it for padding then we may confuse the compiler into thinking
21982 a prologue sequence is required when in fact it is not. This
21983 will occur when shrink-wrapping if r3 is used as a scratch
21984 register and there are no other callee-saved writes.
21986 This situation can be avoided when other callee-saved registers
21987 are available and r3 is not mandatory if we choose a callee-saved
21988 register for padding. */
21989 bool prefer_callee_reg_p = false;
21991 /* If it is safe to use r3, then do so. This sometimes
21992 generates better code on Thumb-2 by avoiding the need to
21993 use 32-bit push/pop instructions. */
21994 if (! any_sibcall_could_use_r3 ()
21995 && arm_size_return_regs () <= 12
21996 && (offsets->saved_regs_mask & (1 << 3)) == 0
21998 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
22001 if (!TARGET_THUMB2)
22002 prefer_callee_reg_p = true;
22005 || prefer_callee_reg_p)
22007 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
22009 /* Avoid fixed registers; they may be changed at
22010 arbitrary times so it's unsafe to restore them
22011 during the epilogue. */
22013 && (offsets->saved_regs_mask & (1 << i)) == 0)
22023 offsets->saved_regs += 4;
22024 offsets->saved_regs_mask |= (1 << reg);
22029 offsets->locals_base = offsets->soft_frame + frame_size;
22030 offsets->outgoing_args = (offsets->locals_base
22031 + crtl->outgoing_args_size);
22033 if (ARM_DOUBLEWORD_ALIGN)
22035 /* Ensure SP remains doubleword aligned. */
22036 if (offsets->outgoing_args & 7)
22037 offsets->outgoing_args += 4;
22038 gcc_assert (!(offsets->outgoing_args & 7));
22043 /* Calculate the relative offsets for the different stack pointers. Positive
22044 offsets are in the direction of stack growth. */
22047 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
22049 arm_stack_offsets *offsets;
22051 offsets = arm_get_frame_offsets ();
22053 /* OK, now we have enough information to compute the distances.
22054 There must be an entry in these switch tables for each pair
22055 of registers in ELIMINABLE_REGS, even if some of the entries
22056 seem to be redundant or useless. */
22059 case ARG_POINTER_REGNUM:
22062 case THUMB_HARD_FRAME_POINTER_REGNUM:
22065 case FRAME_POINTER_REGNUM:
22066 /* This is the reverse of the soft frame pointer
22067 to hard frame pointer elimination below. */
22068 return offsets->soft_frame - offsets->saved_args;
22070 case ARM_HARD_FRAME_POINTER_REGNUM:
22071 /* This is only non-zero in the case where the static chain register
22072 is stored above the frame. */
22073 return offsets->frame - offsets->saved_args - 4;
22075 case STACK_POINTER_REGNUM:
22076 /* If nothing has been pushed on the stack at all
22077 then this will return -4. This *is* correct! */
22078 return offsets->outgoing_args - (offsets->saved_args + 4);
22081 gcc_unreachable ();
22083 gcc_unreachable ();
22085 case FRAME_POINTER_REGNUM:
22088 case THUMB_HARD_FRAME_POINTER_REGNUM:
22091 case ARM_HARD_FRAME_POINTER_REGNUM:
22092 /* The hard frame pointer points to the top entry in the
22093 stack frame. The soft frame pointer to the bottom entry
22094 in the stack frame. If there is no stack frame at all,
22095 then they are identical. */
22097 return offsets->frame - offsets->soft_frame;
22099 case STACK_POINTER_REGNUM:
22100 return offsets->outgoing_args - offsets->soft_frame;
22103 gcc_unreachable ();
22105 gcc_unreachable ();
22108 /* You cannot eliminate from the stack pointer.
22109 In theory you could eliminate from the hard frame
22110 pointer to the stack pointer, but this will never
22111 happen, since if a stack frame is not needed the
22112 hard frame pointer will never be used. */
22113 gcc_unreachable ();
22117 /* Given FROM and TO register numbers, say whether this elimination is
22118 allowed. Frame pointer elimination is automatically handled.
22120 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
22121 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
22122 pointer, we must eliminate FRAME_POINTER_REGNUM into
22123 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
22124 ARG_POINTER_REGNUM. */
static bool
arm_can_eliminate (const int from, const int to)
22129 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
22130 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
22131 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
(to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
true);
22136 /* Emit RTL to save coprocessor registers on function entry. Returns the
22137 number of bytes pushed. */
static int
arm_save_coproc_regs(void)
22142 int saved_size = 0;
22144 unsigned start_reg;
22147 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
22148 if (df_regs_ever_live_p (reg) && !call_used_or_fixed_reg_p (reg))
22150 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
22151 insn = gen_rtx_MEM (V2SImode, insn);
22152 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
22153 RTX_FRAME_RELATED_P (insn) = 1;
22157 if (TARGET_HARD_FLOAT)
22159 start_reg = FIRST_VFP_REGNUM;
22161 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
22163 if ((!df_regs_ever_live_p (reg) || call_used_or_fixed_reg_p (reg))
22164 && (!df_regs_ever_live_p (reg + 1)
22165 || call_used_or_fixed_reg_p (reg + 1)))
22167 if (start_reg != reg)
22168 saved_size += vfp_emit_fstmd (start_reg,
22169 (reg - start_reg) / 2);
22170 start_reg = reg + 2;
22173 if (start_reg != reg)
22174 saved_size += vfp_emit_fstmd (start_reg,
22175 (reg - start_reg) / 2);
22181 /* Set the Thumb frame pointer from the stack pointer. */
static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
22186 HOST_WIDE_INT amount;
22189 amount = offsets->outgoing_args - offsets->locals_base;
22191 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22192 stack_pointer_rtx, GEN_INT (amount)));
22195 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
22196 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
22197 expects the first two operands to be the same. */
22200 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22202 hard_frame_pointer_rtx));
22206 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22207 hard_frame_pointer_rtx,
22208 stack_pointer_rtx));
22210 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
22211 plus_constant (Pmode, stack_pointer_rtx, amount));
22212 RTX_FRAME_RELATED_P (dwarf) = 1;
22213 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22216 RTX_FRAME_RELATED_P (insn) = 1;
22219 struct scratch_reg {
22224 /* Return a short-lived scratch register for use as a 2nd scratch register on
22225 function entry after the registers are saved in the prologue. This register
22226 must be released by means of release_scratch_register_on_entry. IP is not
22227 considered since it is always used as the 1st scratch register if available.
22229 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
22230 mask of live registers. */
static void
get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
22234 unsigned long live_regs)
22240 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
22246 for (i = 4; i < 11; i++)
22247 if (regno1 != i && (live_regs & (1 << i)) != 0)
22255 /* If IP is used as the 1st scratch register for a nested function,
22256 then either r3 wasn't available or is used to preserve IP. */
22257 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
22259 regno = (regno1 == 3 ? 2 : 3);
22261 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
22266 sr->reg = gen_rtx_REG (SImode, regno);
22269 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
22270 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
22271 rtx x = gen_rtx_SET (stack_pointer_rtx,
22272 plus_constant (Pmode, stack_pointer_rtx, -4));
22273 RTX_FRAME_RELATED_P (insn) = 1;
22274 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
22278 /* Release a scratch register obtained from the preceding function. */
static void
release_scratch_register_on_entry (struct scratch_reg *sr)
22285 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
22286 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
22287 rtx x = gen_rtx_SET (stack_pointer_rtx,
22288 plus_constant (Pmode, stack_pointer_rtx, 4));
22289 RTX_FRAME_RELATED_P (insn) = 1;
22290 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
22294 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
22296 #if PROBE_INTERVAL > 4096
22297 #error Cannot use indexed addressing mode for stack probing
22300 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
22301 inclusive. These are offsets from the current stack pointer. REGNO1
22302 is the index number of the 1st scratch register and LIVE_REGS is the
22303 mask of live registers. */
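/* Worked example (illustrative, assuming PROBE_INTERVAL == 4096, ARM mode
   and no Thumb-2 range restriction): for FIRST == 4096 and SIZE == 10000
   this emits probes at SP - 8192, SP - 12288 and finally SP - 14096, i.e.
   at FIRST + N * PROBE_INTERVAL and then at FIRST + SIZE. */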
static void
arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
22307 unsigned int regno1, unsigned long live_regs)
22309 rtx reg1 = gen_rtx_REG (Pmode, regno1);
22311 /* See if we have a constant small number of probes to generate. If so,
22312 that's the easy case. */
22313 if (size <= PROBE_INTERVAL)
22315 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
22316 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
22317 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
22320 /* The run-time loop is made up of 10 insns in the generic case while the
22321 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
22322 else if (size <= 5 * PROBE_INTERVAL)
22324 HOST_WIDE_INT i, rem;
22326 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
22327 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
22328 emit_stack_probe (reg1);
22330 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
22331 it exceeds SIZE. If only two probes are needed, this will not
22332 generate any code. Then probe at FIRST + SIZE. */
22333 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
22335 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
22336 emit_stack_probe (reg1);
22339 rem = size - (i - PROBE_INTERVAL);
22340 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
22342 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
22343 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
22346 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
22349 /* Otherwise, do the same as above, but in a loop. Note that we must be
22350 extra careful with variables wrapping around because we might be at
22351 the very top (or the very bottom) of the address space and we have
22352 to be able to handle this case properly; in particular, we use an
22353 equality test for the loop condition. */
22356 HOST_WIDE_INT rounded_size;
22357 struct scratch_reg sr;
22359 get_scratch_register_on_entry (&sr, regno1, live_regs);
22361 emit_move_insn (reg1, GEN_INT (first));
22364 /* Step 1: round SIZE to the previous multiple of the interval. */
22366 rounded_size = size & -PROBE_INTERVAL;
22367 emit_move_insn (sr.reg, GEN_INT (rounded_size));
22370 /* Step 2: compute initial and final value of the loop counter. */
22372 /* TEST_ADDR = SP + FIRST. */
22373 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
22375 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
22376 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
/* Step 3: the loop

     do
       {
         TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
         probe at TEST_ADDR
       }
     while (TEST_ADDR != LAST_ADDR)

   probes at FIRST + N * PROBE_INTERVAL for values of N from 1
   until it is equal to ROUNDED_SIZE. */
22391 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
22394 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
22395 that SIZE is equal to ROUNDED_SIZE. */
22397 if (size != rounded_size)
22399 HOST_WIDE_INT rem = size - rounded_size;
22401 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
22403 emit_set_insn (sr.reg,
22404 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
22405 emit_stack_probe (plus_constant (Pmode, sr.reg,
22406 PROBE_INTERVAL - rem));
22409 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
22412 release_scratch_register_on_entry (&sr);
22415 /* Make sure nothing is scheduled before we are done. */
22416 emit_insn (gen_blockage ());
22419 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
22420 absolute addresses. */
const char *
output_probe_stack_range (rtx reg1, rtx reg2)
22425 static int labelno = 0;
22429 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
22432 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
22434 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
22436 xops[1] = GEN_INT (PROBE_INTERVAL);
22437 output_asm_insn ("sub\t%0, %0, %1", xops);
22439 /* Probe at TEST_ADDR. */
22440 output_asm_insn ("str\tr0, [%0, #0]", xops);
22442 /* Test if TEST_ADDR == LAST_ADDR. */
22444 output_asm_insn ("cmp\t%0, %1", xops);
22447 fputs ("\tbne\t", asm_out_file);
22448 assemble_name_raw (asm_out_file, loop_lab);
22449 fputc ('\n', asm_out_file);
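/* Illustrative output of the loop above (an assumption, with
   PROBE_INTERVAL == 4096, reg1 == r4 and reg2 == r5):

   .LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0
*/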
/* Generate the prologue instructions for entry into an ARM or Thumb-2
function. */
void
arm_expand_prologue (void)
22462 unsigned long live_regs_mask;
22463 unsigned long func_type;
22465 int saved_pretend_args = 0;
22466 int saved_regs = 0;
22467 unsigned HOST_WIDE_INT args_to_push;
22468 HOST_WIDE_INT size;
22469 arm_stack_offsets *offsets;
22472 func_type = arm_current_func_type ();
22474 /* Naked functions don't have prologues. */
22475 if (IS_NAKED (func_type))
22477 if (flag_stack_usage_info)
22478 current_function_static_stack_size = 0;
22482 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
22483 args_to_push = crtl->args.pretend_args_size;
22485 /* Compute which register we will have to save onto the stack. */
22486 offsets = arm_get_frame_offsets ();
22487 live_regs_mask = offsets->saved_regs_mask;
22489 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
22491 if (IS_STACKALIGN (func_type))
/* Handle a word-aligned stack pointer. We generate the following:

     mov r0, sp
     bic r1, r0, #7
     mov sp, r1
     <save and restore r0 in normal prologue/epilogue>
     mov sp, r0
     bx r0

   The unwinder doesn't need to know about the stack realignment.
   Just tell it we saved SP in r0. */
22506 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
22508 r0 = gen_rtx_REG (SImode, R0_REGNUM);
22509 r1 = gen_rtx_REG (SImode, R1_REGNUM);
22511 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
22512 RTX_FRAME_RELATED_P (insn) = 1;
22513 add_reg_note (insn, REG_CFA_REGISTER, NULL);
22515 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
22517 /* ??? The CFA changes here, which may cause GDB to conclude that it
22518 has entered a different function. That said, the unwind info is
22519 correct, individually, before and after this instruction because
22520 we've described the save of SP, which will override the default
22521 handling of SP as restoring from the CFA. */
22522 emit_insn (gen_movsi (stack_pointer_rtx, r1));
22525 /* Let's compute the static_chain_stack_bytes required and store it. Right
22526 now the value must be -1 as stored by arm_init_machine_status (). */
22527 cfun->machine->static_chain_stack_bytes
22528 = arm_compute_static_chain_stack_bytes ();
22530 /* The static chain register is the same as the IP register. If it is
22531 clobbered when creating the frame, we need to save and restore it. */
22532 clobber_ip = IS_NESTED (func_type)
22533 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
22534 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
22535 || flag_stack_clash_protection)
22536 && !df_regs_ever_live_p (LR_REGNUM)
22537 && arm_r3_live_at_start_p ()));
22539 /* Find somewhere to store IP whilst the frame is being created.
22540 We try the following places in order:
22542 1. The last argument register r3 if it is available.
22543 2. A slot on the stack above the frame if there are no
22544 arguments to push onto the stack.
22545 3. Register r3 again, after pushing the argument registers
22546 onto the stack, if this is a varargs function.
22547 4. The last slot on the stack created for the arguments to
22548 push, if this isn't a varargs function.
22550 Note - we only need to tell the dwarf2 backend about the SP
22551 adjustment in the second variant; the static chain register
22552 doesn't need to be unwound, as it doesn't contain a value
22553 inherited from the caller. */
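/* For instance (an illustrative assumption, not from the original
   comment): in a nested function with an APCS frame, no arguments to
   push and r3 live on entry, variant 2 above stores IP with
   "str ip, [sp, #-4]!" and only the 4-byte SP adjustment is reported
   to the DWARF backend. */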
22556 if (!arm_r3_live_at_start_p ())
22557 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
22558 else if (args_to_push == 0)
22562 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
22565 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
22566 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
22569 /* Just tell the dwarf backend that we adjusted SP. */
22570 dwarf = gen_rtx_SET (stack_pointer_rtx,
22571 plus_constant (Pmode, stack_pointer_rtx,
22573 RTX_FRAME_RELATED_P (insn) = 1;
22574 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22578 /* Store the args on the stack. */
22579 if (cfun->machine->uses_anonymous_args)
22581 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
22582 (0xf0 >> (args_to_push / 4)) & 0xf);
22583 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
22584 saved_pretend_args = 1;
22590 if (args_to_push == 4)
22591 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
22593 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
22594 plus_constant (Pmode,
22598 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
22600 /* Just tell the dwarf backend that we adjusted SP. */
22601 dwarf = gen_rtx_SET (stack_pointer_rtx,
22602 plus_constant (Pmode, stack_pointer_rtx,
22604 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22607 RTX_FRAME_RELATED_P (insn) = 1;
22608 fp_offset = args_to_push;
22613 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
22615 if (IS_INTERRUPT (func_type))
22617 /* Interrupt functions must not corrupt any registers.
22618 Creating a frame pointer however, corrupts the IP
22619 register, so we must push it first. */
22620 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
22622 /* Do not set RTX_FRAME_RELATED_P on this insn.
22623 The dwarf stack unwinding code only wants to see one
22624 stack decrement per function, and this is not it. If
22625 this instruction is labeled as being part of the frame
22626 creation sequence then dwarf2out_frame_debug_expr will
22627 die when it encounters the assignment of IP to FP
22628 later on, since the use of SP here establishes SP as
22629 the CFA register and not IP.
22631 Anyway this instruction is not really part of the stack
22632 frame creation although it is part of the prologue. */
22635 insn = emit_set_insn (ip_rtx,
22636 plus_constant (Pmode, stack_pointer_rtx,
22638 RTX_FRAME_RELATED_P (insn) = 1;
22643 /* Push the argument registers, or reserve space for them. */
22644 if (cfun->machine->uses_anonymous_args)
22645 insn = emit_multi_reg_push
22646 ((0xf0 >> (args_to_push / 4)) & 0xf,
22647 (0xf0 >> (args_to_push / 4)) & 0xf);
22650 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22651 GEN_INT (- args_to_push)));
22652 RTX_FRAME_RELATED_P (insn) = 1;
/* If this is an interrupt service routine, and the link register
is going to be pushed, and we're not generating an extra
push of IP (needed when a frame is needed and the frame layout is APCS),
then subtracting four from LR now will mean that the function return
can be done with a single instruction. */
22660 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
22661 && (live_regs_mask & (1 << LR_REGNUM)) != 0
22662 && !(frame_pointer_needed && TARGET_APCS_FRAME)
22665 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
22667 emit_set_insn (lr, plus_constant (SImode, lr, -4));
22670 if (live_regs_mask)
22672 unsigned long dwarf_regs_mask = live_regs_mask;
22674 saved_regs += bit_count (live_regs_mask) * 4;
22675 if (optimize_size && !frame_pointer_needed
22676 && saved_regs == offsets->saved_regs - offsets->saved_args)
22678 /* If no coprocessor registers are being pushed and we don't have
22679 to worry about a frame pointer then push extra registers to
22680 create the stack frame. This is done in a way that does not
22681 alter the frame layout, so is independent of the epilogue. */
22685 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
22687 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
22688 if (frame && n * 4 >= frame)
22691 live_regs_mask |= (1 << n) - 1;
22692 saved_regs += frame;
22697 && current_tune->prefer_ldrd_strd
22698 && !optimize_function_for_size_p (cfun))
22700 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
22702 thumb2_emit_strd_push (live_regs_mask);
22703 else if (TARGET_ARM
22704 && !TARGET_APCS_FRAME
22705 && !IS_INTERRUPT (func_type))
22706 arm_emit_strd_push (live_regs_mask);
22709 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
22710 RTX_FRAME_RELATED_P (insn) = 1;
22715 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
22716 RTX_FRAME_RELATED_P (insn) = 1;
22720 if (! IS_VOLATILE (func_type))
22721 saved_regs += arm_save_coproc_regs ();
22723 if (frame_pointer_needed && TARGET_ARM)
22725 /* Create the new frame pointer. */
22726 if (TARGET_APCS_FRAME)
22728 insn = GEN_INT (-(4 + args_to_push + fp_offset));
22729 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
22730 RTX_FRAME_RELATED_P (insn) = 1;
22734 insn = GEN_INT (saved_regs - (4 + fp_offset));
22735 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22736 stack_pointer_rtx, insn));
22737 RTX_FRAME_RELATED_P (insn) = 1;
22741 size = offsets->outgoing_args - offsets->saved_args;
22742 if (flag_stack_usage_info)
22743 current_function_static_stack_size = size;
22745 /* If this isn't an interrupt service routine and we have a frame, then do
22746 stack checking. We use IP as the first scratch register, except for the
22747 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
22748 if (!IS_INTERRUPT (func_type)
22749 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
22750 || flag_stack_clash_protection))
22752 unsigned int regno;
22754 if (!IS_NESTED (func_type) || clobber_ip)
22756 else if (df_regs_ever_live_p (LR_REGNUM))
22761 if (crtl->is_leaf && !cfun->calls_alloca)
22763 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
22764 arm_emit_probe_stack_range (get_stack_check_protect (),
22765 size - get_stack_check_protect (),
22766 regno, live_regs_mask);
22769 arm_emit_probe_stack_range (get_stack_check_protect (), size,
22770 regno, live_regs_mask);
22773 /* Recover the static chain register. */
22776 if (!arm_r3_live_at_start_p () || saved_pretend_args)
22777 insn = gen_rtx_REG (SImode, 3);
22780 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
22781 insn = gen_frame_mem (SImode, insn);
22783 emit_set_insn (ip_rtx, insn);
22784 emit_insn (gen_force_register_use (ip_rtx));
22787 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
22789 /* This add can produce multiple insns for a large constant, so we
22790 need to get tricky. */
22791 rtx_insn *last = get_last_insn ();
22793 amount = GEN_INT (offsets->saved_args + saved_regs
22794 - offsets->outgoing_args);
22796 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22800 last = last ? NEXT_INSN (last) : get_insns ();
22801 RTX_FRAME_RELATED_P (last) = 1;
22803 while (last != insn);
22805 /* If the frame pointer is needed, emit a special barrier that
22806 will prevent the scheduler from moving stores to the frame
22807 before the stack adjustment. */
22808 if (frame_pointer_needed)
22809 emit_insn (gen_stack_tie (stack_pointer_rtx,
22810 hard_frame_pointer_rtx));
22814 if (frame_pointer_needed && TARGET_THUMB2)
22815 thumb_set_frame_pointer (offsets);
22817 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22819 unsigned long mask;
22821 mask = live_regs_mask;
22822 mask &= THUMB2_WORK_REGS;
22823 if (!IS_NESTED (func_type))
22824 mask |= (1 << IP_REGNUM);
22825 arm_load_pic_register (mask, NULL_RTX);
22828 /* If we are profiling, make sure no instructions are scheduled before
22829 the call to mcount. Similarly if the user has requested no
22830 scheduling in the prolog. Similarly if we want non-call exceptions
22831 using the EABI unwinder, to prevent faulting instructions from being
22832 swapped with a stack adjustment. */
22833 if (crtl->profile || !TARGET_SCHED_PROLOG
22834 || (arm_except_unwind_info (&global_options) == UI_TARGET
22835 && cfun->can_throw_non_call_exceptions))
22836 emit_insn (gen_blockage ());
22838 /* If the link register is being kept alive, with the return address in it,
22839 then make sure that it does not get reused by the ce2 pass. */
22840 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
22841 cfun->machine->lr_save_eliminated = 1;
22844 /* Print condition code to STREAM. Helper function for arm_print_operand. */
static void
arm_print_condition (FILE *stream)
22848 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
22850 /* Branch conversion is not implemented for Thumb-2. */
22853 output_operand_lossage ("predicated Thumb instruction");
22856 if (current_insn_predicate != NULL)
22858 output_operand_lossage
22859 ("predicated instruction in conditional sequence");
22863 fputs (arm_condition_codes[arm_current_cc], stream);
22865 else if (current_insn_predicate)
22867 enum arm_cond_code code;
22871 output_operand_lossage ("predicated Thumb instruction");
22875 code = get_arm_condition_code (current_insn_predicate);
22876 fputs (arm_condition_codes[code], stream);
/* Globally reserved letters: acln
Punctuation letters currently used: @_|?().!#
Lower case letters currently used: bcdefhimpqtvwxyz
Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
Letters previously used, but now deprecated/obsolete: sVWXYZ.
22887 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
22889 If CODE is 'd', then the X is a condition operand and the instruction
22890 should only be executed if the condition is true.
If CODE is 'D', then the X is a condition operand and the instruction
22892 should only be executed if the condition is false: however, if the mode
22893 of the comparison is CCFPEmode, then always execute the instruction -- we
22894 do this because in these circumstances !GE does not necessarily imply LT;
22895 in these cases the instruction pattern will take care to make sure that
22896 an instruction containing %d will follow, thereby undoing the effects of
22897 doing this instruction unconditionally.
If CODE is 'N' then X is a floating point operand that must be negated
before output.
22900 If CODE is 'B' then output a bitwise inverted value of X (a const int).
22901 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
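/* For instance (an illustrative assumption, not from the original
   comment), a conditional-move pattern template along the lines of

       "mov%d3\t%0, %1\;mov%D3\t%0, %2"

   would use 'd' and 'D' to emit a pair of oppositely predicated moves
   from a single comparison operand. */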
static void
arm_print_operand (FILE *stream, rtx x, int code)
22908 fputs (ASM_COMMENT_START, stream);
22912 fputs (user_label_prefix, stream);
22916 fputs (REGISTER_PREFIX, stream);
22920 arm_print_condition (stream);
22924 /* The current condition code for a condition code setting instruction.
22925 Preceded by 's' in unified syntax, otherwise followed by 's'. */
22926 fputc('s', stream);
22927 arm_print_condition (stream);
22931 /* If the instruction is conditionally executed then print
22932 the current condition code, otherwise print 's'. */
22933 gcc_assert (TARGET_THUMB2);
22934 if (current_insn_predicate)
22935 arm_print_condition (stream);
22937 fputc('s', stream);
/* %# is a "break" sequence. It doesn't output anything, but is used to
separate e.g. operand numbers from following text, if that text consists
of further digits which we don't want to be part of the operand
number. */
22950 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
22951 fprintf (stream, "%s", fp_const_from_val (&r));
22955 /* An integer or symbol address without a preceding # sign. */
22957 switch (GET_CODE (x))
22960 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
22964 output_addr_const (stream, x);
22968 if (GET_CODE (XEXP (x, 0)) == PLUS
22969 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
22971 output_addr_const (stream, x);
22974 /* Fall through. */
22977 output_operand_lossage ("Unsupported operand for code '%c'", code);
22981 /* An integer that we want to print in HEX. */
22983 switch (GET_CODE (x))
22986 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
22990 output_operand_lossage ("Unsupported operand for code '%c'", code);
22995 if (CONST_INT_P (x))
22998 val = ARM_SIGN_EXTEND (~INTVAL (x));
22999 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
23003 putc ('~', stream);
23004 output_addr_const (stream, x);
23009 /* Print the log2 of a CONST_INT. */
23013 if (!CONST_INT_P (x)
23014 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
23015 output_operand_lossage ("Unsupported operand for code '%c'", code);
23017 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
23022 /* The low 16 bits of an immediate constant. */
23023 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
23027 fprintf (stream, "%s", arithmetic_instr (x, 1));
23031 fprintf (stream, "%s", arithmetic_instr (x, 0));
23039 shift = shift_op (x, &val);
23043 fprintf (stream, ", %s ", shift);
23045 arm_print_operand (stream, XEXP (x, 1), 0);
23047 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
23052 /* An explanation of the 'Q', 'R' and 'H' register operands:
23054 In a pair of registers containing a DI or DF value the 'Q'
23055 operand returns the register number of the register containing
23056 the least significant part of the value. The 'R' operand returns
23057 the register number of the register containing the most
23058 significant part of the value.
23060 The 'H' operand returns the higher of the two register numbers.
23061 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
23062 same as the 'Q' operand, since the most significant part of the
23063 value is held in the lower number register. The reverse is true
23064 on systems where WORDS_BIG_ENDIAN is false.
23066 The purpose of these operands is to distinguish between cases
23067 where the endian-ness of the values is important (for example
23068 when they are added together), and cases where the endian-ness
23069 is irrelevant, but the order of register operations is important.
23070 For example when loading a value from memory into a register
23071 pair, the endian-ness does not matter. Provided that the value
23072 from the lower memory address is put into the lower numbered
23073 register, and the value from the higher address is put into the
23074 higher numbered register, the load will work regardless of whether
23075 the value being loaded is big-wordian or little-wordian. The
23076 order of the two register loads can matter however, if the address
23077 of the memory location is actually held in one of the registers
23078 being overwritten by the load.
The 'Q' and 'R' constraints are also available for 64-bit
constants. */
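/* Example (an illustrative assumption): for a DImode value held in
   {r0, r1} on a little-endian target, %Q prints r0 (the least
   significant word), %R prints r1 (the most significant word) and %H
   prints r1 (the higher register number) regardless of word order. */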
23083 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
23085 rtx part = gen_lowpart (SImode, x);
23086 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
23090 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23092 output_operand_lossage ("invalid operand for code '%c'", code);
23096 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
23100 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
23102 machine_mode mode = GET_MODE (x);
23105 if (mode == VOIDmode)
23107 part = gen_highpart_mode (SImode, mode, x);
23108 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
23112 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23114 output_operand_lossage ("invalid operand for code '%c'", code);
23118 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
23122 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23124 output_operand_lossage ("invalid operand for code '%c'", code);
23128 asm_fprintf (stream, "%r", REGNO (x) + 1);
23132 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23134 output_operand_lossage ("invalid operand for code '%c'", code);
23138 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
23142 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23144 output_operand_lossage ("invalid operand for code '%c'", code);
23148 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
23152 asm_fprintf (stream, "%r",
23153 REG_P (XEXP (x, 0))
23154 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
23158 asm_fprintf (stream, "{%r-%r}",
23160 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
/* Like 'M', but writing doubleword vector registers, for use by Neon
insns. */
23167 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
23168 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
23170 asm_fprintf (stream, "{d%d}", regno);
23172 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
23177 /* CONST_TRUE_RTX means always -- that's the default. */
23178 if (x == const_true_rtx)
23181 if (!COMPARISON_P (x))
23183 output_operand_lossage ("invalid operand for code '%c'", code);
23187 fputs (arm_condition_codes[get_arm_condition_code (x)],
23192 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
23193 want to do that. */
23194 if (x == const_true_rtx)
23196 output_operand_lossage ("instruction never executed");
23199 if (!COMPARISON_P (x))
23201 output_operand_lossage ("invalid operand for code '%c'", code);
23205 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
23206 (get_arm_condition_code (x))],
23216 /* Former Maverick support, removed after GCC-4.7. */
23217 output_operand_lossage ("obsolete Maverick format code '%c'", code);
23222 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
23223 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
23224 /* Bad value for wCG register number. */
23226 output_operand_lossage ("invalid operand for code '%c'", code);
23231 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
23234 /* Print an iWMMXt control register name. */
23236 if (!CONST_INT_P (x)
23238 || INTVAL (x) >= 16)
23239 /* Bad value for wC register number. */
23241 output_operand_lossage ("invalid operand for code '%c'", code);
23247 static const char * wc_reg_names [16] =
23249 "wCID", "wCon", "wCSSF", "wCASF",
23250 "wC4", "wC5", "wC6", "wC7",
23251 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
23252 "wC12", "wC13", "wC14", "wC15"
23255 fputs (wc_reg_names [INTVAL (x)], stream);
/* Print the high single-precision register of a VFP double-precision
register. */
23263 machine_mode mode = GET_MODE (x);
23266 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
23268 output_operand_lossage ("invalid operand for code '%c'", code);
23273 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
23275 output_operand_lossage ("invalid operand for code '%c'", code);
23279 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
23283 /* Print a VFP/Neon double precision or quad precision register name. */
23287 machine_mode mode = GET_MODE (x);
23288 int is_quad = (code == 'q');
23291 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
23293 output_operand_lossage ("invalid operand for code '%c'", code);
23298 || !IS_VFP_REGNUM (REGNO (x)))
23300 output_operand_lossage ("invalid operand for code '%c'", code);
23305 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
23306 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
23308 output_operand_lossage ("invalid operand for code '%c'", code);
23312 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
23313 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
23317 /* These two codes print the low/high doubleword register of a Neon quad
23318 register, respectively. For pair-structure types, can also print
23319 low/high quadword registers. */
23323 machine_mode mode = GET_MODE (x);
23326 if ((GET_MODE_SIZE (mode) != 16
23327 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
23329 output_operand_lossage ("invalid operand for code '%c'", code);
23334 if (!NEON_REGNO_OK_FOR_QUAD (regno))
23336 output_operand_lossage ("invalid operand for code '%c'", code);
23340 if (GET_MODE_SIZE (mode) == 16)
23341 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
23342 + (code == 'f' ? 1 : 0));
23344 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
23345 + (code == 'f' ? 1 : 0));
/* Print a VFPv3 floating-point constant, represented as an integer
index. */
23353 int index = vfp3_const_double_index (x);
23354 gcc_assert (index != -1);
23355 fprintf (stream, "%d", index);
23359 /* Print bits representing opcode features for Neon.
23361 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
23362 and polynomials as unsigned.
23364 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
23366 Bit 2 is 1 for rounding functions, 0 otherwise. */
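/* Example (an illustrative assumption): an operand value of 3 (bits 0
   and 1 set) selects 'f' below, contributing the 'f' in e.g. a
   "vadd.f32" mnemonic, while a value of 0 selects 'u' for an unsigned
   integer operation. */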
23368 /* Identify the type as 's', 'u', 'p' or 'f'. */
23371 HOST_WIDE_INT bits = INTVAL (x);
23372 fputc ("uspf"[bits & 3], stream);
23376 /* Likewise, but signed and unsigned integers are both 'i'. */
23379 HOST_WIDE_INT bits = INTVAL (x);
23380 fputc ("iipf"[bits & 3], stream);
23384 /* As for 'T', but emit 'u' instead of 'p'. */
23387 HOST_WIDE_INT bits = INTVAL (x);
23388 fputc ("usuf"[bits & 3], stream);
23392 /* Bit 2: rounding (vs none). */
23395 HOST_WIDE_INT bits = INTVAL (x);
23396 fputs ((bits & 4) != 0 ? "r" : "", stream);
23400 /* Memory operand for vld1/vst1 instruction. */
23404 bool postinc = FALSE;
23405 rtx postinc_reg = NULL;
23406 unsigned align, memsize, align_bits;
23408 gcc_assert (MEM_P (x));
23409 addr = XEXP (x, 0);
23410 if (GET_CODE (addr) == POST_INC)
23413 addr = XEXP (addr, 0);
23415 if (GET_CODE (addr) == POST_MODIFY)
23417 postinc_reg = XEXP( XEXP (addr, 1), 1);
23418 addr = XEXP (addr, 0);
23420 asm_fprintf (stream, "[%r", REGNO (addr));
/* We know the alignment of this access, so we can emit a hint in the
instruction (for some alignments) as an aid to the memory subsystem
of the target. */
23425 align = MEM_ALIGN (x) >> 3;
23426 memsize = MEM_SIZE (x);
23428 /* Only certain alignment specifiers are supported by the hardware. */
23429 if (memsize == 32 && (align % 32) == 0)
23431 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
23433 else if (memsize >= 8 && (align % 8) == 0)
23438 if (align_bits != 0)
23439 asm_fprintf (stream, ":%d", align_bits);
23441 asm_fprintf (stream, "]");
23444 fputs("!", stream);
23446 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
23454 gcc_assert (MEM_P (x));
23455 addr = XEXP (x, 0);
23456 gcc_assert (REG_P (addr));
23457 asm_fprintf (stream, "[%r]", REGNO (addr));
23461 /* Translate an S register number into a D register number and element index. */
23464 machine_mode mode = GET_MODE (x);
23467 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
23469 output_operand_lossage ("invalid operand for code '%c'", code);
23474 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
23476 output_operand_lossage ("invalid operand for code '%c'", code);
23480 regno = regno - FIRST_VFP_REGNUM;
23481 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
23486 gcc_assert (CONST_DOUBLE_P (x));
23488 result = vfp3_const_double_for_fract_bits (x);
23490 result = vfp3_const_double_for_bits (x);
23491 fprintf (stream, "#%d", result);
23494 /* Register specifier for vld1.16/vst1.16. Translate the S register
23495 number into a D register number and element index. */
23498 machine_mode mode = GET_MODE (x);
23501 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
23503 output_operand_lossage ("invalid operand for code '%c'", code);
23508 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
23510 output_operand_lossage ("invalid operand for code '%c'", code);
23514 regno = regno - FIRST_VFP_REGNUM;
23515 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
23522 output_operand_lossage ("missing operand");
23526 switch (GET_CODE (x))
23529 asm_fprintf (stream, "%r", REGNO (x));
23533 output_address (GET_MODE (x), XEXP (x, 0));
23539 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
23540 sizeof (fpstr), 0, 1);
23541 fprintf (stream, "#%s", fpstr);
23546 gcc_assert (GET_CODE (x) != NEG);
23547 fputc ('#', stream);
23548 if (GET_CODE (x) == HIGH)
23550 fputs (":lower16:", stream);
23554 output_addr_const (stream, x);
23560 /* Target hook for printing a memory address. */
static void
arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
23566 int is_minus = GET_CODE (x) == MINUS;
23569 asm_fprintf (stream, "[%r]", REGNO (x));
23570 else if (GET_CODE (x) == PLUS || is_minus)
23572 rtx base = XEXP (x, 0);
23573 rtx index = XEXP (x, 1);
23574 HOST_WIDE_INT offset = 0;
23576 || (REG_P (index) && REGNO (index) == SP_REGNUM))
/* Ensure that BASE is a register (one of them must be).
Also ensure that SP is not used as an index register. */
23581 std::swap (base, index);
23583 switch (GET_CODE (index))
23586 offset = INTVAL (index);
23589 asm_fprintf (stream, "[%r, #%wd]",
23590 REGNO (base), offset);
23594 asm_fprintf (stream, "[%r, %s%r]",
23595 REGNO (base), is_minus ? "-" : "",
23605 asm_fprintf (stream, "[%r, %s%r",
23606 REGNO (base), is_minus ? "-" : "",
23607 REGNO (XEXP (index, 0)));
23608 arm_print_operand (stream, index, 'S');
23609 fputs ("]", stream);
23614 gcc_unreachable ();
23617 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
23618 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
23620 gcc_assert (REG_P (XEXP (x, 0)));
23622 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
23623 asm_fprintf (stream, "[%r, #%s%d]!",
23624 REGNO (XEXP (x, 0)),
23625 GET_CODE (x) == PRE_DEC ? "-" : "",
23626 GET_MODE_SIZE (mode));
23628 asm_fprintf (stream, "[%r], #%s%d",
23629 REGNO (XEXP (x, 0)),
23630 GET_CODE (x) == POST_DEC ? "-" : "",
23631 GET_MODE_SIZE (mode));
23633 else if (GET_CODE (x) == PRE_MODIFY)
23635 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
23636 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
23637 asm_fprintf (stream, "#%wd]!",
23638 INTVAL (XEXP (XEXP (x, 1), 1)));
23640 asm_fprintf (stream, "%r]!",
23641 REGNO (XEXP (XEXP (x, 1), 1)));
23643 else if (GET_CODE (x) == POST_MODIFY)
23645 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
23646 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
23647 asm_fprintf (stream, "#%wd",
23648 INTVAL (XEXP (XEXP (x, 1), 1)));
23650 asm_fprintf (stream, "%r",
23651 REGNO (XEXP (XEXP (x, 1), 1)));
23653 else output_addr_const (stream, x);
23658 asm_fprintf (stream, "[%r]", REGNO (x));
23659 else if (GET_CODE (x) == POST_INC)
23660 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
23661 else if (GET_CODE (x) == PLUS)
23663 gcc_assert (REG_P (XEXP (x, 0)));
23664 if (CONST_INT_P (XEXP (x, 1)))
23665 asm_fprintf (stream, "[%r, #%wd]",
23666 REGNO (XEXP (x, 0)),
23667 INTVAL (XEXP (x, 1)));
23669 asm_fprintf (stream, "[%r, %r]",
23670 REGNO (XEXP (x, 0)),
23671 REGNO (XEXP (x, 1)));
23674 output_addr_const (stream, x);
23678 /* Target hook for indicating whether a punctuation character for
23679 TARGET_PRINT_OPERAND is valid. */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
23683 return (code == '@' || code == '|' || code == '.'
23684 || code == '(' || code == ')' || code == '#'
23685 || (TARGET_32BIT && (code == '?'))
23686 || (TARGET_THUMB2 && (code == '!'))
23687 || (TARGET_THUMB && (code == '_')));
23690 /* Target hook for assembling integer objects. The ARM version needs to
23691 handle word-sized values specially. */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
23697 if (size == UNITS_PER_WORD && aligned_p)
23699 fputs ("\t.word\t", asm_out_file);
23700 output_addr_const (asm_out_file, x);
23702 /* Mark symbols as position independent. We only do this in the
23703 .text segment, not in the .data segment. */
23704 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
23705 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
23707 /* See legitimize_pic_address for an explanation of the
23708 TARGET_VXWORKS_RTP check. */
/* References to weak symbols cannot be resolved locally:
they may be overridden by a non-weak definition at link
time. */
23712 if (!arm_pic_data_is_text_relative
23713 || (GET_CODE (x) == SYMBOL_REF
23714 && (!SYMBOL_REF_LOCAL_P (x)
23715 || (SYMBOL_REF_DECL (x)
23716 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
23717 || (SYMBOL_REF_FUNCTION_P (x)
23718 && !arm_fdpic_local_funcdesc_p (x)))))
23720 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
23721 fputs ("(GOTFUNCDESC)", asm_out_file);
23723 fputs ("(GOT)", asm_out_file);
23727 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
23728 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
23734 || arm_is_segment_info_known (x, &is_readonly))
23735 fputs ("(GOTOFF)", asm_out_file);
23737 fputs ("(GOT)", asm_out_file);
23742 /* For FDPIC we also have to mark symbol for .data section. */
23744 && !making_const_table
23745 && SYMBOL_REF_P (x)
23746 && SYMBOL_REF_FUNCTION_P (x))
23747 fputs ("(FUNCDESC)", asm_out_file);
23749 fputc ('\n', asm_out_file);
23753 mode = GET_MODE (x);
23755 if (arm_vector_mode_supported_p (mode))
23759 gcc_assert (GET_CODE (x) == CONST_VECTOR);
23761 units = CONST_VECTOR_NUNITS (x);
23762 size = GET_MODE_UNIT_SIZE (mode);
23764 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23765 for (i = 0; i < units; i++)
23767 rtx elt = CONST_VECTOR_ELT (x, i);
23769 assemble_integer (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
23772 for (i = 0; i < units; i++)
23774 rtx elt = CONST_VECTOR_ELT (x, i);
23776 assemble_real (*CONST_DOUBLE_REAL_VALUE (elt),
23777 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
23778 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
23784 return default_assemble_integer (x, size, aligned_p);
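/* For illustration (editorial; symbol names are placeholders), the
   word-sized path above emits directives such as

	.word	foo(GOT)	@ PIC reference to a preemptible symbol
	.word	bar(GOTOFF)	@ PIC reference to a locally-bound data symbol
	.word	foo		@ plain word, no relocation suffix

   while non-word sizes fall through to the default hook.  */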
23788 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
23792 if (!TARGET_AAPCS_BASED)
23795 default_named_section_asm_out_constructor
23796 : default_named_section_asm_out_destructor) (symbol, priority);
23800 /* Put these in the .init_array section, using a special relocation. */
23801 if (priority != DEFAULT_INIT_PRIORITY)
23804 sprintf (buf, "%s.%.5u",
23805 is_ctor ? ".init_array" : ".fini_array",
23807 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
23814 switch_to_section (s);
23815 assemble_align (POINTER_SIZE);
23816 fputs ("\t.word\t", asm_out_file);
23817 output_addr_const (asm_out_file, symbol);
23818 fputs ("(target1)\n", asm_out_file);
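/* Worked example (editorial): a constructor with priority 100 is placed in
   section ".init_array.00100" by the "%s.%.5u" format above and emitted as
   ".word <symbol>(target1)"; the target1 relocation is resolved as either
   absolute or PC-relative, as the platform demands.  */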
23821 /* Add a function to the list of static constructors. */
23824 arm_elf_asm_constructor (rtx symbol, int priority)
23826 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
23829 /* Add a function to the list of static destructors. */
23832 arm_elf_asm_destructor (rtx symbol, int priority)
23834 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
23837 /* A finite state machine takes care of noticing whether or not instructions
23838 can be conditionally executed, and thus decrease execution time and code
23839 size by deleting branch instructions. The fsm is controlled by
23840 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
23842 /* The states of the fsm controlling condition codes are:
23843 0: normal, do nothing special
23844 1: make ASM_OUTPUT_OPCODE not output this instruction
23845 2: make ASM_OUTPUT_OPCODE not output this instruction
23846 3: make instructions conditional
23847 4: make instructions conditional
23849 State transitions (state->state by whom under condition):
23850 0 -> 1 final_prescan_insn if the `target' is a label
23851 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
23852 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
23853 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
23854 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
23855 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
23856 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
23857 (the target insn is arm_target_insn).
23859 If the jump clobbers the conditions then we use states 2 and 4.
23861 A similar thing can be done with conditional return insns.
23863 XXX In case the `target' is an unconditional branch, this conditionalising
23864 of the instructions always reduces code size, but not always execution
23865 time. But then, I want to reduce the code size to somewhere near what
23866 /bin/cc produces. */
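/* An illustrative example (editorial) of the transformation this FSM
   performs: the ARM-state sequence

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   is emitted instead as

	cmp	r0, #0
	addne	r1, r1, #1

   via the transitions 0 -> 1 (branch over a label seen), 1 -> 3 (branch
   suppressed), 3 -> 0 (target label reached).  */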
23868 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23869 instructions. When a COND_EXEC instruction is seen the subsequent
23870 instructions are scanned so that multiple conditional instructions can be
23871 combined into a single IT block. arm_condexec_count and arm_condexec_mask
23872 specify the length and true/false mask for the IT block. These will be
23873 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
23875 /* Returns the index of the ARM condition code string in
23876 `arm_condition_codes', or ARM_NV if the comparison is invalid.
23877 COMPARISON should be an rtx like `(eq (...) (...))'. */
23880 maybe_get_arm_condition_code (rtx comparison)
23882 machine_mode mode = GET_MODE (XEXP (comparison, 0));
23883 enum arm_cond_code code;
23884 enum rtx_code comp_code = GET_CODE (comparison);
23886 if (GET_MODE_CLASS (mode) != MODE_CC)
23887 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
23888 XEXP (comparison, 1));
23892 case E_CC_DNEmode: code = ARM_NE; goto dominance;
23893 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
23894 case E_CC_DGEmode: code = ARM_GE; goto dominance;
23895 case E_CC_DGTmode: code = ARM_GT; goto dominance;
23896 case E_CC_DLEmode: code = ARM_LE; goto dominance;
23897 case E_CC_DLTmode: code = ARM_LT; goto dominance;
23898 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
23899 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
23900 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
23901 case E_CC_DLTUmode: code = ARM_CC;
23904 if (comp_code == EQ)
23905 return ARM_INVERSE_CONDITION_CODE (code);
23906 if (comp_code == NE)
23913 case NE: return ARM_NE;
23914 case EQ: return ARM_EQ;
23915 case GE: return ARM_PL;
23916 case LT: return ARM_MI;
23917 default: return ARM_NV;
23923 case NE: return ARM_NE;
23924 case EQ: return ARM_EQ;
23925 default: return ARM_NV;
23931 case NE: return ARM_MI;
23932 case EQ: return ARM_PL;
23933 default: return ARM_NV;
23938 /* We can handle all cases except UNEQ and LTGT. */
23941 case GE: return ARM_GE;
23942 case GT: return ARM_GT;
23943 case LE: return ARM_LS;
23944 case LT: return ARM_MI;
23945 case NE: return ARM_NE;
23946 case EQ: return ARM_EQ;
23947 case ORDERED: return ARM_VC;
23948 case UNORDERED: return ARM_VS;
23949 case UNLT: return ARM_LT;
23950 case UNLE: return ARM_LE;
23951 case UNGT: return ARM_HI;
23952 case UNGE: return ARM_PL;
23953 /* UNEQ and LTGT do not have a representation. */
23954 case UNEQ: /* Fall through. */
23955 case LTGT: /* Fall through. */
23956 default: return ARM_NV;
23962 case NE: return ARM_NE;
23963 case EQ: return ARM_EQ;
23964 case GE: return ARM_LE;
23965 case GT: return ARM_LT;
23966 case LE: return ARM_GE;
23967 case LT: return ARM_GT;
23968 case GEU: return ARM_LS;
23969 case GTU: return ARM_CC;
23970 case LEU: return ARM_CS;
23971 case LTU: return ARM_HI;
23972 default: return ARM_NV;
23978 case LTU: return ARM_CS;
23979 case GEU: return ARM_CC;
23980 default: return ARM_NV;
23986 case GE: return ARM_GE;
23987 case LT: return ARM_LT;
23988 default: return ARM_NV;
23994 case GEU: return ARM_CS;
23995 case LTU: return ARM_CC;
23996 default: return ARM_NV;
24002 case NE: return ARM_VS;
24003 case EQ: return ARM_VC;
24004 default: return ARM_NV;
24010 case GEU: return ARM_CS;
24011 case LTU: return ARM_CC;
24012 default: return ARM_NV;
24019 case NE: return ARM_NE;
24020 case EQ: return ARM_EQ;
24021 case GE: return ARM_GE;
24022 case GT: return ARM_GT;
24023 case LE: return ARM_LE;
24024 case LT: return ARM_LT;
24025 case GEU: return ARM_CS;
24026 case GTU: return ARM_HI;
24027 case LEU: return ARM_LS;
24028 case LTU: return ARM_CC;
24029 default: return ARM_NV;
24032 default: gcc_unreachable ();
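/* For instance, in plain CCmode (gtu ...) maps to ARM_HI ("hi") via the
   table above, while in the swapped-operand mode (CC_SWPmode in the full
   sources) it maps to ARM_CC instead, because the comparison's operands
   were reversed when the mode was chosen.  */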
24036 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
24037 static enum arm_cond_code
24038 get_arm_condition_code (rtx comparison)
24040 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
24041 gcc_assert (code != ARM_NV);
24045 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
24046 code registers when not targeting Thumb1. The VFP condition register
24047 only exists when generating hard-float code. */
24049 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
24055 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
24059 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
24062 thumb2_final_prescan_insn (rtx_insn *insn)
24064 rtx_insn *first_insn = insn;
24065 rtx body = PATTERN (insn);
24067 enum arm_cond_code code;
24072 /* max_insns_skipped in the tune was already taken into account in the
24073 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
24074 just emit the IT blocks as we can. It does not make sense to split the IT blocks. */
24076 max = MAX_INSN_PER_IT_BLOCK;
24078 /* Remove the previous insn from the count of insns to be output. */
24079 if (arm_condexec_count)
24080 arm_condexec_count--;
24082 /* Nothing to do if we are already inside a conditional block. */
24083 if (arm_condexec_count)
24086 if (GET_CODE (body) != COND_EXEC)
24089 /* Conditional jumps are implemented directly. */
24093 predicate = COND_EXEC_TEST (body);
24094 arm_current_cc = get_arm_condition_code (predicate);
24096 n = get_attr_ce_count (insn);
24097 arm_condexec_count = 1;
24098 arm_condexec_mask = (1 << n) - 1;
24099 arm_condexec_masklen = n;
24100 /* See if subsequent instructions can be combined into the same block. */
24103 insn = next_nonnote_insn (insn);
24105 /* Jumping into the middle of an IT block is illegal, so a label or
24106 barrier terminates the block. */
24107 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
24110 body = PATTERN (insn);
24111 /* USE and CLOBBER aren't really insns, so just skip them. */
24112 if (GET_CODE (body) == USE
24113 || GET_CODE (body) == CLOBBER)
24116 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
24117 if (GET_CODE (body) != COND_EXEC)
24119 /* Maximum number of conditionally executed instructions in a block. */
24120 n = get_attr_ce_count (insn);
24121 if (arm_condexec_masklen + n > max)
24124 predicate = COND_EXEC_TEST (body);
24125 code = get_arm_condition_code (predicate);
24126 mask = (1 << n) - 1;
24127 if (arm_current_cc == code)
24128 arm_condexec_mask |= (mask << arm_condexec_masklen);
24129 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
24132 arm_condexec_count++;
24133 arm_condexec_masklen += n;
24135 /* A jump must be the last instruction in a conditional block. */
24139 /* Restore recog_data (getting the attributes of other insns can
24140 destroy this array, but final.c assumes that it remains intact
24141 across this call). */
24142 extract_constrain_insn_cached (first_insn);
24146 arm_final_prescan_insn (rtx_insn *insn)
24148 /* BODY will hold the body of INSN. */
24149 rtx body = PATTERN (insn);
24151 /* This will be 1 if trying to repeat the trick, and things need to be
24152 reversed if it appears to fail. */
24155 /* If we start with a return insn, we only succeed if we find another one. */
24156 int seeking_return = 0;
24157 enum rtx_code return_code = UNKNOWN;
24159 /* START_INSN will hold the insn from where we start looking. This is the
24160 first insn after the following code_label if REVERSE is true. */
24161 rtx_insn *start_insn = insn;
24163 /* If in state 4, check if the target branch is reached, in order to
24164 change back to state 0. */
24165 if (arm_ccfsm_state == 4)
24167 if (insn == arm_target_insn)
24169 arm_target_insn = NULL;
24170 arm_ccfsm_state = 0;
24175 /* If in state 3, it is possible to repeat the trick, if this insn is an
24176 unconditional branch to a label, and immediately following this branch
24177 is the previous target label which is only used once, and the label this
24178 branch jumps to is not too far off. */
24179 if (arm_ccfsm_state == 3)
24181 if (simplejump_p (insn))
24183 start_insn = next_nonnote_insn (start_insn);
24184 if (BARRIER_P (start_insn))
24186 /* XXX Isn't this always a barrier? */
24187 start_insn = next_nonnote_insn (start_insn);
24189 if (LABEL_P (start_insn)
24190 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
24191 && LABEL_NUSES (start_insn) == 1)
24196 else if (ANY_RETURN_P (body))
24198 start_insn = next_nonnote_insn (start_insn);
24199 if (BARRIER_P (start_insn))
24200 start_insn = next_nonnote_insn (start_insn);
24201 if (LABEL_P (start_insn)
24202 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
24203 && LABEL_NUSES (start_insn) == 1)
24206 seeking_return = 1;
24207 return_code = GET_CODE (body);
24216 gcc_assert (!arm_ccfsm_state || reverse);
24217 if (!JUMP_P (insn))
24220 /* This jump might be paralleled with a clobber of the condition codes;
24221 the jump should always come first. */
24222 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
24223 body = XVECEXP (body, 0, 0);
24226 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
24227 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
24230 int fail = FALSE, succeed = FALSE;
24231 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
24232 int then_not_else = TRUE;
24233 rtx_insn *this_insn = start_insn;
24236 /* Register the insn jumped to. */
24239 if (!seeking_return)
24240 label = XEXP (SET_SRC (body), 0);
24242 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
24243 label = XEXP (XEXP (SET_SRC (body), 1), 0);
24244 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
24246 label = XEXP (XEXP (SET_SRC (body), 2), 0);
24247 then_not_else = FALSE;
24249 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
24251 seeking_return = 1;
24252 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
24254 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
24256 seeking_return = 1;
24257 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
24258 then_not_else = FALSE;
24261 gcc_unreachable ();
24263 /* See how many insns this branch skips, and what kind of insns. If all
24264 insns are okay, and the label or unconditional branch to the same
24265 label is not too far away, succeed. */
24266 for (insns_skipped = 0;
24267 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
24271 this_insn = next_nonnote_insn (this_insn);
24275 switch (GET_CODE (this_insn))
24278 /* Succeed if it is the target label, otherwise fail since
24279 control falls in from somewhere else. */
24280 if (this_insn == label)
24282 arm_ccfsm_state = 1;
24290 /* Succeed if the following insn is the target label.
24292 If return insns are used then the last insn in a function
24293 will be a barrier. */
24294 this_insn = next_nonnote_insn (this_insn);
24295 if (this_insn && this_insn == label)
24297 arm_ccfsm_state = 1;
24305 /* The AAPCS says that conditional calls should not be
24306 used since they make interworking inefficient (the
24307 linker can't transform BL<cond> into BLX). That's
24308 only a problem if the machine has BLX. */
24315 /* Succeed if the following insn is the target label, or
24316 if the following two insns are a barrier and the target label. */
24318 this_insn = next_nonnote_insn (this_insn);
24319 if (this_insn && BARRIER_P (this_insn))
24320 this_insn = next_nonnote_insn (this_insn);
24322 if (this_insn && this_insn == label
24323 && insns_skipped < max_insns_skipped)
24325 arm_ccfsm_state = 1;
24333 /* If this is an unconditional branch to the same label, succeed.
24334 If it is to another label, do nothing. If it is conditional, fail. */
24336 /* XXX Probably, the tests for SET and the PC are unnecessary. */
24339 scanbody = PATTERN (this_insn);
24340 if (GET_CODE (scanbody) == SET
24341 && GET_CODE (SET_DEST (scanbody)) == PC)
24343 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
24344 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
24346 arm_ccfsm_state = 2;
24349 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
24352 /* Fail if a conditional return is undesirable (e.g. on a
24353 StrongARM), but still allow this if optimizing for size. */
24354 else if (GET_CODE (scanbody) == return_code
24355 && !use_return_insn (TRUE, NULL)
24358 else if (GET_CODE (scanbody) == return_code)
24360 arm_ccfsm_state = 2;
24363 else if (GET_CODE (scanbody) == PARALLEL)
24365 switch (get_attr_conds (this_insn))
24375 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
24380 /* Instructions using or affecting the condition codes make it fail. */
24382 scanbody = PATTERN (this_insn);
24383 if (!(GET_CODE (scanbody) == SET
24384 || GET_CODE (scanbody) == PARALLEL)
24385 || get_attr_conds (this_insn) != CONDS_NOCOND)
24395 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
24396 arm_target_label = CODE_LABEL_NUMBER (label);
24399 gcc_assert (seeking_return || arm_ccfsm_state == 2);
24401 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
24403 this_insn = next_nonnote_insn (this_insn);
24404 gcc_assert (!this_insn
24405 || (!BARRIER_P (this_insn)
24406 && !LABEL_P (this_insn)));
24410 /* Oh, dear! We ran off the end... give up. */
24411 extract_constrain_insn_cached (insn);
24412 arm_ccfsm_state = 0;
24413 arm_target_insn = NULL;
24416 arm_target_insn = this_insn;
24419 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from what it was. */
24422 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
24424 if (reverse || then_not_else)
24425 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
24428 /* Restore recog_data (getting the attributes of other insns can
24429 destroy this array, but final.c assumes that it remains intact
24430 across this call). */
24431 extract_constrain_insn_cached (insn);
24435 /* Output IT instructions. */
24437 thumb2_asm_output_opcode (FILE * stream)
24442 if (arm_condexec_mask)
24444 for (n = 0; n < arm_condexec_masklen; n++)
24445 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
24447 asm_fprintf (stream, "i%s\t%s\n\t", buff,
24448 arm_condition_codes[arm_current_cc]);
24449 arm_condexec_mask = 0;
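/* For example, with arm_current_cc == ARM_EQ, arm_condexec_masklen == 2 and
   arm_condexec_mask == 0x1 (bit 0 set: first insn "then", bit 1 clear:
   second insn "else"), the loop above builds "te" and the call prints
   "ite\teq" ahead of the first conditional opcode.  */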
24453 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
24454 UNITS_PER_WORD bytes wide. */
24455 static unsigned int
24456 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
24459 && regno > PC_REGNUM
24460 && regno != FRAME_POINTER_REGNUM
24461 && regno != ARG_POINTER_REGNUM
24462 && !IS_VFP_REGNUM (regno))
24465 return ARM_NUM_REGS (mode);
24468 /* Implement TARGET_HARD_REGNO_MODE_OK. */
24470 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
24472 if (GET_MODE_CLASS (mode) == MODE_CC)
24473 return (regno == CC_REGNUM
24474 || (TARGET_HARD_FLOAT
24475 && regno == VFPCC_REGNUM));
24477 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
24481 /* For the Thumb we only allow values bigger than SImode in
24482 registers 0 - 6, so that there is always a second low
24483 register available to hold the upper part of the value.
24484 We probably ought to ensure that the register is the
24485 start of an even numbered register pair. */
24486 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
24488 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
24490 if (mode == SFmode || mode == SImode)
24491 return VFP_REGNO_OK_FOR_SINGLE (regno);
24493 if (mode == DFmode)
24494 return VFP_REGNO_OK_FOR_DOUBLE (regno);
24496 if (mode == HFmode)
24497 return VFP_REGNO_OK_FOR_SINGLE (regno);
24499 /* VFP registers can hold HImode values. */
24500 if (mode == HImode)
24501 return VFP_REGNO_OK_FOR_SINGLE (regno);
24504 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
24505 || (VALID_NEON_QREG_MODE (mode)
24506 && NEON_REGNO_OK_FOR_QUAD (regno))
24507 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
24508 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
24509 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
24510 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
24511 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
24516 if (TARGET_REALLY_IWMMXT)
24518 if (IS_IWMMXT_GR_REGNUM (regno))
24519 return mode == SImode;
24521 if (IS_IWMMXT_REGNUM (regno))
24522 return VALID_IWMMXT_REG_MODE (mode);
24525 /* We allow almost any value to be stored in the general registers.
24526 Restrict doubleword quantities to even register pairs in ARM state
24527 so that we can use ldrd. Do not allow very large Neon structure
24528 opaque modes in general registers; they would use too many. */
24529 if (regno <= LAST_ARM_REGNUM)
24531 if (ARM_NUM_REGS (mode) > 4)
24537 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
24540 if (regno == FRAME_POINTER_REGNUM
24541 || regno == ARG_POINTER_REGNUM)
24542 /* We only allow integers in the fake hard registers. */
24543 return GET_MODE_CLASS (mode) == MODE_INT;
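/* Example of the LDRD restriction above (editorial): in ARM state with
   TARGET_LDRD, a DImode value may live in {r0, r1} or {r2, r3} (even base
   register number) but not in {r1, r2}, so ldrd/strd stay usable.  */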
24548 /* Implement TARGET_MODES_TIEABLE_P. */
24551 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
24553 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
24556 /* We specifically want to allow elements of "structure" modes to
24557 be tieable to the structure. This more general condition allows
24558 other rarer situations too. */
24560 && (VALID_NEON_DREG_MODE (mode1)
24561 || VALID_NEON_QREG_MODE (mode1)
24562 || VALID_NEON_STRUCT_MODE (mode1))
24563 && (VALID_NEON_DREG_MODE (mode2)
24564 || VALID_NEON_QREG_MODE (mode2)
24565 || VALID_NEON_STRUCT_MODE (mode2)))
24571 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
24572 not used in arm mode. */
24575 arm_regno_class (int regno)
24577 if (regno == PC_REGNUM)
24582 if (regno == STACK_POINTER_REGNUM)
24584 if (regno == CC_REGNUM)
24591 if (TARGET_THUMB2 && regno < 8)
24594 if ( regno <= LAST_ARM_REGNUM
24595 || regno == FRAME_POINTER_REGNUM
24596 || regno == ARG_POINTER_REGNUM)
24597 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
24599 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
24600 return TARGET_THUMB2 ? CC_REG : NO_REGS;
24602 if (IS_VFP_REGNUM (regno))
24604 if (regno <= D7_VFP_REGNUM)
24605 return VFP_D0_D7_REGS;
24606 else if (regno <= LAST_LO_VFP_REGNUM)
24607 return VFP_LO_REGS;
24609 return VFP_HI_REGS;
24612 if (IS_IWMMXT_REGNUM (regno))
24613 return IWMMXT_REGS;
24615 if (IS_IWMMXT_GR_REGNUM (regno))
24616 return IWMMXT_GR_REGS;
24621 /* Handle a special case when computing the offset
24622 of an argument from the frame pointer. */
24624 arm_debugger_arg_offset (int value, rtx addr)
24628 /* We are only interested in cases where dbxout_parms () failed to compute the offset. */
24632 /* We can only cope with the case where the address is held in a register. */
24636 /* If we are using the frame pointer to point at the argument, then
24637 an offset of 0 is correct. */
24638 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
24641 /* If we are using the stack pointer to point at the
24642 argument, then an offset of 0 is correct. */
24643 /* ??? Check this is consistent with thumb2 frame layout. */
24644 if ((TARGET_THUMB || !frame_pointer_needed)
24645 && REGNO (addr) == SP_REGNUM)
24648 /* Oh dear. The argument is pointed to by a register rather
24649 than being held in a register, or being stored at a known
24650 offset from the frame pointer. Since GDB only understands
24651 those two kinds of argument we must translate the address
24652 held in the register into an offset from the frame pointer.
24653 We do this by searching through the insns for the function
24654 looking to see where this register gets its value. If the
24655 register is initialized from the frame pointer plus an offset
24656 then we are in luck and we can continue, otherwise we give up.
24658 This code is exercised by producing debugging information
24659 for a function with arguments like this:
24661 double func (double a, double b, int c, double d) {return d;}
24663 Without this code the stab for parameter 'd' will be set to
24664 an offset of 0 from the frame pointer, rather than 8. */
24666 /* The if() statement says:
24668 If the insn is a normal instruction
24669 and if the insn is setting the value in a register
24670 and if the register being set is the register holding the address of the argument
24671 and if the address is computed by an addition
24672 that involves adding to a register
24673 which is the frame pointer
24678 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24680 if ( NONJUMP_INSN_P (insn)
24681 && GET_CODE (PATTERN (insn)) == SET
24682 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
24683 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
24684 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
24685 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
24686 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
24689 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
24698 warning (0, "unable to compute real location of stacked parameter");
24699 value = 8; /* XXX magic hack */
24705 /* Implement TARGET_PROMOTED_TYPE. */
24708 arm_promoted_type (const_tree t)
24710 if (SCALAR_FLOAT_TYPE_P (t)
24711 && TYPE_PRECISION (t) == 16
24712 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
24713 return float_type_node;
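/* Consequently, arithmetic on __fp16 is performed in float: the sum of two
   __fp16 values is computed as a 32-bit float and only rounded back to
   16 bits when stored.  */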
24717 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24718 This simply adds HFmode as a supported mode; even though we don't
24719 implement arithmetic on this type directly, it's supported by
24720 optabs conversions, much the way the double-word arithmetic is
24721 special-cased in the default hook. */
24724 arm_scalar_mode_supported_p (scalar_mode mode)
24726 if (mode == HFmode)
24727 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
24728 else if (ALL_FIXED_POINT_MODE_P (mode))
24731 return default_scalar_mode_supported_p (mode);
24734 /* Set the value of FLT_EVAL_METHOD.
24735 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
24737 0: evaluate all operations and constants, whose semantic type has at
24738 most the range and precision of type float, to the range and
24739 precision of float; evaluate all other operations and constants to
24740 the range and precision of the semantic type;
24742 N, where _FloatN is a supported interchange floating type
24743 evaluate all operations and constants, whose semantic type has at
24744 most the range and precision of _FloatN type, to the range and
24745 precision of the _FloatN type; evaluate all other operations and
24746 constants to the range and precision of the semantic type;
24748 If we have the ARMv8.2-A extensions then we support _Float16 in native
24749 precision, so we should set this to 16. Otherwise, we support the type,
24750 but want to evaluate expressions in float precision, so set this to 0. */
24753 static enum flt_eval_method
24754 arm_excess_precision (enum excess_precision_type type)
24758 case EXCESS_PRECISION_TYPE_FAST:
24759 case EXCESS_PRECISION_TYPE_STANDARD:
24760 /* We can calculate either in 16-bit range and precision or
24761 32-bit range and precision. Make that decision based on whether
24762 we have native support for the ARMv8.2-A 16-bit floating-point
24763 instructions or not. */
24764 return (TARGET_VFP_FP16INST
24765 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
24766 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
24767 case EXCESS_PRECISION_TYPE_IMPLICIT:
24768 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
24770 gcc_unreachable ();
24772 return FLT_EVAL_METHOD_UNPREDICTABLE;
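/* Illustration (editorial): without TARGET_VFP_FP16INST, "_Float16 a, b;"
   followed by "a * b" under standard excess precision is evaluated as a
   float multiply and rounded to 16 bits only on assignment; with the
   ARMv8.2-A FP16 instructions it is a single native half-precision
   multiply.  */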
24776 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
24777 _Float16 if we are using anything other than ieee format for 16-bit
24778 floating point. Otherwise, punt to the default implementation. */
24779 static opt_scalar_float_mode
24780 arm_floatn_mode (int n, bool extended)
24782 if (!extended && n == 16)
24784 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
24786 return opt_scalar_float_mode ();
24789 return default_floatn_mode (n, extended);
24793 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
24794 not to early-clobber SRC registers in the process.
24796 We assume that the operands described by SRC and DEST represent a
24797 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
24798 number of components into which the copy has been decomposed. */
24800 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
24804 if (!reg_overlap_mentioned_p (operands[0], operands[1])
24805 || REGNO (operands[0]) < REGNO (operands[1]))
24807 for (i = 0; i < count; i++)
24809 operands[2 * i] = dest[i];
24810 operands[2 * i + 1] = src[i];
24815 for (i = 0; i < count; i++)
24817 operands[2 * i] = dest[count - i - 1];
24818 operands[2 * i + 1] = src[count - i - 1];
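/* Worked example (editorial): copying {d1, d2} from the overlapping pair
   {d0, d1}. Here REGNO (operands[0]) > REGNO (operands[1]), so the
   reversed loop orders the moves d2 <- d1 then d1 <- d0, ensuring d1 is
   read before it is overwritten.  */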
24823 /* Split operands into moves from op[1] + op[2] into op[0]. */
24826 neon_split_vcombine (rtx operands[3])
24828 unsigned int dest = REGNO (operands[0]);
24829 unsigned int src1 = REGNO (operands[1]);
24830 unsigned int src2 = REGNO (operands[2]);
24831 machine_mode halfmode = GET_MODE (operands[1]);
24832 unsigned int halfregs = REG_NREGS (operands[1]);
24833 rtx destlo, desthi;
24835 if (src1 == dest && src2 == dest + halfregs)
24837 /* No-op move. Can't split to nothing; emit something. */
24838 emit_note (NOTE_INSN_DELETED);
24842 /* Preserve register attributes for variable tracking. */
24843 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
24844 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
24845 GET_MODE_SIZE (halfmode));
24847 /* Special case of reversed high/low parts. Use VSWP. */
24848 if (src2 == dest && src1 == dest + halfregs)
24850 rtx x = gen_rtx_SET (destlo, operands[1]);
24851 rtx y = gen_rtx_SET (desthi, operands[2]);
24852 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
24856 if (!reg_overlap_mentioned_p (operands[2], destlo))
24858 /* Try to avoid unnecessary moves if part of the result
24859 is in the right place already. */
24861 emit_move_insn (destlo, operands[1]);
24862 if (src2 != dest + halfregs)
24863 emit_move_insn (desthi, operands[2]);
24867 if (src2 != dest + halfregs)
24868 emit_move_insn (desthi, operands[2]);
24870 emit_move_insn (destlo, operands[1]);
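/* E.g. combining d0 and d1 into q0 (which is exactly {d0, d1}) is the
   no-op case above, while combining d1 and d0 into q0 takes the VSWP
   path.  */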
24874 /* Return the number (counting from 0) of
24875 the least significant set bit in MASK. */
24878 number_of_first_bit_set (unsigned mask)
24880 return ctz_hwi (mask);
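/* E.g. number_of_first_bit_set (0x28) == 3, since 0x28 is 0b101000.  */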
24883 /* Like emit_multi_reg_push, but allowing for a different set of
24884 registers to be described as saved. MASK is the set of registers
24885 to be saved; REAL_REGS is the set of registers to be described as
24886 saved. If REAL_REGS is 0, only describe the stack adjustment. */
24889 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
24891 unsigned long regno;
24892 rtx par[10], tmp, reg;
24896 /* Build the parallel of the registers actually being stored. */
24897 for (i = 0; mask; ++i, mask &= mask - 1)
24899 regno = ctz_hwi (mask);
24900 reg = gen_rtx_REG (SImode, regno);
24903 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
24905 tmp = gen_rtx_USE (VOIDmode, reg);
24910 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24911 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
24912 tmp = gen_frame_mem (BLKmode, tmp);
24913 tmp = gen_rtx_SET (tmp, par[0]);
24916 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
24917 insn = emit_insn (tmp);
24919 /* Always build the stack adjustment note for unwind info. */
24920 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24921 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
24924 /* Build the parallel of the registers recorded as saved for unwind. */
24925 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
24927 regno = ctz_hwi (real_regs);
24928 reg = gen_rtx_REG (SImode, regno);
24930 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
24931 tmp = gen_frame_mem (SImode, tmp);
24932 tmp = gen_rtx_SET (tmp, reg);
24933 RTX_FRAME_RELATED_P (tmp) = 1;
24941 RTX_FRAME_RELATED_P (par[0]) = 1;
24942 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
24945 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
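/* Editorial example: for MASK == {r4, r5, lr} this emits a single
   "push {r4, r5, lr}" whose attached REG_FRAME_RELATED_EXPR records the
   12-byte stack adjustment plus one store per register described in
   REAL_REGS, so the unwinder sees the effect even when MASK and REAL_REGS
   differ.  */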
24950 /* Emit code to push or pop registers to or from the stack. F is the
24951 assembly file. MASK is the registers to pop. */
24953 thumb_pop (FILE *f, unsigned long mask)
24956 int lo_mask = mask & 0xFF;
24960 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
24962 /* Special case. Do not generate a POP PC statement here; do it in thumb_exit (). */
24964 thumb_exit (f, -1);
24968 fprintf (f, "\tpop\t{");
24970 /* Look at the low registers first. */
24971 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
24975 asm_fprintf (f, "%r", regno);
24977 if ((lo_mask & ~1) != 0)
24982 if (mask & (1 << PC_REGNUM))
24984 /* Catch popping the PC. */
24985 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
24986 || IS_CMSE_ENTRY (arm_current_func_type ()))
24988 /* The PC is never popped directly; instead
24989 it is popped into r3 and then BX is used. */
24990 fprintf (f, "}\n");
24992 thumb_exit (f, -1);
25001 asm_fprintf (f, "%r", PC_REGNUM);
25005 fprintf (f, "}\n");
25008 /* Generate code to return from a thumb function.
25009 If 'reg_containing_return_addr' is -1, then the return address is
25010 actually on the stack, at the stack pointer.
25012 Note: do not forget to update the length attribute of the corresponding insn pattern
25013 when changing assembly output (e.g. the length attribute of epilogue_insns when
25014 updating the Armv8-M Baseline Security Extensions register clearing sequences). */
25017 thumb_exit (FILE *f, int reg_containing_return_addr)
25019 unsigned regs_available_for_popping;
25020 unsigned regs_to_pop;
25022 unsigned available;
25026 int restore_a4 = FALSE;
25028 /* Compute the registers we need to pop. */
25032 if (reg_containing_return_addr == -1)
25034 regs_to_pop |= 1 << LR_REGNUM;
25038 if (TARGET_BACKTRACE)
25040 /* Restore the (ARM) frame pointer and stack pointer. */
25041 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
25045 /* If there is nothing to pop then just emit the BX instruction and
25047 if (pops_needed == 0)
25049 if (crtl->calls_eh_return)
25050 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
25052 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25054 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
25055 reg_containing_return_addr);
25056 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
25059 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
25062 /* Otherwise if we are not supporting interworking and we have not created
25063 a backtrace structure and the function was not entered in ARM mode then
25064 just pop the return address straight into the PC. */
25065 else if (!TARGET_INTERWORK
25066 && !TARGET_BACKTRACE
25067 && !is_called_in_ARM_mode (current_function_decl)
25068 && !crtl->calls_eh_return
25069 && !IS_CMSE_ENTRY (arm_current_func_type ()))
25071 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
25075 /* Find out how many of the (return) argument registers we can corrupt. */
25076 regs_available_for_popping = 0;
25078 /* If returning via __builtin_eh_return, the bottom three registers
25079 all contain information needed for the return. */
25080 if (crtl->calls_eh_return)
25084 /* We can deduce the registers used from the function's
25085 return value. This is more reliable than examining
25086 df_regs_ever_live_p () because that will be set if the register is
25087 ever used in the function, not just if the register is used
25088 to hold a return value. */
25090 if (crtl->return_rtx != 0)
25091 mode = GET_MODE (crtl->return_rtx);
25093 mode = DECL_MODE (DECL_RESULT (current_function_decl));
25095 size = GET_MODE_SIZE (mode);
25099 /* In a void function we can use any argument register.
25100 In a function that returns a structure on the stack
25101 we can use the second and third argument registers. */
25102 if (mode == VOIDmode)
25103 regs_available_for_popping =
25104 (1 << ARG_REGISTER (1))
25105 | (1 << ARG_REGISTER (2))
25106 | (1 << ARG_REGISTER (3));
25108 regs_available_for_popping =
25109 (1 << ARG_REGISTER (2))
25110 | (1 << ARG_REGISTER (3));
25112 else if (size <= 4)
25113 regs_available_for_popping =
25114 (1 << ARG_REGISTER (2))
25115 | (1 << ARG_REGISTER (3));
25116 else if (size <= 8)
25117 regs_available_for_popping =
25118 (1 << ARG_REGISTER (3));
25121 /* Match registers to be popped with registers into which we pop them. */
25122 for (available = regs_available_for_popping,
25123 required = regs_to_pop;
25124 required != 0 && available != 0;
25125 available &= ~(available & - available),
25126 required &= ~(required & - required))
25129 /* If we have any popping registers left over, remove them. */
25131 regs_available_for_popping &= ~available;
25133 /* Otherwise if we need another popping register we can use
25134 the fourth argument register. */
25135 else if (pops_needed)
25137 /* If we have not found any free argument registers and
25138 reg a4 contains the return address, we must move it. */
25139 if (regs_available_for_popping == 0
25140 && reg_containing_return_addr == LAST_ARG_REGNUM)
25142 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
25143 reg_containing_return_addr = LR_REGNUM;
25145 else if (size > 12)
25147 /* Register a4 is being used to hold part of the return value,
25148 but we have dire need of a free, low register. */
25151 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
25154 if (reg_containing_return_addr != LAST_ARG_REGNUM)
25156 /* The fourth argument register is available. */
25157 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
25163 /* Pop as many registers as we can. */
25164 thumb_pop (f, regs_available_for_popping);
25166 /* Process the registers we popped. */
25167 if (reg_containing_return_addr == -1)
25169 /* The return address was popped into the lowest numbered register. */
25170 regs_to_pop &= ~(1 << LR_REGNUM);
25172 reg_containing_return_addr =
25173 number_of_first_bit_set (regs_available_for_popping);
25175 /* Remove this register from the mask of available registers, so that
25176 the return address will not be corrupted by further pops. */
25177 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
25180 /* If we popped other registers then handle them here. */
25181 if (regs_available_for_popping)
25185 /* Work out which register currently contains the frame pointer. */
25186 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
25188 /* Move it into the correct place. */
25189 asm_fprintf (f, "\tmov\t%r, %r\n",
25190 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
25192 /* (Temporarily) remove it from the mask of popped registers. */
25193 regs_available_for_popping &= ~(1 << frame_pointer);
25194 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
25196 if (regs_available_for_popping)
25200 /* We popped the stack pointer as well,
25201 find the register that contains it. */
25202 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
25204 /* Move it into the stack register. */
25205 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
25207 /* At this point we have popped all necessary registers, so
25208 do not worry about restoring regs_available_for_popping
25209 to its correct value:
25211 assert (pops_needed == 0)
25212 assert (regs_available_for_popping == (1 << frame_pointer))
25213 assert (regs_to_pop == (1 << STACK_POINTER)) */
25217 /* Since we have just moved the popped value into the frame
25218 pointer, the popping register is available for reuse, and
25219 we know that we still have the stack pointer left to pop. */
25220 regs_available_for_popping |= (1 << frame_pointer);
25224 /* If we still have registers left on the stack, but we no longer have
25225 any registers into which we can pop them, then we must move the return
25226 address into the link register and make available the register that contained it. */
25228 if (regs_available_for_popping == 0 && pops_needed > 0)
25230 regs_available_for_popping |= 1 << reg_containing_return_addr;
25232 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
25233 reg_containing_return_addr);
25235 reg_containing_return_addr = LR_REGNUM;
25238 /* If we have registers left on the stack then pop some more.
25239 We know that at most we will want to pop FP and SP. */
25240 if (pops_needed > 0)
25245 thumb_pop (f, regs_available_for_popping);
25247 /* We have popped either FP or SP.
25248 Move whichever one it is into the correct register. */
25249 popped_into = number_of_first_bit_set (regs_available_for_popping);
25250 move_to = number_of_first_bit_set (regs_to_pop);
25252 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
25256 /* If we still have not popped everything then we must have only
25257 had one register available to us and we are now popping the SP. */
25258 if (pops_needed > 0)
25262 thumb_pop (f, regs_available_for_popping);
25264 popped_into = number_of_first_bit_set (regs_available_for_popping);
25266 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
25268 assert (regs_to_pop == (1 << STACK_POINTER))
25269 assert (pops_needed == 1)
25273 /* If necessary restore the a4 register. */
25276 if (reg_containing_return_addr != LR_REGNUM)
25278 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
25279 reg_containing_return_addr = LR_REGNUM;
25282 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
25285 if (crtl->calls_eh_return)
25286 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
25288 /* Return to caller. */
25289 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25291 /* This is for the cases where LR is not being used to contain the return
25292 address. It may therefore contain information that we might not want
25293 to leak, hence it must be cleared. The value in R0 will never be a
25294 secret at this point, so it is safe to use it, see the clearing code
25295 in 'cmse_nonsecure_entry_clear_before_return'. */
25296 if (reg_containing_return_addr != LR_REGNUM)
25297 asm_fprintf (f, "\tmov\tlr, r0\n");
25299 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
25300 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
25303 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
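/* An illustrative epilogue (editorial): for an interworking function whose
   return address was left on the stack, the code above pops it into a free
   argument register and branches, roughly

	pop	{r2}
	bx	r2

   while the simple non-interworking case earlier emits just "pop {pc}".  */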
25306 /* Scan INSN just before assembler is output for it.
25307 For Thumb-1, we track the status of the condition codes; this
25308 information is used in the cbranchsi4_insn pattern. */
25310 thumb1_final_prescan_insn (rtx_insn *insn)
25312 if (flag_print_asm_name)
25313 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
25314 INSN_ADDRESSES (INSN_UID (insn)));
25315 /* Don't overwrite the previous setter when we get to a cbranch. */
25316 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
25318 enum attr_conds conds;
25320 if (cfun->machine->thumb1_cc_insn)
25322 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
25323 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
25326 conds = get_attr_conds (insn);
25327 if (conds == CONDS_SET)
25329 rtx set = single_set (insn);
25330 cfun->machine->thumb1_cc_insn = insn;
25331 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
25332 cfun->machine->thumb1_cc_op1 = const0_rtx;
25333 cfun->machine->thumb1_cc_mode = CC_NZmode;
25334 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
25336 rtx src1 = XEXP (SET_SRC (set), 1);
25337 if (src1 == const0_rtx)
25338 cfun->machine->thumb1_cc_mode = CCmode;
25340 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
25342 /* Record the src register operand instead of dest because
25343 the cprop_hardreg pass propagates src. */
25344 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
25347 else if (conds != CONDS_NOCOND)
25348 cfun->machine->thumb1_cc_insn = NULL_RTX;
25351 /* Check if unexpected far jump is used. */
25352 if (cfun->machine->lr_save_eliminated
25353 && get_attr_far_jump (insn) == FAR_JUMP_YES)
25354 internal_error ("Unexpected thumb1 far jump");
25358 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
25360 unsigned HOST_WIDE_INT mask = 0xff;
25363 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
25364 if (val == 0) /* XXX */
25367 for (i = 0; i < 25; i++)
25368 if ((val & (mask << i)) == val)
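/* E.g. 0x00ab0000 satisfies (val & (0xff << 16)) == val, so it can be
   materialised as a move of 0xab followed by a left shift of 16.  */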
25374 /* Returns nonzero if the current function contains,
25375 or might contain a far jump. */
25377 thumb_far_jump_used_p (void)
25380 bool far_jump = false;
25381 unsigned int func_size = 0;
25383 /* If we have already decided that far jumps may be used,
25384 do not bother checking again, and always return true even if
25385 it turns out that they are not being used. Once we have made
25386 the decision that far jumps are present (and that hence the link
25387 register will be pushed onto the stack) we cannot go back on it. */
25388 if (cfun->machine->far_jump_used)
25391 /* If this function is not being called from the prologue/epilogue
25392 generation code then it must be being called from the
25393 INITIAL_ELIMINATION_OFFSET macro. */
25394 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
25396 /* In this case we know that we are being asked about the elimination
25397 of the arg pointer register. If that register is not being used,
25398 then there are no arguments on the stack, and we do not have to
25399 worry that a far jump might force the prologue to push the link
25400 register, changing the stack offsets. In this case we can just
25401 return false, since the presence of far jumps in the function will
25402 not affect stack offsets.
25404 If the arg pointer is live (or if it was live, but has now been
25405 eliminated and so set to dead) then we do have to test to see if
25406 the function might contain a far jump. This test can lead to some
25407 false negatives, since before reload is completed the length of
25408 branch instructions is not known, so gcc defaults to returning their
25409 longest length, which in turn sets the far jump attribute to true.
25411 A false negative will not result in bad code being generated, but it
25412 will result in a needless push and pop of the link register. We
25413 hope that this does not occur too often.
25415 If we need doubleword stack alignment this could affect the other
25416 elimination offsets so we can't risk getting it wrong. */
25417 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
25418 cfun->machine->arg_pointer_live = 1;
25419 else if (!cfun->machine->arg_pointer_live)
25423 /* We should not change far_jump_used during or after reload, as there is
25424 no chance to change stack frame layout. */
25425 if (reload_in_progress || reload_completed)
25428 /* Check to see if the function contains a branch
25429 insn with the far jump attribute set. */
25430 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25432 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
25436 func_size += get_attr_length (insn);
25439 /* The far_jump attribute will always be true for thumb1 before the
25440 shorten_branch pass, so checking the far_jump attribute before
25441 shorten_branch isn't very useful.
25443 The following heuristic tries to estimate more accurately whether a far
25444 jump may actually be needed. The heuristic is very conservative, as there
25445 is no chance to roll back the decision not to use a far jump.
25447 The Thumb1 long branch offset is -2048 to 2046. The worst case is that
25448 each 2-byte insn is associated with a 4-byte constant pool entry. Using
25449 function size 2048/3 as the threshold is conservative enough. */
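/* In other words (editorial): once the pure instruction bytes exceed
   2048/3 ~= 682, assume a far jump may be needed, since in the worst case
   each 2-byte insn drags a 4-byte literal and the emitted span approaches
   func_size * 3 bytes, the +/-2048 byte reach of an unconditional Thumb-1
   branch.  */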
25452 if ((func_size * 3) >= 2048)
25454 /* Record the fact that we have decided that
25455 the function does use far jumps. */
25456 cfun->machine->far_jump_used = 1;
25464 /* Return nonzero if FUNC must be entered in ARM mode. */
25466 is_called_in_ARM_mode (tree func)
25468 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
25470 /* Ignore the problem of functions whose address is taken. */
25471 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
25475 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
25481 /* Given the stack offsets and register mask in OFFSETS, decide how
25482 many additional registers to push instead of subtracting a constant
25483 from SP. For epilogues the principle is the same except we use pop.
25484 FOR_PROLOGUE indicates which we're generating. */
25486 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
25488 HOST_WIDE_INT amount;
25489 unsigned long live_regs_mask = offsets->saved_regs_mask;
25490 /* Extract a mask of the ones we can give to the Thumb's push/pop instruction. */
25492 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
25493 /* Then count how many other high registers will need to be pushed. */
25494 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25495 int n_free, reg_base, size;
25497 if (!for_prologue && frame_pointer_needed)
25498 amount = offsets->locals_base - offsets->saved_regs;
25500 amount = offsets->outgoing_args - offsets->saved_regs;
25502 /* If the stack frame size is 512 exactly, we can save one load
25503 instruction, which should make this a win even when optimizing for speed. */
25505 if (!optimize_size && amount != 512)
25508 /* Can't do this if there are high registers to push. */
25509 if (high_regs_pushed != 0)
25512 /* Shouldn't do it in the prologue if no registers would normally
25513 be pushed at all. In the epilogue, also allow it if we'll have
25514 a pop insn for the PC. */
25517 || TARGET_BACKTRACE
25518 || (live_regs_mask & 1 << LR_REGNUM) == 0
25519 || TARGET_INTERWORK
25520 || crtl->args.pretend_args_size != 0))
25523 /* Don't do this if thumb_expand_prologue wants to emit instructions
25524 between the push and the stack frame allocation. */
25526 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
25527 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
25534 size = arm_size_return_regs ();
25535 reg_base = ARM_NUM_INTS (size);
25536 live_regs_mask >>= reg_base;
25539 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
25540 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
25542 live_regs_mask >>= 1;
25548 gcc_assert (amount / 4 * 4 == amount);
25550 if (amount >= 512 && (amount - n_free * 4) < 512)
25551 return (amount - 508) / 4;
25552 if (amount <= n_free * 4)
25557 /* The bits which aren't usefully expanded as rtl. */
25559 thumb1_unexpanded_epilogue (void)
25561 arm_stack_offsets *offsets;
25563 unsigned long live_regs_mask = 0;
25564 int high_regs_pushed = 0;
25566 int had_to_push_lr;
25569 if (cfun->machine->return_used_this_function != 0)
25572 if (IS_NAKED (arm_current_func_type ()))
25575 offsets = arm_get_frame_offsets ();
25576 live_regs_mask = offsets->saved_regs_mask;
25577 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25579 /* We can deduce the registers used from the function's return value.
25580 This is more reliable than examining df_regs_ever_live_p () because that
25581 will be set if the register is ever used in the function, not just if
25582 the register is used to hold a return value. */
25583 size = arm_size_return_regs ();
25585 extra_pop = thumb1_extra_regs_pushed (offsets, false);
25588 unsigned long extra_mask = (1 << extra_pop) - 1;
25589 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
25592 /* The prologue may have pushed some high registers to use as
25593 work registers. e.g. the testsuite file:
25594 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
25595 compiles to produce:
25596 push {r4, r5, r6, r7, lr}
25600 as part of the prologue. We have to undo that pushing here. */
25602 if (high_regs_pushed)
25604 unsigned long mask = live_regs_mask & 0xff;
25607 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
25610 /* Oh dear! We have no low registers into which we can pop the high registers! */
25613 internal_error ("no low registers available for popping high registers");
25615 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
25616 if (live_regs_mask & (1 << next_hi_reg))
25619 while (high_regs_pushed)
25621 /* Find lo register(s) into which the high register(s) can be popped. */
25623 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
25625 if (mask & (1 << regno))
25626 high_regs_pushed--;
25627 if (high_regs_pushed == 0)
25631 if (high_regs_pushed == 0 && regno >= 0)
25632 mask &= ~((1 << regno) - 1);
25634 /* Pop the values into the low register(s). */
25635 thumb_pop (asm_out_file, mask);
25637 /* Move the value(s) into the high registers. */
25638 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
25640 if (mask & (1 << regno))
25642 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
25645 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
25647 if (live_regs_mask & (1 << next_hi_reg))
25652 live_regs_mask &= ~0x0f00;
25655 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
25656 live_regs_mask &= 0xff;
25658 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
25660 /* Pop the return address into the PC. */
25661 if (had_to_push_lr)
25662 live_regs_mask |= 1 << PC_REGNUM;
25664 /* Either no argument registers were pushed or a backtrace
25665 structure was created which includes an adjusted stack
25666 pointer, so just pop everything. */
25667 if (live_regs_mask)
25668 thumb_pop (asm_out_file, live_regs_mask);
25670 /* We have either just popped the return address into the
25671 PC or it was kept in LR for the entire function.
25672 Note that thumb_pop has already called thumb_exit if the
25673 PC was in the list. */
25674 if (!had_to_push_lr)
25675 thumb_exit (asm_out_file, LR_REGNUM);
25679 /* Pop everything but the return address. */
25680 if (live_regs_mask)
25681 thumb_pop (asm_out_file, live_regs_mask);
25683 if (had_to_push_lr)
25687 /* We have no free low regs, so save one. */
25688 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
25692 /* Get the return address into a temporary register. */
25693 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
25697 /* Move the return address to lr. */
25698 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
25700 /* Restore the low register. */
25701 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
25706 regno = LAST_ARG_REGNUM;
25711 /* Remove the argument registers that were pushed onto the stack. */
25712 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
25713 SP_REGNUM, SP_REGNUM,
25714 crtl->args.pretend_args_size);
25716 thumb_exit (asm_out_file, regno);
25722 /* Functions to save and restore machine-specific function data. */
25723 static struct machine_function *
25724 arm_init_machine_status (void)
25726 struct machine_function *machine;
25727 machine = ggc_cleared_alloc<machine_function> ();
25729 #if ARM_FT_UNKNOWN != 0
25730 machine->func_type = ARM_FT_UNKNOWN;
25732 machine->static_chain_stack_bytes = -1;
25736 /* Return an RTX indicating where the return address to the
25737 calling function can be found. */
25739 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
25744 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
25747 /* Do anything needed before RTL is emitted for each function. */
25749 arm_init_expanders (void)
25751 /* Arrange to initialize and mark the machine per-function status. */
25752 init_machine_status = arm_init_machine_status;
25754 /* This is to stop the combine pass optimizing away the alignment
25755 adjustment of va_arg. */
25756 /* ??? It is claimed that this should not be necessary. */
25758 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
25761 /* Check whether FUNC is compiled in a different instruction mode (ARM vs. Thumb) from the current function. */
25764 arm_change_mode_p (tree func)
25766 if (TREE_CODE (func) != FUNCTION_DECL)
25769 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
25772 callee_tree = target_option_default_node;
25774 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
25775 int flags = callee_opts->x_target_flags;
25777 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
25780 /* Like arm_compute_initial_elimination_offset. Simpler because there
25781 isn't an ABI specified frame pointer for Thumb. Instead, we set it
25782 to point at the base of the local variables after static stack
25783 space for a function has been allocated. */
25786 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
25788 arm_stack_offsets *offsets;
25790 offsets = arm_get_frame_offsets ();
25794 case ARG_POINTER_REGNUM:
25797 case STACK_POINTER_REGNUM:
25798 return offsets->outgoing_args - offsets->saved_args;
25800 case FRAME_POINTER_REGNUM:
25801 return offsets->soft_frame - offsets->saved_args;
25803 case ARM_HARD_FRAME_POINTER_REGNUM:
25804 return offsets->saved_regs - offsets->saved_args;
25806 case THUMB_HARD_FRAME_POINTER_REGNUM:
25807 return offsets->locals_base - offsets->saved_args;
25810 gcc_unreachable ();
25814 case FRAME_POINTER_REGNUM:
25817 case STACK_POINTER_REGNUM:
25818 return offsets->outgoing_args - offsets->soft_frame;
25820 case ARM_HARD_FRAME_POINTER_REGNUM:
25821 return offsets->saved_regs - offsets->soft_frame;
25823 case THUMB_HARD_FRAME_POINTER_REGNUM:
25824 return offsets->locals_base - offsets->soft_frame;
25827 gcc_unreachable ();
25832 gcc_unreachable ();
/* Generate the function's prologue.  */

void
thumb1_expand_prologue (void)
{
  rtx_insn *insn;

  HOST_WIDE_INT amount;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  int regno;
  unsigned long live_regs_mask;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;
  bool lr_needs_saving;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
	current_function_static_stack_size = 0;
      return;
    }

  if (IS_INTERRUPT (func_type))
    {
      error ("interrupt Service Routines cannot be coded in Thumb mode");
      return;
    }

  if (is_called_in_ARM_mode (current_function_decl))
    emit_insn (gen_prologue_thumb1_interwork ());

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);

  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
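  /* For instance, a function that saves r4, r7, r8 and lr has
     live_regs_mask == 0x4190: l_mask keeps r4, r7 and lr (0x40ff covers
     r0-r7 plus lr at bit 14), while high_regs_pushed counts just r8 out
     of the r8-r11 field selected by 0x0f00.  */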
  if (crtl->args.pretend_args_size)
    {
      rtx x = GEN_INT (-crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
	{
	  int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
	  unsigned long mask;

	  mask = 1ul << (LAST_ARG_REGNUM + 1);
	  mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
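	  /* For example, with 8 bytes of pretend args ARM_NUM_INTS gives
	     num_pushes == 2, so the two lines above compute
	     (1 << 4) - (1 << 2) == 0xc, a mask pushing r2 and r3.  */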
	  insn = thumb1_emit_multi_reg_push (mask, 0);
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, x));
	}

      RTX_FRAME_RELATED_P (insn) = 1;
    }
  if (TARGET_BACKTRACE)
    {
      HOST_WIDE_INT offset = 0;
      unsigned work_register;
      rtx work_reg, x, arm_hfp_rtx;

      /* We have been asked to create a stack backtrace structure.
	 The code looks like this:

	 0   .align 2
	 0   func:
	 0     sub SP, #16	   Reserve space for 4 registers.
	 2     push {R7}	   Push low registers.
	 4     add R7, SP, #20	   Get the stack pointer before the push.
	 6     str R7, [SP, #8]	   Store the stack pointer
				   (before reserving the space).
	 8     mov R7, PC	   Get hold of the start of this code + 12.
	 10    str R7, [SP, #16]   Store it.
	 12    mov R7, FP	   Get hold of the current frame pointer.
	 14    str R7, [SP, #4]	   Store it.
	 16    mov R7, LR	   Get hold of the current return address.
	 18    str R7, [SP, #12]   Store it.
	 20    add R7, SP, #16	   Point at the start of the
				   backtrace structure.
	 22    mov FP, R7	   Put this value into the frame pointer.  */
      work_register = thumb_find_work_register (live_regs_mask);
      work_reg = gen_rtx_REG (SImode, work_register);
      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    stack_pointer_rtx, GEN_INT (-16)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (l_mask)
	{
	  insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  lr_needs_saving = false;

	  offset = bit_count (l_mask) * UNITS_PER_WORD;
	}

      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      /* Make sure that the instruction fetching the PC is in the right place
	 to calculate "start of backtrace creation code + 12".  */
      /* ??? The stores using the common WORK_REG ought to be enough to
	 prevent the scheduler from doing anything weird.  Failing that
	 we could always move all of the following into an UNSPEC_VOLATILE.  */
      if (l_mask)
	{
	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}
      else
	{
	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}

      x = gen_rtx_REG (SImode, LR_REGNUM);
      emit_move_insn (work_reg, x);

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      x = GEN_INT (offset + 12);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      emit_move_insn (arm_hfp_rtx, work_reg);
    }
  /* Optimization:  If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
	   || (high_regs_pushed == 0 && lr_needs_saving))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      insn = thumb1_emit_multi_reg_push (mask, mask);
      RTX_FRAME_RELATED_P (insn) = 1;
      lr_needs_saving = false;
    }
  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;
      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
					 : crtl->args.info.nregs;
      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      /* Here we need to mask out registers used for passing arguments
	 even if they can be pushed.  This is to avoid using them to
	 stash the high registers.  Such a stash would clobber the
	 arguments' values.  */
      pushable_regs = l_mask & (~arg_regs_mask);
      pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();

      /* Normally, LR can be used as a scratch register once it has been
	 saved; but if the function examines its own return address then
	 the value is still live and we need to avoid using it.  */
      bool return_addr_live
	= REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
			   LR_REGNUM);

      if (lr_needs_saving || return_addr_live)
	pushable_regs &= ~(1 << LR_REGNUM);

      if (pushable_regs == 0)
	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
      while (high_regs_pushed > 0)
	{
	  unsigned long real_regs_mask = 0;
	  unsigned long push_mask = 0;

	  for (regno = LR_REGNUM; regno >= 0; regno --)
	    {
	      if (pushable_regs & (1 << regno))
		{
		  emit_move_insn (gen_rtx_REG (SImode, regno),
				  gen_rtx_REG (SImode, next_hi_reg));

		  high_regs_pushed --;
		  real_regs_mask |= (1 << next_hi_reg);
		  push_mask |= (1 << regno);

		  if (high_regs_pushed)
		    {
		      for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
			   next_hi_reg --)
			if (live_regs_mask & (1 << next_hi_reg))
			  break;
		    }
		  else
		    break;
		}
	    }

	  /* If we had to find a work register and we have not yet
	     saved the LR then add it to the list of regs to push.  */
	  if (lr_needs_saving)
	    {
	      push_mask |= 1 << LR_REGNUM;
	      real_regs_mask |= 1 << LR_REGNUM;
	      lr_needs_saving = false;
	      /* If the return address is not live at this point, we
		 can add LR to the list of registers that we can use
		 as scratch.  */
	      if (!return_addr_live)
		pushable_regs |= 1 << LR_REGNUM;
	    }

	  insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask, NULL_RTX);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
		    stack_pointer_rtx);

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If we have a frame, then do stack checking.  FIXME: not implemented.  */
  if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
       || flag_stack_clash_protection)
      && size)
    sorry ("%<-fstack-check=specific%> for Thumb-1");

  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
					GEN_INT (- amount)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  rtx reg, dwarf;
	  /* The stack decrement is too big for an immediate value in a single
	     insn.  In theory we could issue multiple subtracts, but after
	     three of them it becomes more space efficient to place the full
	     value in the constant pool and load into a register.  (Also the
	     ARM debugger really likes to see only one stack decrement per
	     function).  So instead we look for a scratch register into which
	     we can load the decrement, and then we subtract this from the
	     stack pointer.  Unfortunately on the thumb the only available
	     scratch registers are the argument registers, and we cannot use
	     these as they may hold arguments to the function.  Instead we
	     attempt to locate a call preserved register which is used by this
	     function.  If we can find one, then we know that it will have
	     been pushed at the start of the prologue and so we can corrupt
	     it now.  */
	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
	    if (live_regs_mask & (1 << regno))
	      break;

	  gcc_assert (regno <= LAST_LO_REGNUM);

	  reg = gen_rtx_REG (SImode, regno);

	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));

	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, reg));

	  dwarf = gen_rtx_SET (stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -amount));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
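  /* To illustrate the constant-pool path above: for a 4 KB frame with r4
     live, the emitted sequence is roughly
	ldr	r4, .Ln		@ where .Ln holds the value -4096
	add	sp, sp, r4
     and since r4 was pushed at the start of the prologue, its value is
     restored in the epilogue.  */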
  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}
/* Clear caller saved registers not used to pass return values and leaked
   condition flags before exiting a cmse_nonsecure_entry function.  */

void
cmse_nonsecure_entry_clear_before_return (void)
{
  int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
  uint32_t padding_bits_to_clear = 0;
  auto_sbitmap to_clear_bitmap (maxregno + 1);
  rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
  tree result_type;

  bitmap_clear (to_clear_bitmap);
  bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
  bitmap_set_bit (to_clear_bitmap, IP_REGNUM);

  /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
     registers.  */
  if (TARGET_HARD_FLOAT)
    {
      int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;

      bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);

      /* Make sure we don't clear the two scratch registers used to clear the
	 relevant FPSCR bits in output_return_instruction.  */
      emit_use (gen_rtx_REG (SImode, IP_REGNUM));
      bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
      emit_use (gen_rtx_REG (SImode, 4));
      bitmap_clear_bit (to_clear_bitmap, 4);
    }
  /* If the user has defined registers to be caller saved, these are no longer
     restored by the function before returning and must thus be cleared for
     security purposes.  */
  for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
    {
      /* We do not touch registers that can be used to pass arguments as per
	 the AAPCS, since these should never be made callee-saved by user
	 options.  */
      if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
	continue;
      if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
	continue;
      if (call_used_or_fixed_reg_p (regno))
	bitmap_set_bit (to_clear_bitmap, regno);
    }
  /* Make sure we do not clear the registers used to return the result in.  */
  result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
  if (!VOID_TYPE_P (result_type))
    {
      uint64_t to_clear_return_mask;
      result_rtl = arm_function_value (result_type, current_function_decl, 0);

      /* No need to check that we return in registers, because we don't
	 support returning on stack yet.  */
      gcc_assert (REG_P (result_rtl));
      to_clear_return_mask
	= compute_not_to_clear_mask (result_type, result_rtl, 0,
				     &padding_bits_to_clear);
      if (to_clear_return_mask)
	{
	  gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
	  for (regno = R0_REGNUM; regno <= maxregno; regno++)
	    if (to_clear_return_mask & (1ULL << regno))
	      bitmap_clear_bit (to_clear_bitmap, regno);
	}
    }
  if (padding_bits_to_clear != 0)
    {
      int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
      auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);

      /* Padding_bits_to_clear is not 0 so we know we are dealing with
	 returning a composite type, which only uses r0.  Let's make sure that
	 r1-r3 are cleared too.  */
      bitmap_clear (to_clear_arg_regs_bitmap);
      bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
      gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
    }

  /* Clear full registers that leak before returning.  */
  clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
  r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
  cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
			clearing_reg);
}
/* Generate pattern *pop_multiple_with_stack_update_and_return if single
   POP instruction can be generated.  LR should be replaced by PC.  All
   the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is if single register is to be
   returned, or multiple register return.  */
void
thumb2_expand_return (bool simple_return)
{
  int i, num_regs;
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  if (!simple_return && saved_regs_mask)
    {
      /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
	 functions or adapt code to handle according to ACLE.  This path should
	 not be reachable for cmse_nonsecure_entry functions though we prefer
	 to assert it for now to ensure that future code changes do not silently
	 change this behavior.  */
      gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
      if (num_regs == 1)
	{
	  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
	  rtx addr = gen_rtx_MEM (SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
	  RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
	  emit_jump_insn (par);
	}
      else
	{
	  saved_regs_mask &= ~ (1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  arm_emit_multi_reg_pop (saved_regs_mask);
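	  /* E.g. a function whose prologue did "push {r4, r5, lr}" now
	     returns with the single instruction "pop {r4, r5, pc}";
	     rewriting LR to PC above folds the return into the pop.  */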
	}
    }
  else
    {
      if (IS_CMSE_ENTRY (arm_current_func_type ()))
	cmse_nonsecure_entry_clear_before_return ();
      emit_jump_insn (simple_return_rtx);
    }
}
void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have epilogues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
  if (amount)
    {
      emit_insn (gen_blockage ());

      if (amount < 512)
	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (amount)));
      else
	{
	  /* r3 is always free in the epilogue.  */
	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
	}
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_force_register_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));

  /* Clear all caller-saved regs that are not used to return.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    cmse_nonsecure_entry_clear_before_return ();
}
/* Epilogue code for APCS frame.  */

static void
arm_expand_epilogue_apcs_frame (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int floats_from_frame = 0;
  arm_stack_offsets *offsets;

  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
  func_type = arm_current_func_type ();

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  /* Find the offset of the floating-point save area in the frame.  */
  floats_from_frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       - offsets->frame);

  /* Compute how many core registers are saved and how far away the floats
     are.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      {
	num_regs++;
	floats_from_frame += 4;
      }

  if (TARGET_HARD_FLOAT)
    {
      int start_reg;
      rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

      /* The offset is from IP_REGNUM.  */
      int saved_size = arm_get_vfp_saved_size ();
      if (saved_size > 0)
	{
	  rtx_insn *insn;
	  floats_from_frame += saved_size;
	  insn = emit_insn (gen_addsi3 (ip_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (-floats_from_frame)));
	  arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
				       ip_rtx, hard_frame_pointer_rtx);
	}

      /* Generate VFP register multi-pop.  */
      start_reg = FIRST_VFP_REGNUM;

      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_or_fixed_reg_p (i))
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_or_fixed_reg_p (i + 1)))
	  {
	    if (start_reg != i)
	      arm_emit_vfp_multi_reg_pop (start_reg,
					  (i - start_reg) / 2,
					  gen_rtx_REG (SImode,
						       IP_REGNUM));
	    start_reg = i + 2;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (start_reg != i)
	arm_emit_vfp_multi_reg_pop (start_reg,
				    (i - start_reg) / 2,
				    gen_rtx_REG (SImode, IP_REGNUM));
    }

  if (TARGET_IWMMXT)
    {
      /* The frame pointer is guaranteed to be non-double-word aligned, as
	 it is set to double-word-aligned old_stack_pointer - 4.  */
      rtx_insn *insn;
      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);

      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
	if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
	  {
	    rtx addr = gen_frame_mem (V2SImode,
				      plus_constant (Pmode, hard_frame_pointer_rtx,
						     - lrm_count * 4));
	    insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					       gen_rtx_REG (V2SImode, i),
					       NULL_RTX);
	    lrm_count += 2;
	  }
    }

  /* saved_regs_mask should contain IP, which holds the old stack pointer
     at the time of activation creation.  Since SP and IP are adjacent
     registers, we can restore the value directly into SP.  */
  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
  saved_regs_mask &= ~(1 << IP_REGNUM);
  saved_regs_mask |= (1 << SP_REGNUM);
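  /* For example, an APCS prologue that did "stmfd sp!, {fp, ip, lr, pc}"
     is undone by popping with SP substituted for IP: r13 occupies the
     slot between r11 and r14 in LDM's ascending register order, so the
     old stack pointer saved in the IP slot lands directly in SP.  */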
  /* There are two registers left in saved_regs_mask - LR and PC.  We
     only need to restore LR (the return address), but to
     save time we can load it directly into PC, unless we need a
     special function exit sequence, or we are not really returning.  */
  if (really_return
      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
      && !crtl->calls_eh_return)
    /* Delete LR from the register mask, so that LR on
       the stack is loaded into the PC in the register mask.  */
    saved_regs_mask &= ~(1 << LR_REGNUM);
  else
    saved_regs_mask &= ~(1 << PC_REGNUM);

  num_regs = bit_count (saved_regs_mask);
  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
    {
      rtx_insn *insn;
      emit_insn (gen_blockage ());
      /* Unwind the stack to just below the saved registers.  */
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    hard_frame_pointer_rtx,
				    GEN_INT (- 4 * num_regs)));

      arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
				   stack_pointer_rtx, hard_frame_pointer_rtx);
    }

  arm_emit_multi_reg_pop (saved_regs_mask);
  if (IS_INTERRUPT (func_type))
    {
      /* Interrupt handlers will have pushed the
	 IP onto the stack, so restore it now.  */
      rtx_insn *insn;
      rtx addr = gen_rtx_MEM (SImode,
			      gen_rtx_POST_INC (SImode,
						stack_pointer_rtx));
      set_mem_alias_set (addr, get_frame_alias_set ());
      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					 gen_rtx_REG (SImode, IP_REGNUM),
					 NULL_RTX);
    }

  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Generate RTL to represent ARM epilogue.  Really_return is true if the
   function is not a sibcall.  */
void
arm_expand_epilogue (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int amount;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have epilogues.  Hence, generate return pattern, and
     let output_return_instruction take care of instruction emission if any.  */
  if (IS_NAKED (func_type)
      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
    {
      if (really_return)
	emit_jump_insn (simple_return_rtx);
      return;
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      arm_expand_epilogue_apcs_frame (really_return);
      return;
    }

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;
  num_regs = bit_count (saved_regs_mask);
  if (frame_pointer_needed)
    {
      rtx_insn *insn;
      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM)
	{
	  /* In ARM mode, frame pointer points to first saved register.
	     Restore stack pointer to last saved register.  */
	  amount = offsets->frame - offsets->saved_regs;

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (insn, amount,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);

	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
      else
	{
	  /* In Thumb-2 mode, the frame pointer points to the last saved
	     register.  */
	  amount = offsets->locals_base - offsets->saved_regs;
	  if (amount)
	    {
	      insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					    hard_frame_pointer_rtx,
					    GEN_INT (amount)));
	      arm_add_cfa_adjust_cfa_note (insn, amount,
					   hard_frame_pointer_rtx,
					   hard_frame_pointer_rtx);
	    }

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_movsi (stack_pointer_rtx,
				       hard_frame_pointer_rtx));
	  arm_add_cfa_adjust_cfa_note (insn, 0,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }
  else
    {
      /* Pop off outgoing args and local frame to adjust stack pointer to
	 last saved register.  */
      amount = offsets->outgoing_args - offsets->saved_regs;
      if (amount)
	{
	  rtx_insn *tmp;
	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
				       stack_pointer_rtx,
				       GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (tmp, amount,
				       stack_pointer_rtx, stack_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
	     not deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }
  if (TARGET_HARD_FLOAT)
    {
      /* Generate VFP register multi-pop.  */
      int end_reg = LAST_VFP_REGNUM + 1;

      /* Scan the registers in reverse order.  We need to match
	 any groupings made in the prologue and generate matching
	 vldm operations.  The need to match groups is because,
	 unlike pop, vldm can only do consecutive regs.  */
      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_or_fixed_reg_p (i))
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_or_fixed_reg_p (i + 1)))
	  {
	    /* Restore the regs discovered so far (from reg+2 to
	       end_reg).  */
	    if (end_reg > i + 2)
	      arm_emit_vfp_multi_reg_pop (i + 2,
					  (end_reg - (i + 2)) / 2,
					  stack_pointer_rtx);
	    end_reg = i;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (end_reg > i + 2)
	arm_emit_vfp_multi_reg_pop (i + 2,
				    (end_reg - (i + 2)) / 2,
				    stack_pointer_rtx);
    }

  if (TARGET_IWMMXT)
    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
      if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
	{
	  rtx_insn *insn;
	  rtx addr = gen_rtx_MEM (V2SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	  REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					     gen_rtx_REG (V2SImode, i),
					     NULL_RTX);
	  arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}
  if (saved_regs_mask)
    {
      rtx insn;
      bool return_in_pc = false;

      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
	  && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
	  && !IS_CMSE_ENTRY (func_type)
	  && !IS_STACKALIGN (func_type)
	  && really_return
	  && crtl->args.pretend_args_size == 0
	  && saved_regs_mask & (1 << LR_REGNUM)
	  && !crtl->calls_eh_return)
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  return_in_pc = true;
	}
      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
	{
	  for (i = 0; i <= LAST_ARM_REGNUM; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx addr = gen_rtx_MEM (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx));
		set_mem_alias_set (addr, get_frame_alias_set ());

		if (i == PC_REGNUM)
		  {
		    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
		    XVECEXP (insn, 0, 0) = ret_rtx;
		    XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
							addr);
		    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
		    insn = emit_jump_insn (insn);
		  }
		else
		  {
		    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
						 addr));
		    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
						       gen_rtx_REG (SImode, i),
						       NULL_RTX);
		    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
						 stack_pointer_rtx,
						 stack_pointer_rtx);
		  }
	      }
	}
      else
	{
	  if (TARGET_LDRD
	      && current_tune->prefer_ldrd_strd
	      && !optimize_function_for_size_p (cfun))
	    {
	      if (TARGET_THUMB2)
		thumb2_emit_ldrd_pop (saved_regs_mask);
	      else if (TARGET_ARM && !IS_INTERRUPT (func_type))
		arm_emit_ldrd_pop (saved_regs_mask);
	      else
		arm_emit_multi_reg_pop (saved_regs_mask);
	    }
	  else
	    arm_emit_multi_reg_pop (saved_regs_mask);
	}

      if (return_in_pc)
	return;
    }

  amount
    = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
  if (amount)
    {
      int i, j;
      rtx dwarf = NULL_RTX;
      rtx_insn *tmp
	= emit_insn (gen_addsi3 (stack_pointer_rtx,
				 stack_pointer_rtx,
				 GEN_INT (amount)));

      RTX_FRAME_RELATED_P (tmp) = 1;

      if (cfun->machine->uses_anonymous_args)
	{
	  /* Restore pretend args.  Refer to arm_expand_prologue on how to save
	     pretend_args in stack.  */
	  int num_regs = crtl->args.pretend_args_size / 4;
	  saved_regs_mask = (0xf0 >> num_regs) & 0xf;
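	  /* For instance, 8 bytes of pretend args give num_regs == 2 and
	     (0xf0 >> 2) & 0xf == 0xc, selecting r2 and r3 for the
	     REG_CFA_RESTORE notes below.  */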
	  for (j = 0, i = 0; j < num_regs; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx reg = gen_rtx_REG (SImode, i);
		dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
		j++;
	      }
	  REG_NOTES (tmp) = dwarf;
	}
      arm_add_cfa_adjust_cfa_note (tmp, amount,
				   stack_pointer_rtx, stack_pointer_rtx);
    }
  /* Clear all caller-saved regs that are not used to return.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      /* CMSE_ENTRY always returns.  */
      gcc_assert (really_return);
      cmse_nonsecure_entry_clear_before_return ();
    }

  if (!really_return)
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */

const char *
thumb1_output_interwork (void)
{
  const char *name;
  FILE *f = asm_out_file;

  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
	      == SYMBOL_REF);
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
  if (arm_dllexport_name_p (name))
    name = arm_strip_name_encoding (name);
#endif
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  return "";
}
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */

const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (REG_P (operands[0]));
  gcc_assert (MEM_P (operands[1]));

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
	{
	  output_asm_insn ("ldr\t%H0, %2", operands);
	  output_asm_insn ("ldr\t%0, %1", operands);
	}
      else
	{
	  output_asm_insn ("ldr\t%0, %1", operands);
	  output_asm_insn ("ldr\t%H0, %2", operands);
	}
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
	base = arg2, offset = arg1;
      else
	base = arg1, offset = arg2;

      gcc_assert (REG_P (base));

      /* Catch the case of <address> = <reg> + <reg> */
      if (REG_P (offset))
	{
	  int reg_offset = REGNO (offset);
	  int reg_base = REGNO (base);
	  int reg_dest = REGNO (operands[0]);

	  /* Add the base and offset registers together into the
	     higher destination register.  */
	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
		       reg_dest + 1, reg_base, reg_offset);

	  /* Load the lower destination register from the address in
	     the higher destination register.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
		       reg_dest, reg_dest + 1);

	  /* Load the higher destination register from its own address
	     plus 4.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
		       reg_dest + 1, reg_dest + 1);
	}
      else
	{
	  /* Compute <address> + 4 for the high order load.  */
	  operands[2] = adjust_address (operands[1], SImode, 4);

	  /* If the computed address is held in the low order register
	     then load the high order register first, otherwise always
	     load the low order register first.  */
	  if (REGNO (operands[0]) == REGNO (base))
	    {
	      output_asm_insn ("ldr\t%H0, %2", operands);
	      output_asm_insn ("ldr\t%0, %1", operands);
	    }
	  else
	    {
	      output_asm_insn ("ldr\t%0, %1", operands);
	      output_asm_insn ("ldr\t%H0, %2", operands);
	    }
	}
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
	 directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);
      if (REGNO (operands[5]) > REGNO (operands[6]))
	std::swap (operands[5], operands[6]);
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
/* Output a call-via instruction for thumb state.  */

const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
	thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
	cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
/* Routines for generating rtl.  */

void
thumb_expand_cpymemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_cpymem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_cpymem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
								offset)),
			    reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
								offset)),
			    reg));
    }
}
void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}

/* Return the length of a function name prefix
   that starts with the character 'c'.  */

static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */

const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */

void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (Eg gas prior to 2.20).  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.c needs it too.  */

void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}
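/* For example, arm_emit_eabi_attribute ("Tag_ABI_PCS_wchar_t", 18, 4)
   prints "\t.eabi_attribute 18, 4" and, under -fverbose-asm or -dA,
   appends "@ Tag_ABI_PCS_wchar_t" (ASM_COMMENT_START is "@" on ARM
   targets).  */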
/* This function is used to print CPU tuning information as comment
   in assembler file.  Pointers are not printed for now.  */

void
arm_print_tune_info (void)
{
  asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
	       current_tune->constant_limit);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.l1_cache_size:\t%d\n",
	       current_tune->prefetch.l1_cache_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.l1_cache_line_size:\t%d\n",
	       current_tune->prefetch.l1_cache_line_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefer_constant_pool:\t%d\n",
	       (int) current_tune->prefer_constant_pool);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "branch_cost:\t(s:speed, p:predictable)\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
	       current_tune->branch_cost (false, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
	       current_tune->branch_cost (false, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
	       current_tune->branch_cost (true, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
	       current_tune->branch_cost (true, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefer_ldrd_strd:\t%d\n",
	       (int) current_tune->prefer_ldrd_strd);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "logical_op_non_short_circuit:\t[%d,%d]\n",
	       (int) current_tune->logical_op_non_short_circuit_thumb,
	       (int) current_tune->logical_op_non_short_circuit_arm);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "disparage_flag_setting_t16_encodings:\t%d\n",
	       (int) current_tune->disparage_flag_setting_t16_encodings);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "string_ops_prefer_neon:\t%d\n",
	       (int) current_tune->string_ops_prefer_neon);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "max_insns_inline_memset:\t%d\n",
	       current_tune->max_insns_inline_memset);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
	       current_tune->fusible_ops);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
	       (int) current_tune->sched_autopref);
}
/* Print .arch and .arch_extension directives corresponding to the
   current architecture configuration.  */
static void
arm_print_asm_arch_directives ()
{
  const arch_option *arch
    = arm_parse_arch_option_name (all_architectures, "-march",
				  arm_active_target.arch_name);
  auto_sbitmap opt_bits (isa_num_bits);

  gcc_assert (arch);

  asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
  arm_last_printed_arch_string = arm_active_target.arch_name;
  if (!arch->common.extensions)
    return;

  for (const struct cpu_arch_extension *opt = arch->common.extensions;
       opt->name != NULL;
       opt++)
    {
      if (!opt->remove)
	{
	  arm_initialize_isa (opt_bits, opt->isa_bits);

	  /* If every feature bit of this option is set in the target
	     ISA specification, print out the option name.  However,
	     don't print anything if all the bits are part of the
	     FPU specification.  */
	  if (bitmap_subset_p (opt_bits, arm_active_target.isa)
	      && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
	    asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
	}
    }
}
static void
arm_file_start (void)
{
  int val;

  if (TARGET_BPABI)
    {
      /* We don't have a specified CPU.  Use the architecture to
	 generate the tags.

	 Note: it might be better to do this unconditionally, then the
	 assembler would not need to know about all new CPU names as
	 they are added.  */
      if (!arm_active_target.core_name)
	{
	  /* armv7ve doesn't support any extensions.  */
	  if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
	    {
	      /* Keep backward compatibility for assemblers
		 which don't support armv7ve.  */
	      asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
	      arm_last_printed_arch_string = "armv7ve";
	    }
	  else
	    arm_print_asm_arch_directives ();
	}
      else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
	{
	  asm_fprintf (asm_out_file, "\t.arch %s\n",
		       arm_active_target.core_name + 8);
	  arm_last_printed_arch_string = arm_active_target.core_name + 8;
	}
      else
	{
	  const char* truncated_name
	    = arm_rewrite_selected_cpu (arm_active_target.core_name);
	  asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
	}

      if (print_tune_info)
	arm_print_tune_info ();
      if (! TARGET_SOFT_FLOAT)
	{
	  if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
	    arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);

	  if (TARGET_HARD_FLOAT_ABI)
	    arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
	}

      /* Some of these attributes only apply when the corresponding features
	 are used.  However we don't have any easy way of figuring this out.
	 Conservatively record the setting that would have been used.  */

      if (flag_rounding_math)
	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);

      if (!flag_unsafe_math_optimizations)
	{
	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
	}
      if (flag_signaling_nans)
	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);

      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
			       flag_finite_math_only ? 1 : 3);

      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
			       flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
	val = 4;
      else if (optimize >= 2)
	val = 2;
      else if (optimize)
	val = 1;
      else
	val = 6;
      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);

      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
			       unaligned_access);

      if (arm_fp16_format)
	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
				 (int) arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
	arm_lang_output_object_attributes_hook();
    }

  default_file_start ();
}
static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
	{
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (label));
	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	}
    }
}
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
		     HOST_WIDE_INT, tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
		    ? 1 : 0);
  if (mi_delta < 0)
    mi_delta = - mi_delta;

  final_start_function (emit_barrier (), file, 1);

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
	{
	  /* push r3 so we can use it as a temporary.  */
	  /* TODO: Omit this save if r3 is not used.  */
	  fputs ("\tpush {r3}\n", file);
	  fputs ("\tldr\tr3, ", file);
	}
      else
	fputs ("\tldr\tr12, ", file);
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
	{
	  /* If we are generating PIC, the ldr instruction below loads
	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
	     the address of the add + 8, so we have:

	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
		 = target + 1.

	     Note that we have "+ 1" because some versions of GNU ld
	     don't set the low bit of the result for R_ARM_REL32
	     relocations against thumb function symbols.
	     On ARMv6M this is +4, not +8.  */
	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
	  assemble_name (file, labelpc);
	  fputs (":\n", file);
	  if (TARGET_THUMB1_ONLY)
	    {
	      /* This is 2 insns after the start of the thunk, so we know it
		 is 4-byte aligned.  */
	      fputs ("\tadd\tr3, pc, r3\n", file);
	      fputs ("\tmov r12, r3\n", file);
	    }
	  else
	    fputs ("\tadd\tr12, pc, r12\n", file);
	}
      else if (TARGET_THUMB1_ONLY)
	fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
	{
	  fputs ("\tldr\tr3, ", file);
	  assemble_name (file, label);
	  fputs ("+4\n", file);
	  asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
		       mi_op, this_regno, this_regno);
	}
      else if (mi_delta != 0)
	{
	  /* Thumb1 unified syntax requires s suffix in instruction name when
	     one of the operands is immediate.  */
	  asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
		       mi_op, this_regno, this_regno,
		       mi_delta);
	}
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
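      /* The loop below peels one shifted 8-bit chunk (always a valid
	 ARM-mode immediate) off MI_DELTA per instruction; e.g. a delta
	 of 0x1004 is emitted as an add of #4 followed by an add of
	 #4096.  */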
      while (mi_delta != 0)
	{
	  if ((mi_delta & (3 << shift)) == 0)
	    shift += 2;
	  else
	    {
	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
			   mi_op, this_regno, this_regno,
			   mi_delta & (0xff << shift));
	      mi_delta &= ~(0xff << shift);
	      shift += 8;
	    }
	}
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
	fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
	{
	  /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
	  rtx tem = XEXP (DECL_RTL (function), 0);
	  /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
	     pipeline offset is four rather than eight.  Adjust the offset
	     accordingly.  */
	  tem = plus_constant (GET_MODE (tem), tem,
			       TARGET_THUMB1_ONLY ? -3 : -7);
	  tem = gen_rtx_MINUS (GET_MODE (tem),
			       tem,
			       gen_rtx_SYMBOL_REF (Pmode,
						   ggc_strdup (labelpc)));
	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
	}
      else
	/* Output ".word .LTHUNKn".  */
	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
	assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
	fputs ("(PLT)", file);
      fputc ('\n', file);
    }

  final_end_function ();
}
/* MI thunk handling for TARGET_32BIT.  */

static void
arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
		       HOST_WIDE_INT vcall_offset, tree function)
{
  const bool long_call_p = arm_is_long_call_p (function);

  /* On ARM, this_regno is R0 or R1 depending on
     whether the function returns an aggregate or not.
  */
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
				       function)
		    ? R1_REGNUM : R0_REGNUM);

  rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
  rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Add DELTA to THIS_RTX.  */
  if (delta != 0)
    arm_split_constant (PLUS, Pmode, NULL_RTX,
			delta, this_rtx, this_rtx, false);

  /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
  if (vcall_offset != 0)
    {
      /* Load *THIS_RTX.  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
      /* Compute *THIS_RTX + VCALL_OFFSET.  */
      arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
			  false);
      /* Compute *(*THIS_RTX + VCALL_OFFSET).  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
      emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  rtx funexp = XEXP (DECL_RTL (function), 0);
  if (long_call_p)
    {
      emit_move_insn (temp, funexp);
      funexp = temp;
    }
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;

  /* Indirect calls require a bit of fixup in PIC mode.  */
  if (long_call_p)
    {
      split_all_insns_noflow ();
      arm_reorg ();
    }

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending this is a post-reload pass.  */
  reload_completed = 0;
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));

  assemble_start_function (thunk, fnname);
  if (TARGET_32BIT)
    arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
  else
    arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
  assemble_end_function (thunk, fnname);
}
static bool
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case E_V2SImode: pattern = "%08x"; break;
    case E_V4HImode: pattern = "%04x"; break;
    case E_V8QImode: pattern = "%02x"; break;
    default:	     gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return true;
}
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  long bits;

  bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (!MEM_P (operands [1])
      || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
      || !REG_P (reg = XEXP (sum, 0))
      || !CONST_INT_P (offset = XEXP (sum, 1))
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
			    const function_arg_info &arg,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if (nregs & 1)
	{
	  int res = arm_needs_doubleword_align (arg.mode, arg.type);
	  if (res < 0 && warn_psabi)
	    inform (input_location, "parameter passing for argument of "
		    "type %qT changed in GCC 7.1", arg.type);
	  else if (res > 0)
	    {
	      nregs++;
	      if (res > 1 && warn_psabi)
		inform (input_location,
			"parameter passing for argument of type "
			"%qT changed in GCC 9.1", arg.type);
	    }
	}
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
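  /* For example, "int f (int a, ...)" names only r0, so the code above
     sets *pretend_size to (4 - 1) * 4 == 12 and the prologue will push
     r1-r3 immediately below any caller-pushed arguments.  */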
}

/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			   machine_mode mode,
			   int *punsignedp ATTRIBUTE_UNUSED,
			   const_tree fntype ATTRIBUTE_UNUSED,
			   int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}

/* AAPCS based ABIs use short enums by default.  */

static bool
arm_default_short_enums (void)
{
  return ARM_DEFAULT_SHORT_ENUMS;
}
/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}

/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}

/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}

/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}
27870 /* The EABI says constructors and destructors should return a pointer to
27871 the object constructed/destroyed. */
27874 arm_cxx_cdtor_returns_this (void)
27876 return TARGET_AAPCS_BASED;
27879 /* The EABI says that an inline function may never be the key method. */
27883 arm_cxx_key_method_may_be_inline (void)
27885 return !TARGET_AAPCS_BASED;
27889 arm_cxx_determine_class_data_visibility (tree decl)
27891 if (!TARGET_AAPCS_BASED
27892 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
27895 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27896 is exported. However, on systems without dynamic vague linkage,
27897 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
27898 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
27899 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
27901 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
27902 DECL_VISIBILITY_SPECIFIED (decl) = 1;
27906 arm_cxx_class_data_always_comdat (void)
27908 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27909 vague linkage if the class has no key function. */
27910 return !TARGET_AAPCS_BASED;
27914 /* The EABI says __aeabi_atexit should be used to register static
27918 arm_cxx_use_aeabi_atexit (void)
27920 return TARGET_AAPCS_BASED;
27925 arm_set_return_address (rtx source, rtx scratch)
27927 arm_stack_offsets *offsets;
27928 HOST_WIDE_INT delta;
27930 unsigned long saved_regs;
27932 offsets = arm_get_frame_offsets ();
27933 saved_regs = offsets->saved_regs_mask;
27935 if ((saved_regs & (1 << LR_REGNUM)) == 0)
27936 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27939 if (frame_pointer_needed)
27940 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
27943 /* LR will be the first saved register. */
27944 delta = offsets->outgoing_args - (offsets->frame + 4);
27949 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
27950 GEN_INT (delta & ~4095)));
27955 addr = stack_pointer_rtx;
27957 addr = plus_constant (Pmode, addr, delta);
27960 /* The store needs to be marked to prevent DSE from deleting
27961 it as dead if it is based on fp. */
27962 mem = gen_frame_mem (Pmode, addr);
27963 MEM_VOLATILE_P (mem) = true;
27964 emit_move_insn (mem, source);
27970 thumb_set_return_address (rtx source, rtx scratch)
27972 arm_stack_offsets *offsets;
27973 HOST_WIDE_INT delta;
27974 HOST_WIDE_INT limit;
27977 unsigned long mask;
27981 offsets = arm_get_frame_offsets ();
27982 mask = offsets->saved_regs_mask;
27983 if (mask & (1 << LR_REGNUM))
27986 /* Find the saved regs. */
27987 if (frame_pointer_needed)
27989 delta = offsets->soft_frame - offsets->saved_args;
27990 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
27996 delta = offsets->outgoing_args - offsets->saved_args;
27999 /* Allow for the stack frame. */
28000 if (TARGET_THUMB1 && TARGET_BACKTRACE)
28002 /* The link register is always the first saved register. */
28005 /* Construct the address. */
28006 addr = gen_rtx_REG (SImode, reg);
28009 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
28010 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
28014 addr = plus_constant (Pmode, addr, delta);
28016 /* The store needs to be marked to prevent DSE from deleting
28017 it as dead if it is based on fp. */
28018 mem = gen_frame_mem (Pmode, addr);
28019 MEM_VOLATILE_P (mem) = true;
28020 emit_move_insn (mem, source);
28023 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28026 /* Implements target hook vector_mode_supported_p. */
28028 arm_vector_mode_supported_p (machine_mode mode)
28030 /* Neon also supports V2SImode, etc. listed in the clause below. */
28031 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
28032 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
28033 || mode == V2DImode || mode == V8HFmode))
28036 if ((TARGET_NEON || TARGET_IWMMXT)
28037 && ((mode == V2SImode)
28038 || (mode == V4HImode)
28039 || (mode == V8QImode)))
28042 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
28043 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
28044 || mode == V2HAmode))
28050 /* Implements target hook array_mode_supported_p. */
28053 arm_array_mode_supported_p (machine_mode mode,
28054 unsigned HOST_WIDE_INT nelems)
28056 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
28057 for now, as the lane-swapping logic needs to be extended in the expanders.
28058 See PR target/82518. */
28059 if (TARGET_NEON && !BYTES_BIG_ENDIAN
28060 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
28061 && (nelems >= 2 && nelems <= 4))
28067 /* Use the option -mvectorize-with-neon-double to override the use of quadword
28068 registers when autovectorizing for Neon, at least until multiple vector
28069 widths are supported properly by the middle-end. */
28071 static machine_mode
28072 arm_preferred_simd_mode (scalar_mode mode)
28078 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
28080 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
28082 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
28084 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
28086 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28093 if (TARGET_REALLY_IWMMXT)
28109 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
28111 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
28112 using r0-r4 for function arguments, r7 for the stack frame and don't have
28113 enough left over to do doubleword arithmetic. For Thumb-2 all the
28114 potentially problematic instructions accept high registers so this is not
28115 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
28116 that require many low registers. */
28118 arm_class_likely_spilled_p (reg_class_t rclass)
28120 if ((TARGET_THUMB1 && rclass == LO_REGS)
28121 || rclass == CC_REG)
28127 /* Implements target hook small_register_classes_for_mode_p. */
28129 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
28131 return TARGET_THUMB1;
28134 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
28135 ARM insns and therefore guarantee that the shift count is modulo 256.
28136 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
28137 guarantee no particular behavior for out-of-range counts. */
28139 static unsigned HOST_WIDE_INT
28140 arm_shift_truncation_mask (machine_mode mode)
28142 return mode == SImode ? 255 : 0;
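/* Illustrative consequence (editor's note, not from the original source):
   with the SImode mask of 255 the middle end may delete an explicit
   "count & 255" applied to a shift amount, because the instruction already
   truncates the count modulo 256.  Returning 0 for other modes promises no
   particular truncation, matching the DImode comment above.  */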
28146 /* Map internal gcc register numbers to DWARF2 register numbers. */
28149 arm_dbx_register_number (unsigned int regno)
28154 if (IS_VFP_REGNUM (regno))
28156 /* See comment in arm_dwarf_register_span. */
28157 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28158 return 64 + regno - FIRST_VFP_REGNUM;
28160 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
28163 if (IS_IWMMXT_GR_REGNUM (regno))
28164 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
28166 if (IS_IWMMXT_REGNUM (regno))
28167 return 112 + regno - FIRST_IWMMXT_REGNUM;
28169 return DWARF_FRAME_REGISTERS;
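/* Worked examples (illustrative): with the mapping above, s0 (the first
   VFP register) maps to DWARF register 64, d16 maps to 256 + 16 == 272,
   wCGR0 maps to 104 and wR0 to 112; any unhandled register falls through
   to DWARF_FRAME_REGISTERS.  */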
28172 /* Dwarf models VFPv3 registers as 32 64-bit registers.
28173 GCC models them as 64 32-bit registers, so we need to describe this to
28174 the DWARF generation code. Other registers can use the default. */
28176 arm_dwarf_register_span (rtx rtl)
28184 regno = REGNO (rtl);
28185 if (!IS_VFP_REGNUM (regno))
28188 /* XXX FIXME: The EABI defines two VFP register ranges:
28189 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent); 256-287: D0-D31.
28191 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
28192 corresponding D register. Until GDB supports this, we shall use the
28193 legacy encodings. We also use these encodings for D0-D15 for
28194 compatibility with older debuggers. */
28195 mode = GET_MODE (rtl);
28196 if (GET_MODE_SIZE (mode) < 8)
28199 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28201 nregs = GET_MODE_SIZE (mode) / 4;
28202 for (i = 0; i < nregs; i += 2)
28203 if (TARGET_BIG_END)
28205 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
28206 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
28210 parts[i] = gen_rtx_REG (SImode, regno + i);
28211 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
28216 nregs = GET_MODE_SIZE (mode) / 8;
28217 for (i = 0; i < nregs; i++)
28218 parts[i] = gen_rtx_REG (DImode, regno + i);
28221 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
28224 #if ARM_UNWIND_INFO
28225 /* Emit unwind directives for a store-multiple instruction or stack pointer
28226 push during alignment.
28227 These should only ever be generated by the function prologue code, so
28228 expect them to have a particular form.
28229 The store-multiple instruction sometimes pushes pc as the last register,
28230 although it should not be tracked in the unwind information, or for -Os
28231 sometimes pushes some dummy registers before the first register that needs
28232 to be tracked in the unwind information; such dummy registers are there just
28233 to avoid a separate stack adjustment, and will not be restored in the epilogue. */
28237 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
28240 HOST_WIDE_INT offset;
28241 HOST_WIDE_INT nregs;
28245 unsigned padfirst = 0, padlast = 0;
28248 e = XVECEXP (p, 0, 0);
28249 gcc_assert (GET_CODE (e) == SET);
28251 /* First insn will adjust the stack pointer. */
28252 gcc_assert (GET_CODE (e) == SET
28253 && REG_P (SET_DEST (e))
28254 && REGNO (SET_DEST (e)) == SP_REGNUM
28255 && GET_CODE (SET_SRC (e)) == PLUS);
28257 offset = -INTVAL (XEXP (SET_SRC (e), 1));
28258 nregs = XVECLEN (p, 0) - 1;
28259 gcc_assert (nregs);
28261 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
28264 /* For -Os dummy registers can be pushed at the beginning to
28265 avoid separate stack pointer adjustment. */
28266 e = XVECEXP (p, 0, 1);
28267 e = XEXP (SET_DEST (e), 0);
28268 if (GET_CODE (e) == PLUS)
28269 padfirst = INTVAL (XEXP (e, 1));
28270 gcc_assert (padfirst == 0 || optimize_size);
28271 /* The function prologue may also push pc, but not annotate it as it is
28272 never restored. We turn this into a stack pointer adjustment. */
28273 e = XVECEXP (p, 0, nregs);
28274 e = XEXP (SET_DEST (e), 0);
28275 if (GET_CODE (e) == PLUS)
28276 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
28278 padlast = offset - 4;
28279 gcc_assert (padlast == 0 || padlast == 4);
28281 fprintf (asm_out_file, "\t.pad #4\n");
28283 fprintf (asm_out_file, "\t.save {");
28285 else if (IS_VFP_REGNUM (reg))
28288 fprintf (asm_out_file, "\t.vsave {");
28291 /* Unknown register type. */
28292 gcc_unreachable ();
28294 /* If the stack increment doesn't match the size of the saved registers,
28295 something has gone horribly wrong. */
28296 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
28300 /* The remaining insns will describe the stores. */
28301 for (i = 1; i <= nregs; i++)
28303 /* Expect (set (mem <addr>) (reg)).
28304 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
28305 e = XVECEXP (p, 0, i);
28306 gcc_assert (GET_CODE (e) == SET
28307 && MEM_P (SET_DEST (e))
28308 && REG_P (SET_SRC (e)));
28310 reg = REGNO (SET_SRC (e));
28311 gcc_assert (reg >= lastreg);
28314 fprintf (asm_out_file, ", ");
28315 /* We can't use %r for vfp because we need to use the
28316 double precision register names. */
28317 if (IS_VFP_REGNUM (reg))
28318 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
28320 asm_fprintf (asm_out_file, "%r", reg);
28324 /* Check that the addresses are consecutive. */
28325 e = XEXP (SET_DEST (e), 0);
28326 if (GET_CODE (e) == PLUS)
28327 gcc_assert (REG_P (XEXP (e, 0))
28328 && REGNO (XEXP (e, 0)) == SP_REGNUM
28329 && CONST_INT_P (XEXP (e, 1))
28330 && offset == INTVAL (XEXP (e, 1)));
28334 && REGNO (e) == SP_REGNUM);
28335 offset += reg_size;
28338 fprintf (asm_out_file, "}\n");
28340 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
28343 /* Emit unwind directives for a SET. */
28346 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
28354 switch (GET_CODE (e0))
28357 /* Pushing a single register. */
28358 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
28359 || !REG_P (XEXP (XEXP (e0, 0), 0))
28360 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
28363 asm_fprintf (asm_out_file, "\t.save ");
28364 if (IS_VFP_REGNUM (REGNO (e1)))
28365 asm_fprintf(asm_out_file, "{d%d}\n",
28366 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
28368 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
28372 if (REGNO (e0) == SP_REGNUM)
28374 /* A stack increment. */
28375 if (GET_CODE (e1) != PLUS
28376 || !REG_P (XEXP (e1, 0))
28377 || REGNO (XEXP (e1, 0)) != SP_REGNUM
28378 || !CONST_INT_P (XEXP (e1, 1)))
28381 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
28382 -INTVAL (XEXP (e1, 1)));
28384 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
28386 HOST_WIDE_INT offset;
28388 if (GET_CODE (e1) == PLUS)
28390 if (!REG_P (XEXP (e1, 0))
28391 || !CONST_INT_P (XEXP (e1, 1)))
28393 reg = REGNO (XEXP (e1, 0));
28394 offset = INTVAL (XEXP (e1, 1));
28395 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
28396 HARD_FRAME_POINTER_REGNUM, reg,
28399 else if (REG_P (e1))
28402 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
28403 HARD_FRAME_POINTER_REGNUM, reg);
28408 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
28410 /* Move from sp to reg. */
28411 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
28413 else if (GET_CODE (e1) == PLUS
28414 && REG_P (XEXP (e1, 0))
28415 && REGNO (XEXP (e1, 0)) == SP_REGNUM
28416 && CONST_INT_P (XEXP (e1, 1)))
28418 /* Set reg to offset from sp. */
28419 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
28420 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
28432 /* Emit unwind directives for the given insn. */
28435 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
28438 bool handled_one = false;
28440 if (arm_except_unwind_info (&global_options) != UI_TARGET)
28443 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
28444 && (TREE_NOTHROW (current_function_decl)
28445 || crtl->all_throwers_are_sibcalls))
28448 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
28451 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
28453 switch (REG_NOTE_KIND (note))
28455 case REG_FRAME_RELATED_EXPR:
28456 pat = XEXP (note, 0);
28459 case REG_CFA_REGISTER:
28460 pat = XEXP (note, 0);
28463 pat = PATTERN (insn);
28464 if (GET_CODE (pat) == PARALLEL)
28465 pat = XVECEXP (pat, 0, 0);
28468 /* Only emitted for IS_STACKALIGN re-alignment. */
28473 src = SET_SRC (pat);
28474 dest = SET_DEST (pat);
28476 gcc_assert (src == stack_pointer_rtx);
28477 reg = REGNO (dest);
28478 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
28481 handled_one = true;
28484 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
28485 to get correct dwarf information for shrink-wrap. We should not
28486 emit unwind information for it because these are used either for
28487 pretend arguments or notes to adjust sp and restore registers from the stack. */
28489 case REG_CFA_DEF_CFA:
28490 case REG_CFA_ADJUST_CFA:
28491 case REG_CFA_RESTORE:
28494 case REG_CFA_EXPRESSION:
28495 case REG_CFA_OFFSET:
28496 /* ??? Only handling here what we actually emit. */
28497 gcc_unreachable ();
28505 pat = PATTERN (insn);
28508 switch (GET_CODE (pat))
28511 arm_unwind_emit_set (asm_out_file, pat);
28515 /* Store multiple. */
28516 arm_unwind_emit_sequence (asm_out_file, pat);
28525 /* Output a reference from a function exception table to the type_info
28526 object X. The EABI specifies that the symbol should be relocated by
28527 an R_ARM_TARGET2 relocation. */
28530 arm_output_ttype (rtx x)
28532 fputs ("\t.word\t", asm_out_file);
28533 output_addr_const (asm_out_file, x);
28534 /* Use special relocations for symbol references. */
28535 if (!CONST_INT_P (x))
28536 fputs ("(TARGET2)", asm_out_file);
28537 fputc ('\n', asm_out_file);
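/* Example output (illustrative): for a type_info symbol such as _ZTIi this
   prints "\t.word\t_ZTIi(TARGET2)", requesting the R_ARM_TARGET2
   relocation the EABI mandates; a plain integer is emitted without the
   marker.  */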
28542 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
28545 arm_asm_emit_except_personality (rtx personality)
28547 fputs ("\t.personality\t", asm_out_file);
28548 output_addr_const (asm_out_file, personality);
28549 fputc ('\n', asm_out_file);
28551 #endif /* ARM_UNWIND_INFO */
28553 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
28556 arm_asm_init_sections (void)
28558 #if ARM_UNWIND_INFO
28559 exception_section = get_unnamed_section (0, output_section_asm_op,
28561 #endif /* ARM_UNWIND_INFO */
28563 #ifdef OBJECT_FORMAT_ELF
28564 if (target_pure_code)
28565 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
28569 /* Output unwind directives for the start/end of a function. */
28572 arm_output_fn_unwind (FILE * f, bool prologue)
28574 if (arm_except_unwind_info (&global_options) != UI_TARGET)
28578 fputs ("\t.fnstart\n", f);
28581 /* If this function will never be unwound, then mark it as such.
28582 The same condition is used in arm_unwind_emit to suppress
28583 the frame annotations. */
28584 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
28585 && (TREE_NOTHROW (current_function_decl)
28586 || crtl->all_throwers_are_sibcalls))
28587 fputs("\t.cantunwind\n", f);
28589 fputs ("\t.fnend\n", f);
28594 arm_emit_tls_decoration (FILE *fp, rtx x)
28596 enum tls_reloc reloc;
28599 val = XVECEXP (x, 0, 0);
28600 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
28602 output_addr_const (fp, val);
28607 fputs ("(tlsgd)", fp);
28609 case TLS_GD32_FDPIC:
28610 fputs ("(tlsgd_fdpic)", fp);
28613 fputs ("(tlsldm)", fp);
28615 case TLS_LDM32_FDPIC:
28616 fputs ("(tlsldm_fdpic)", fp);
28619 fputs ("(tlsldo)", fp);
28622 fputs ("(gottpoff)", fp);
28624 case TLS_IE32_FDPIC:
28625 fputs ("(gottpoff_fdpic)", fp);
28628 fputs ("(tpoff)", fp);
28631 fputs ("(tlsdesc)", fp);
28634 gcc_unreachable ();
28643 fputs (" + (. - ", fp);
28644 output_addr_const (fp, XVECEXP (x, 0, 2));
28645 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
28646 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
28647 output_addr_const (fp, XVECEXP (x, 0, 3));
28657 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
28660 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
28662 gcc_assert (size == 4);
28663 fputs ("\t.word\t", file);
28664 output_addr_const (file, x);
28665 fputs ("(tlsldo)", file);
28668 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
28671 arm_output_addr_const_extra (FILE *fp, rtx x)
28673 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
28674 return arm_emit_tls_decoration (fp, x);
28675 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
28678 int labelno = INTVAL (XVECEXP (x, 0, 0));
28680 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
28681 assemble_name_raw (fp, label);
28685 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
28687 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
28691 output_addr_const (fp, XVECEXP (x, 0, 0));
28695 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
28697 output_addr_const (fp, XVECEXP (x, 0, 0));
28701 output_addr_const (fp, XVECEXP (x, 0, 1));
28705 else if (GET_CODE (x) == CONST_VECTOR)
28706 return arm_emit_vector_const (fp, x);
28711 /* Output assembly for a shift instruction.
28712 SET_FLAGS determines how the instruction modifies the condition codes.
28713 0 - Do not set condition codes.
28714 1 - Set condition codes.
28715 2 - Use smallest instruction. */
28717 arm_output_shift(rtx * operands, int set_flags)
28720 static const char flag_chars[3] = {'?', '.', '!'};
28725 c = flag_chars[set_flags];
28726 shift = shift_op(operands[3], &val);
28730 operands[2] = GEN_INT(val);
28731 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
28734 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
28736 output_asm_insn (pattern, operands);
28740 /* Output assembly for a WMMX immediate shift instruction. */
28742 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
28744 int shift = INTVAL (operands[2]);
28746 machine_mode opmode = GET_MODE (operands[0]);
28748 gcc_assert (shift >= 0);
28750 /* Handle out-of-range shifts: in the register versions the shift value
28751 must not exceed 63 (D qualifier), 31 (W qualifier) or 15 (H qualifier). */
28752 if (((opmode == V4HImode) && (shift > 15))
28753 || ((opmode == V2SImode) && (shift > 31))
28754 || ((opmode == DImode) && (shift > 63)))
28758 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
28759 output_asm_insn (templ, operands);
28760 if (opmode == DImode)
28762 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
28763 output_asm_insn (templ, operands);
28768 /* The destination register will contain all zeros. */
28769 sprintf (templ, "wzero\t%%0");
28770 output_asm_insn (templ, operands);
28775 if ((opmode == DImode) && (shift > 32))
28777 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
28778 output_asm_insn (templ, operands);
28779 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
28780 output_asm_insn (templ, operands);
28784 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
28785 output_asm_insn (templ, operands);
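/* Worked examples (illustrative): a DImode shift by #40 cannot be encoded
   in one instruction, so the code above emits a shift by #32 followed by a
   shift by #8.  A count beyond the type width (e.g. #70 for DImode) either
   saturates via repeated #32 shifts, for the wror/wsra style operations,
   or simply emits "wzero" since every bit would be shifted out.  */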
28790 /* Output assembly for a WMMX tinsr instruction. */
28792 arm_output_iwmmxt_tinsr (rtx *operands)
28794 int mask = INTVAL (operands[3]);
28797 int units = mode_nunits[GET_MODE (operands[0])];
28798 gcc_assert ((mask & (mask - 1)) == 0);
28799 for (i = 0; i < units; ++i)
28801 if ((mask & 0x01) == 1)
28807 gcc_assert (i < units);
28809 switch (GET_MODE (operands[0]))
28812 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
28815 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
28818 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
28821 gcc_unreachable ();
28824 output_asm_insn (templ, operands);
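/* Worked example (illustrative): for a V8QI destination with
   operands[3] == 4, the single set bit is found at position 2, so the
   code prints "tinsrb%?\t%0, %2, #2", inserting the scalar into lane 2.  */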
28829 /* Output a Thumb-1 casesi dispatch sequence. */
28831 thumb1_output_casesi (rtx *operands)
28833 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
28835 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28837 switch (GET_MODE(diff_vec))
28840 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28841 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
28843 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28844 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
28846 return "bl\t%___gnu_thumb1_case_si";
28848 gcc_unreachable ();
28852 /* Output a Thumb-2 casesi instruction. */
28854 thumb2_output_casesi (rtx *operands)
28856 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
28858 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28860 output_asm_insn ("cmp\t%0, %1", operands);
28861 output_asm_insn ("bhi\t%l3", operands);
28862 switch (GET_MODE(diff_vec))
28865 return "tbb\t[%|pc, %0]";
28867 return "tbh\t[%|pc, %0, lsl #1]";
28871 output_asm_insn ("adr\t%4, %l2", operands);
28872 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
28873 output_asm_insn ("add\t%4, %4, %5", operands);
28878 output_asm_insn ("adr\t%4, %l2", operands);
28879 return "ldr\t%|pc, [%4, %0, lsl #2]";
28882 gcc_unreachable ();
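/* Example sequence (illustrative): for a QImode difference vector the code
   above emits

     cmp  %0, %1      @ index against bound
     bhi  %l3         @ out of range: branch to the default label
     tbb  [pc, %0]    @ byte-offset table branch

   with tbh, or the explicit adr/ldr/add sequences, used for the wider
   table-entry modes.  */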
28886 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
28887 per-core tuning structs. */
28889 arm_issue_rate (void)
28891 return current_tune->issue_rate;
28894 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
28896 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
28898 if (DEBUG_INSN_P (insn))
28901 rtx_code code = GET_CODE (PATTERN (insn));
28902 if (code == USE || code == CLOBBER)
28905 if (get_attr_type (insn) == TYPE_NO_INSN)
28911 /* Return how many instructions the scheduler should look ahead to choose the best one. */
28914 arm_first_cycle_multipass_dfa_lookahead (void)
28916 int issue_rate = arm_issue_rate ();
28918 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
28921 /* Enable modeling of L2 auto-prefetcher. */
28923 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
28925 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
28929 arm_mangle_type (const_tree type)
28931 /* The ARM ABI documents (10th October 2008) say that "__va_list"
28932 has to be mangled as if it is in the "std" namespace. */
28933 if (TARGET_AAPCS_BASED
28934 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
28935 return "St9__va_list";
28937 /* Half-precision float. */
28938 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
28941 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a builtin type. */
28943 if (TYPE_NAME (type) != NULL)
28944 return arm_mangle_builtin_type (type);
28946 /* Use the default mangling. */
28950 /* Order of allocation of core registers for Thumb: this allocation is
28951 written over the corresponding initial entries of the array
28952 initialized with REG_ALLOC_ORDER. We allocate all low registers
28953 first. Saving and restoring a low register is usually cheaper than
28954 using a call-clobbered high register. */
28956 static const int thumb_core_reg_alloc_order[] =
28958 3, 2, 1, 0, 4, 5, 6, 7,
28959 12, 14, 8, 9, 10, 11
28962 /* Adjust register allocation order when compiling for Thumb. */
28965 arm_order_regs_for_local_alloc (void)
28967 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
28968 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
28970 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
28971 sizeof (thumb_core_reg_alloc_order));
28974 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
28977 arm_frame_pointer_required (void)
28979 if (SUBTARGET_FRAME_POINTER_REQUIRED)
28982 /* If the function receives nonlocal gotos, it needs to save the frame
28983 pointer in the nonlocal_goto_save_area object. */
28984 if (cfun->has_nonlocal_label)
28987 /* The frame pointer is required for non-leaf APCS frames. */
28988 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
28991 /* If we are probing the stack in the prologue, we will have a faulting
28992 instruction prior to the stack adjustment and this requires a frame
28993 pointer if we want to catch the exception using the EABI unwinder. */
28994 if (!IS_INTERRUPT (arm_current_func_type ())
28995 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
28996 || flag_stack_clash_protection)
28997 && arm_except_unwind_info (&global_options) == UI_TARGET
28998 && cfun->can_throw_non_call_exceptions)
29000 HOST_WIDE_INT size = get_frame_size ();
29002 /* That's irrelevant if there is no stack adjustment. */
29006 /* That's relevant only if there is a stack probe. */
29007 if (crtl->is_leaf && !cfun->calls_alloca)
29009 /* We don't have the final size of the frame so adjust. */
29010 size += 32 * UNITS_PER_WORD;
29011 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
29021 /* Only Thumb-1 lacks support for conditional execution, so return true if
29022 the target is not Thumb-1. */
29024 arm_have_conditional_execution (void)
29026 return !TARGET_THUMB1;
29029 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
29030 static HOST_WIDE_INT
29031 arm_vector_alignment (const_tree type)
29033 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
29035 if (TARGET_AAPCS_BASED)
29036 align = MIN (align, 64);
29041 static unsigned int
29042 arm_autovectorize_vector_modes (vector_modes *modes, bool)
29044 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29046 modes->safe_push (V16QImode);
29047 modes->safe_push (V8QImode);
29053 arm_vector_alignment_reachable (const_tree type, bool is_packed)
29055 /* Vectors which aren't in packed structures will not be less aligned than
29056 the natural alignment of their element type, so this is safe. */
29057 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29060 return default_builtin_vector_alignment_reachable (type, is_packed);
29064 arm_builtin_support_vector_misalignment (machine_mode mode,
29065 const_tree type, int misalignment,
29068 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29070 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
29075 /* If the misalignment is unknown, we should be able to handle the access
29076 so long as it is not to a member of a packed data structure. */
29077 if (misalignment == -1)
29080 /* Return true if the misalignment is a multiple of the natural alignment
29081 of the vector's element type. This is probably always going to be
29082 true in practice, since we've already established that this isn't a packed access. */
29084 return ((misalignment % align) == 0);
29087 return default_builtin_support_vector_misalignment (mode, type, misalignment,
29092 arm_conditional_register_usage (void)
29096 if (TARGET_THUMB1 && optimize_size)
29098 /* When optimizing for size on Thumb-1, it's better not
29099 to use the HI regs, because of the overhead of stacking them. */
29101 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
29102 fixed_regs[regno] = call_used_regs[regno] = 1;
29105 /* The link register can be clobbered by any branch insn,
29106 but we have no way to track that at present, so mark
29107 it as unavailable. */
29109 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
29111 if (TARGET_32BIT && TARGET_HARD_FLOAT)
29113 /* VFPv3 registers are disabled when earlier VFP
29114 versions are selected due to the definition of
29115 LAST_VFP_REGNUM. */
29116 for (regno = FIRST_VFP_REGNUM;
29117 regno <= LAST_VFP_REGNUM; ++ regno)
29119 fixed_regs[regno] = 0;
29120 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
29121 || regno >= FIRST_VFP_REGNUM + 32;
29125 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
29127 regno = FIRST_IWMMXT_GR_REGNUM;
29128 /* The 2002/10/09 revision of the XScale ABI has wCG0
29129 and wCG1 as call-preserved registers. The 2002/11/21
29130 revision changed this so that all wCG registers are
29131 scratch registers. */
29132 for (regno = FIRST_IWMMXT_GR_REGNUM;
29133 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
29134 fixed_regs[regno] = 0;
29135 /* The XScale ABI has wR0 - wR9 as scratch registers,
29136 the rest as call-preserved registers. */
29137 for (regno = FIRST_IWMMXT_REGNUM;
29138 regno <= LAST_IWMMXT_REGNUM; ++ regno)
29140 fixed_regs[regno] = 0;
29141 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
29145 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
29147 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29148 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29150 else if (TARGET_APCS_STACK)
29152 fixed_regs[10] = 1;
29153 call_used_regs[10] = 1;
29155 /* -mcaller-super-interworking reserves r11 for calls to
29156 _interwork_r11_call_via_rN(). Making the register global
29157 is an easy way of ensuring that it remains valid for all calls. */
29159 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
29160 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
29162 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29163 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29164 if (TARGET_CALLER_INTERWORKING)
29165 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29168 /* The Q and GE bits are only accessed via special ACLE patterns. */
29169 CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
29170 CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
29172 SUBTARGET_CONDITIONAL_REGISTER_USAGE
29176 arm_preferred_rename_class (reg_class_t rclass)
29178 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
29179 using GENERAL_REGS. During the register rename pass we therefore prefer
29180 LO_REGS, so that code size can be reduced. */
29181 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
29187 /* Compute the attribute "length" of insn "*push_multi".
29188 So this function MUST be kept in sync with that insn pattern. */
29190 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
29192 int i, regno, hi_reg;
29193 int num_saves = XVECLEN (parallel_op, 0);
29203 regno = REGNO (first_op);
29204 /* For PUSH/STM under Thumb-2 mode, we can use 16-bit encodings if the register
29205 list is 8-bit. Normally this means all registers in the list must be
29206 LO_REGS, that is (R0-R7). If any HI_REGS are used, then we must use 32-bit
29207 encodings. The one exception is PUSH: LR, although in HI_REGS, can still be
29208 used with the 16-bit encoding. */
29209 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
29210 for (i = 1; i < num_saves && !hi_reg; i++)
29212 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
29213 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
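/* Worked examples (illustrative): "push {r0-r7}" contains only LO_REGS and
   so gets the 16-bit (2-byte) encoding; "push {r0, r8}" needs the 32-bit
   encoding because r8 is in HI_REGS; "push {r0, lr}" stays at 16 bits
   thanks to the LR exception tested above.  */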
29221 /* Compute the attribute "length" of an insn. Currently, this function is used
29222 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
29223 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
29224 rtx; RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
29225 true if OPERANDS contains an insn which explicitly updates the base register. */
29228 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
29237 rtx parallel_op = operands[0];
29238 /* Initialize to the number of elements in the PARALLEL. */
29239 unsigned indx = XVECLEN (parallel_op, 0) - 1;
29240 /* Initialize to the base register. */
29241 unsigned regno = REGNO (operands[1]);
29242 /* Skip the return and write-back patterns;
29243 we only need the register pop patterns for later analysis. */
29244 unsigned first_indx = 0;
29245 first_indx += return_pc ? 1 : 0;
29246 first_indx += write_back_p ? 1 : 0;
29248 /* A pop operation can be done through LDM or POP. If the base register is SP
29249 and write-back is used, then an LDM is an alias of POP. */
29250 bool pop_p = (regno == SP_REGNUM && write_back_p);
29251 bool ldm_p = !pop_p;
29253 /* Check base register for LDM. */
29254 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
29257 /* Check each register in the list. */
29258 for (; indx >= first_indx; indx--)
29260 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
29261 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
29262 comment in arm_attr_length_push_multi. */
29263 if (REGNO_REG_CLASS (regno) == HI_REGS
29264 && (regno != PC_REGNUM || ldm_p))
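/* Worked example (illustrative): "pop {r0-r3, pc}" with sp write-back uses
   only LO_REGS plus PC, which the 16-bit POP encoding permits, so the insn
   stays short; popping into a high register, or using a high base register
   with LDM, forces the 32-bit encoding.  */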
29271 /* Compute the number of instructions emitted by output_move_double. */
29273 arm_count_output_move_double_insns (rtx *operands)
29277 /* output_move_double may modify the operands array, so call it
29278 here on a copy of the array. */
29279 ops[0] = operands[0];
29280 ops[1] = operands[1];
29281 output_move_double (ops, false, &count);
29285 /* Same as above, but operands are a register/memory pair in SImode.
29286 Assumes operands has the base register in position 0 and memory in position
29287 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
29289 arm_count_ldrdstrd_insns (rtx *operands, bool load)
29293 int regnum, memnum;
29295 regnum = 0, memnum = 1;
29297 regnum = 1, memnum = 0;
29298 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
29299 ops[memnum] = adjust_address (operands[2], DImode, 0);
29300 output_move_double (ops, false, &count);
29306 vfp3_const_double_for_fract_bits (rtx operand)
29308 REAL_VALUE_TYPE r0;
29310 if (!CONST_DOUBLE_P (operand))
29313 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
29314 if (exact_real_inverse (DFmode, &r0)
29315 && !REAL_VALUE_NEGATIVE (r0))
29317 if (exact_real_truncate (DFmode, &r0))
29319 HOST_WIDE_INT value = real_to_integer (&r0);
29320 value = value & 0xffffffff;
29321 if ((value != 0) && ( (value & (value - 1)) == 0))
29323 int ret = exact_log2 (value);
29324 gcc_assert (IN_RANGE (ret, 0, 31));
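/* Worked example (illustrative): for the operand 0.125, exact_real_inverse
   gives 8.0, an exact non-negative power of two, so the function returns
   exact_log2 (8) == 3 -- the number of fraction bits to encode in a
   fixed-point vcvt with #3.  */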
29332 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
29333 log2 is in [1, 32], return that log2. Otherwise return -1.
29334 This is used in the patterns for vcvt.s32.f32 floating-point to
29335 fixed-point conversions. */
29338 vfp3_const_double_for_bits (rtx x)
29340 const REAL_VALUE_TYPE *r;
29342 if (!CONST_DOUBLE_P (x))
29345 r = CONST_DOUBLE_REAL_VALUE (x);
29347 if (REAL_VALUE_NEGATIVE (*r)
29348 || REAL_VALUE_ISNAN (*r)
29349 || REAL_VALUE_ISINF (*r)
29350 || !real_isinteger (r, SFmode))
29353 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
29355 /* The exact_log2 above will have returned -1 if this is
29356 not an exact log2. */
29357 if (!IN_RANGE (hwint, 1, 32))
29364 /* Emit a memory barrier around an atomic sequence according to MODEL. */
29367 arm_pre_atomic_barrier (enum memmodel model)
29369 if (need_atomic_barrier_p (model, true))
29370 emit_insn (gen_memory_barrier ());
29374 arm_post_atomic_barrier (enum memmodel model)
29376 if (need_atomic_barrier_p (model, false))
29377 emit_insn (gen_memory_barrier ());
29380 /* Emit the load-exclusive and store-exclusive instructions.
29381 Use acquire and release versions if necessary. */
29384 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
29386 rtx (*gen) (rtx, rtx);
29392 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
29393 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
29394 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
29395 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
29397 gcc_unreachable ();
29404 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
29405 case E_HImode: gen = gen_arm_load_exclusivehi; break;
29406 case E_SImode: gen = gen_arm_load_exclusivesi; break;
29407 case E_DImode: gen = gen_arm_load_exclusivedi; break;
29409 gcc_unreachable ();
29413 emit_insn (gen (rval, mem));
29417 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
29420 rtx (*gen) (rtx, rtx, rtx);
29426 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
29427 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
29428 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
29429 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
29431 gcc_unreachable ();
29438 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
29439 case E_HImode: gen = gen_arm_store_exclusivehi; break;
29440 case E_SImode: gen = gen_arm_store_exclusivesi; break;
29441 case E_DImode: gen = gen_arm_store_exclusivedi; break;
29443 gcc_unreachable ();
29447 emit_insn (gen (bval, rval, mem));
29450 /* Mark the previous jump instruction as unlikely. */
29453 emit_unlikely_jump (rtx insn)
29455 rtx_insn *jump = emit_jump_insn (insn);
29456 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
29459 /* Expand a compare and swap pattern. */
29462 arm_expand_compare_and_swap (rtx operands[])
29464 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
29465 machine_mode mode, cmp_mode;
29467 bval = operands[0];
29468 rval = operands[1];
29470 oldval = operands[3];
29471 newval = operands[4];
29472 is_weak = operands[5];
29473 mod_s = operands[6];
29474 mod_f = operands[7];
29475 mode = GET_MODE (mem);
29477 /* Normally the succ memory model must be stronger than fail, but in the
29478 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
29479 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
29481 if (TARGET_HAVE_LDACQ
29482 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
29483 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
29484 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
29490 /* For narrow modes, we're going to perform the comparison in SImode,
29491 so do the zero-extension now. */
29492 rval = gen_reg_rtx (SImode);
29493 oldval = convert_modes (SImode, mode, oldval, true);
29497 /* Force the value into a register if needed. We waited until after
29498 the zero-extension above to do this properly. */
29499 if (!arm_add_operand (oldval, SImode))
29500 oldval = force_reg (SImode, oldval);
29504 if (!cmpdi_operand (oldval, mode))
29505 oldval = force_reg (mode, oldval);
29509 gcc_unreachable ();
29513 cmp_mode = E_SImode;
29515 cmp_mode = CC_Zmode;
29517 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
29518 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
29519 oldval, newval, is_weak, mod_s, mod_f));
29521 if (mode == QImode || mode == HImode)
29522 emit_move_insn (operands[1], gen_lowpart (mode, rval));
29524 /* In all cases, we arrange for success to be signaled by Z set.
29525 This arrangement allows for the boolean result to be used directly
29526 in a subsequent branch, post optimization. For Thumb-1 targets, the
29527 boolean negation of the result is also stored in bval because the Thumb-1
29528 backend lacks dependency tracking for the CC flag, as flag setting is not
29529 represented at the RTL level. */
29531 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
29534 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
29535 emit_insn (gen_rtx_SET (bval, x));
29539 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
29540 another memory store between the load-exclusive and store-exclusive can
29541 reset the monitor from Exclusive to Open state. This means we must wait
29542 until after reload to split the pattern, lest we get a register spill in
29543 the middle of the atomic sequence. Success of the compare and swap is
29544 indicated by the Z flag set for 32-bit targets and by neg_bval being zero
29545 for Thumb-1 targets (i.e. negation of the boolean value returned by
29546 atomic_compare_and_swapmode standard pattern in operand 0). */
29549 arm_split_compare_and_swap (rtx operands[])
29551 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
29553 enum memmodel mod_s, mod_f;
29555 rtx_code_label *label1, *label2;
29558 rval = operands[1];
29560 oldval = operands[3];
29561 newval = operands[4];
29562 is_weak = (operands[5] != const0_rtx);
29563 mod_s_rtx = operands[6];
29564 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
29565 mod_f = memmodel_from_int (INTVAL (operands[7]));
29566 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
29567 mode = GET_MODE (mem);
29569 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
29571 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
29572 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
29574 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
29575 a full barrier is emitted after the store-release. */
29577 use_acquire = false;
29579 /* Checks whether a barrier is needed and emits one accordingly. */
29580 if (!(use_acquire || use_release))
29581 arm_pre_atomic_barrier (mod_s);
29586 label1 = gen_label_rtx ();
29587 emit_label (label1);
29589 label2 = gen_label_rtx ();
29591 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
29593 /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
29594 as required to communicate with arm_expand_compare_and_swap. */
29597 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
29598 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
29599 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
29600 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
29601 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
29605 emit_move_insn (neg_bval, const1_rtx);
29606 cond = gen_rtx_NE (VOIDmode, rval, oldval);
29607 if (thumb1_cmpneg_operand (oldval, SImode))
29608 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
29611 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
29614 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
29616 /* Weak or strong, we want EQ to be true for success, so that we
29617 match the flags that we got from the compare above. */
29620 cond = gen_rtx_REG (CCmode, CC_REGNUM);
29621 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
29622 emit_insn (gen_rtx_SET (cond, x));
29627 /* Z is set to boolean value of !neg_bval, as required to communicate
29628 with arm_expand_compare_and_swap. */
29629 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
29630 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
29633 if (!is_mm_relaxed (mod_f))
29634 emit_label (label2);
29636 /* Checks whether a barrier is needed and emits one accordingly. */
29638 || !(use_acquire || use_release))
29639 arm_post_atomic_barrier (mod_s);
29641 if (is_mm_relaxed (mod_f))
29642 emit_label (label2);
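/* Schematic result (illustrative, 32-bit targets): the split produces a
   loop of roughly the shape

   .Lretry:
     ldrex   rval, [mem]
     cmp     rval, oldval
     bne     .Ldone          @ comparison failed
     strex   tmp, newval, [mem]
     cmp     tmp, #0
     bne     .Lretry         @ strong CAS only: retry on a lost reservation
   .Ldone:

   bracketed by the acquire/release forms or explicit barriers computed
   above.  The label names are invented for the example.  */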
29645 /* Split an atomic operation pattern. Operation is given by CODE and is one
29646 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
29647 operation). Operation is performed on the content at MEM and on VALUE
29648 following the memory model MODEL_RTX. The content at MEM before and after
29649 the operation is returned in OLD_OUT and NEW_OUT respectively while the
29650 success of the operation is returned in COND. Using a scratch register or
29651 an operand register for these determines what result is returned for that
29655 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
29656 rtx value, rtx model_rtx, rtx cond)
29658 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
29659 machine_mode mode = GET_MODE (mem);
29660 machine_mode wmode = (mode == DImode ? DImode : SImode);
29661 rtx_code_label *label;
29662 bool all_low_regs, bind_old_new;
29665 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
29667 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
29668 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
29670 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
29671 a full barrier is emitted after the store-release. */
29673 use_acquire = false;
29675 /* Checks whether a barrier is needed and emits one accordingly. */
29676 if (!(use_acquire || use_release))
29677 arm_pre_atomic_barrier (model);
29679 label = gen_label_rtx ();
29680 emit_label (label);
29683 new_out = gen_lowpart (wmode, new_out);
29685 old_out = gen_lowpart (wmode, old_out);
29688 value = simplify_gen_subreg (wmode, value, mode, 0);
29690 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
29692 /* Does the operation require destination and first operand to use the same
29693 register? This is decided by register constraints of relevant insn
29694 patterns in thumb1.md. */
29695 gcc_assert (!new_out || REG_P (new_out));
29696 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
29697 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
29698 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
29703 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
29705 /* We want to return the old value while putting the result of the operation
29706 in the same register as the old value, so copy the old value over to the
29707 destination register and use that register for the operation. */
29708 if (old_out && bind_old_new)
29710 emit_move_insn (new_out, old_out);
29721 x = gen_rtx_AND (wmode, old_out, value);
29722 emit_insn (gen_rtx_SET (new_out, x));
29723 x = gen_rtx_NOT (wmode, new_out);
29724 emit_insn (gen_rtx_SET (new_out, x));
29728 if (CONST_INT_P (value))
29730 value = GEN_INT (-INTVAL (value));
29736 if (mode == DImode)
29738 /* DImode plus/minus need to clobber flags. */
29739 /* The adddi3 and subdi3 patterns are incorrectly written so that
29740 they require matching operands, even when we could easily support
29741 three operands. Thankfully, this can be fixed up post-splitting,
29742 as the individual add+adc patterns do accept three operands and
29743 post-reload cprop can make these moves go away. */
29744 emit_move_insn (new_out, old_out);
29746 x = gen_adddi3 (new_out, new_out, value);
29748 x = gen_subdi3 (new_out, new_out, value);
29755 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
29756 emit_insn (gen_rtx_SET (new_out, x));
29760 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
29763 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
29764 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
29766 /* Checks whether a barrier is needed and emits one accordingly. */
29768 || !(use_acquire || use_release))
29769 arm_post_atomic_barrier (model);
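/* Schematic result (illustrative): for, say, an atomic OR the sequence
   built above is essentially

   .L1:
     ldrex   old, [mem]
     orr     new, old, value
     strex   cond, new, [mem]
     cmp     cond, #0
     bne     .L1

   with barriers or acquire/release accesses added as the memory model
   requires.  The register names are invented for the example.  */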
29772 #define MAX_VECT_LEN 16
29774 struct expand_vec_perm_d
29776 rtx target, op0, op1;
29777 vec_perm_indices perm;
29778 machine_mode vmode;
29783 /* Generate a variable permutation. */
29786 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
29788 machine_mode vmode = GET_MODE (target);
29789 bool one_vector_p = rtx_equal_p (op0, op1);
29791 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
29792 gcc_checking_assert (GET_MODE (op0) == vmode);
29793 gcc_checking_assert (GET_MODE (op1) == vmode);
29794 gcc_checking_assert (GET_MODE (sel) == vmode);
29795 gcc_checking_assert (TARGET_NEON);
29799 if (vmode == V8QImode)
29800 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
29802 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
29808 if (vmode == V8QImode)
29810 pair = gen_reg_rtx (V16QImode);
29811 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
29812 pair = gen_lowpart (TImode, pair);
29813 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
29817 pair = gen_reg_rtx (OImode);
29818 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
29819 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
29825 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
29827 machine_mode vmode = GET_MODE (target);
29828 unsigned int nelt = GET_MODE_NUNITS (vmode);
29829 bool one_vector_p = rtx_equal_p (op0, op1);
29832 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29833 numbering of elements for big-endian, we must reverse the order. */
29834 gcc_checking_assert (!BYTES_BIG_ENDIAN);
29836 /* The VTBL instruction does not use a modulo index, so we must take care
29837 of that ourselves. */
29838 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
29839 mask = gen_const_vec_duplicate (vmode, mask);
29840 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
29842 arm_expand_vec_perm_1 (target, op0, op1, sel);
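/* Worked example (illustrative): for a single V8QI input, MASK is 7, so
   each selector byte is reduced modulo 8 before the VTBL; with two inputs
   the mask is 15 and out-of-range indexes wrap across the operand pair
   instead of yielding the zeros a raw VTBL would produce.  */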
29845 /* Map lane ordering between the architectural lane order and the GCC lane
29846 order, taking the ABI into account. See the comment above output_move_neon for details. */
29849 neon_endian_lane_map (machine_mode mode, int lane)
29851 if (BYTES_BIG_ENDIAN)
29853 int nelems = GET_MODE_NUNITS (mode);
29854 /* Reverse lane order. */
29855 lane = (nelems - 1 - lane);
29856 /* Reverse D register order, to match ABI. */
29857 if (GET_MODE_SIZE (mode) == 16)
29858 lane = lane ^ (nelems / 2);
29863 /* Some permutations index into pairs of vectors; this is a helper function
29864 to map indexes into those pairs of vectors. */
29867 neon_pair_endian_lane_map (machine_mode mode, int lane)
29869 int nelem = GET_MODE_NUNITS (mode);
29870 if (BYTES_BIG_ENDIAN)
29872 lane = neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
29876 /* Generate or test for an insn that supports a constant permutation. */
29878 /* Recognize patterns for the VUZP insns. */
29881 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
29883 unsigned int i, odd, mask, nelt = d->perm.length ();
29884 rtx out0, out1, in0, in1;
29888 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29891 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
29892 big-endian pattern on 64-bit vectors, so we correct for that. */
29893 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
29894 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
29896 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
29898 if (first_elem == neon_endian_lane_map (d->vmode, 0))
29900 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
29904 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29906 for (i = 0; i < nelt; i++)
29909 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
29910 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
29920 if (swap_nelt != 0)
29921 std::swap (in0, in1);
29924 out1 = gen_reg_rtx (d->vmode);
29926 std::swap (out0, out1);
29928 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
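/* Worked example (illustrative, little-endian): the two-operand V8QI
   permutation {0, 2, 4, 6, 8, 10, 12, 14} selects every even lane of the
   concatenated inputs and therefore matches the first VUZP output; the
   odd-lane selection {1, 3, ..., 15} matches the second.  */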
29932 /* Recognize patterns for the VZIP insns. */
29935 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
29937 unsigned int i, high, mask, nelt = d->perm.length ();
29938 rtx out0, out1, in0, in1;
29942 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29945 is_swapped = BYTES_BIG_ENDIAN;
29947 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
29950 if (first_elem == neon_endian_lane_map (d->vmode, high))
29952 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
29956 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29958 for (i = 0; i < nelt / 2; i++)
29961 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
29962 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
29966 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
29967 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
29979 std::swap (in0, in1);
29982 out1 = gen_reg_rtx (d->vmode);
29984 std::swap (out0, out1);
29986 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
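/* Worked example (illustrative, little-endian): the two-operand V8QI
   permutation {0, 8, 1, 9, 2, 10, 3, 11} interleaves the low halves of the
   inputs and is matched here as VZIP; {4, 12, 5, 13, 6, 14, 7, 15}
   interleaves the high halves.  */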
29990 /* Recognize patterns for the VREV insns. */
29992 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
29994 unsigned int i, j, diff, nelt = d->perm.length ();
29995 rtx (*gen) (machine_mode, rtx, rtx);
29997 if (!d->one_vector_p)
30008 gen = gen_neon_vrev64;
30019 gen = gen_neon_vrev32;
30025 gen = gen_neon_vrev64;
30036 gen = gen_neon_vrev16;
30040 gen = gen_neon_vrev32;
30046 gen = gen_neon_vrev64;
30056 for (i = 0; i < nelt ; i += diff + 1)
30057 for (j = 0; j <= diff; j += 1)
30059 /* This is guaranteed to be true as the value of diff
30060 is 7, 3, 1 and we should have enough elements in the
30061 queue to generate this. Getting a vector mask with a
30062 value of diff other than these values implies that
30063 something is wrong by the time we get here. */
30064 gcc_assert (i + j < nelt);
30065 if (d->perm[i + j] != i + diff - j)
30073 emit_insn (gen (d->vmode, d->target, d->op0));
30077 /* Recognize patterns for the VTRN insns. */
30080 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
30082 unsigned int i, odd, mask, nelt = d->perm.length ();
30083 rtx out0, out1, in0, in1;
30085 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30088 /* Note that these are little-endian tests. Adjust for big-endian later. */
30089 if (d->perm[0] == 0)
30091 else if (d->perm[0] == 1)
30095 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30097 for (i = 0; i < nelt; i += 2)
30099 if (d->perm[i] != i + odd)
30101 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
30111 if (BYTES_BIG_ENDIAN)
30113 std::swap (in0, in1);
30118 out1 = gen_reg_rtx (d->vmode);
30120 std::swap (out0, out1);
30122 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
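/* Worked example (illustrative, little-endian): the two-operand V8QI
   permutation {0, 8, 2, 10, 4, 12, 6, 14} matches the even-lane VTRN form
   (odd == 0), and {1, 9, 3, 11, 5, 13, 7, 15} matches the odd-lane form.  */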
30126 /* Recognize patterns for the VEXT insns. */
30129 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
30131 unsigned int i, nelt = d->perm.length ();
30134 unsigned int location;
30136 unsigned int next = d->perm[0] + 1;
30138 /* TODO: Handle GCC's numbering of elements for big-endian. */
30139 if (BYTES_BIG_ENDIAN)
30142 /* Check if the extracted indexes are increasing by one. */
30143 for (i = 1; i < nelt; next++, i++)
30145 /* If we hit the most significant element of the 2nd vector in
30146 the previous iteration, no need to test further. */
30147 if (next == 2 * nelt)
30150 /* If we are operating on only one vector: it could be a
30151 rotation. If there are only two elements of size < 64, let
30152 arm_evpc_neon_vrev catch it. */
30153 if (d->one_vector_p && (next == nelt))
30155 if ((nelt == 2) && (d->vmode != V2DImode))
30161 if (d->perm[i] != next)
30165 location = d->perm[0];
30171 offset = GEN_INT (location);
30173 if(d->vmode == E_DImode)
30176 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
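/* Worked example (illustrative): with V4SI operands {a0,a1,a2,a3} and
   {b0,b1,b2,b3}, the permutation {1, 2, 3, 4} is a run of consecutive
   indexes starting at 1, so it is emitted as a single VEXT with location
   1, producing {a1, a2, a3, b0}.  */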
30180 /* The NEON VTBL instruction is a fully variable permutation that's even
30181 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
30182 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
30183 can do slightly better by expanding this as a constant where we don't
30184 have to apply a mask. */
30187 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
30189 rtx rperm[MAX_VECT_LEN], sel;
30190 machine_mode vmode = d->vmode;
30191 unsigned int i, nelt = d->perm.length ();
30193 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30194 numbering of elements for big-endian, we must reverse the order. */
30195 if (BYTES_BIG_ENDIAN)
30201 /* Generic code will try constant permutation twice. Once with the
30202 original mode and again with the elements lowered to QImode.
30203 So wait and don't do the selector expansion ourselves. */
30204 if (vmode != V8QImode && vmode != V16QImode)
30207 for (i = 0; i < nelt; ++i)
30208 rperm[i] = GEN_INT (d->perm[i]);
30209 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
30210 sel = force_reg (vmode, sel);
30212 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
30217 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
30219 /* Check if the input mask matches vext before reordering the
30222 if (arm_evpc_neon_vext (d))
30225 /* The pattern matching functions above are written to look for a small
30226 number to begin the sequence (0, 1, N/2). If we begin with an index
30227 from the second operand, we can swap the operands. */
30228 unsigned int nelt = d->perm.length ();
30229 if (d->perm[0] >= nelt)
30231 d->perm.rotate_inputs (1);
30232 std::swap (d->op0, d->op1);
30237 if (arm_evpc_neon_vuzp (d))
30239 if (arm_evpc_neon_vzip (d))
30241 if (arm_evpc_neon_vrev (d))
30243 if (arm_evpc_neon_vtrn (d))
30245 return arm_evpc_neon_vtbl (d);
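/* Worked example (editorial note): for V4SI, the mask { 4, 0, 5, 1 }
   begins with an index into the second input, so rotate_inputs plus the
   operand swap rewrite it as { 0, 4, 1, 5 } on the swapped operands,
   which arm_evpc_neon_vzip can then recognize.  */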
30250 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
30253 arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
30254 const vec_perm_indices &sel)
30256 struct expand_vec_perm_d d;
30257 int i, nelt, which;
30259 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
30267 gcc_assert (VECTOR_MODE_P (d.vmode));
30268 d.testing_p = !target;
30270 nelt = GET_MODE_NUNITS (d.vmode);
30271 for (i = which = 0; i < nelt; ++i)
30273 int ei = sel[i] & (2 * nelt - 1);
30274 which |= (ei < nelt ? 1 : 2);
30283 d.one_vector_p = false;
30284 if (d.testing_p || !rtx_equal_p (op0, op1))
30287 /* The elements of PERM do not suggest that only the first operand
30288 is used, but both operands are identical. Allow easier matching
30289 of the permutation by folding the permutation into the single input vector. */
30294 d.one_vector_p = true;
30299 d.one_vector_p = true;
30303 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
30306 return arm_expand_vec_perm_const_1 (&d);
30308 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
30309 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
30310 if (!d.one_vector_p)
30311 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
30314 bool ret = arm_expand_vec_perm_const_1 (&d);
30321 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
30323 /* If we are soft float and we do not have ldrd
30324 then all auto increment forms are ok. */
30325 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
30330 /* Post-increment and pre-decrement are supported for all
30331 instruction forms except for vector forms. */
30334 if (VECTOR_MODE_P (mode))
30336 if (code != ARM_PRE_DEC)
30346 /* Without LDRD and mode size greater than
30347 word size, there is no point in auto-incrementing
30348 because ldm and stm will not have these forms. */
30349 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
30352 /* Vector and floating point modes do not support
30353 these auto increment forms. */
30354 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
30367 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
30368 on ARM, since we know that shifts by negative amounts are no-ops.
30369 Additionally, the default expansion code is not available or suitable
30370 for post-reload insn splits (this can occur when the register allocator
30371 chooses not to do a shift in NEON).
30373 This function is used in both initial expand and post-reload splits, and
30374 handles all kinds of 64-bit shifts.
30376 Input requirements:
30377 - It is safe for the input and output to be the same register, but
30378 early-clobber rules apply for the shift amount and scratch registers.
30379 - Shift by register requires both scratch registers. In all other cases
30380 the scratch registers may be NULL.
30381 - Ashiftrt by a register also clobbers the CC register. */
30383 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
30384 rtx amount, rtx scratch1, rtx scratch2)
30386 rtx out_high = gen_highpart (SImode, out);
30387 rtx out_low = gen_lowpart (SImode, out);
30388 rtx in_high = gen_highpart (SImode, in);
30389 rtx in_low = gen_lowpart (SImode, in);
30392 /* Terminology: in = the register pair containing the input value.
30393 out = the destination register pair.
30394 up = the high- or low-part of each pair.
30395 down = the opposite part to "up".
30396 In a shift, we can consider bits to shift from "up"-stream to
30397 "down"-stream, so in a left-shift "up" is the low-part and "down"
30398 is the high-part of each register pair. */
30400 rtx out_up = code == ASHIFT ? out_low : out_high;
30401 rtx out_down = code == ASHIFT ? out_high : out_low;
30402 rtx in_up = code == ASHIFT ? in_low : in_high;
30403 rtx in_down = code == ASHIFT ? in_high : in_low;
30405 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
30407 && (REG_P (out) || GET_CODE (out) == SUBREG)
30408 && GET_MODE (out) == DImode);
30410 && (REG_P (in) || GET_CODE (in) == SUBREG)
30411 && GET_MODE (in) == DImode);
30413 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
30414 && GET_MODE (amount) == SImode)
30415 || CONST_INT_P (amount)));
30416 gcc_assert (scratch1 == NULL
30417 || (GET_CODE (scratch1) == SCRATCH)
30418 || (GET_MODE (scratch1) == SImode
30419 && REG_P (scratch1)));
30420 gcc_assert (scratch2 == NULL
30421 || (GET_CODE (scratch2) == SCRATCH)
30422 || (GET_MODE (scratch2) == SImode
30423 && REG_P (scratch2)));
30424 gcc_assert (!REG_P (out) || !REG_P (amount)
30425 || !HARD_REGISTER_P (out)
30426 || (REGNO (out) != REGNO (amount)
30427 && REGNO (out) + 1 != REGNO (amount)));
30429 /* Macros to make following code more readable. */
30430 #define SUB_32(DEST,SRC) \
30431 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
30432 #define RSB_32(DEST,SRC) \
30433 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
30434 #define SUB_S_32(DEST,SRC) \
30435 gen_addsi3_compare0 ((DEST), (SRC), \
30437 #define SET(DEST,SRC) \
30438 gen_rtx_SET ((DEST), (SRC))
30439 #define SHIFT(CODE,SRC,AMOUNT) \
30440 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
30441 #define LSHIFT(CODE,SRC,AMOUNT) \
30442 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
30443 SImode, (SRC), (AMOUNT))
30444 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
30445 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
30446 SImode, (SRC), (AMOUNT))
30448 gen_rtx_IOR (SImode, (A), (B))
30449 #define BRANCH(COND,LABEL) \
30450 gen_arm_cond_branch ((LABEL), \
30451 gen_rtx_ ## COND (CCmode, cc_reg, \
30455 /* Shifts by register and shifts by constant are handled separately. */
30456 if (CONST_INT_P (amount))
30458 /* We have a shift-by-constant. */
30460 /* First, handle out-of-range shift amounts.
30461 In both cases we try to match the result an ARM instruction in a
30462 shift-by-register would give. This helps reduce execution
30463 differences between optimization levels, but it won't stop other
30464 parts of the compiler doing different things. This is "undefined
30465 behavior", in any case. */
30466 if (INTVAL (amount) <= 0)
30467 emit_insn (gen_movdi (out, in));
30468 else if (INTVAL (amount) >= 64)
30470 if (code == ASHIFTRT)
30472 rtx const31_rtx = GEN_INT (31);
30473 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
30474 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
30477 emit_insn (gen_movdi (out, const0_rtx));
30480 /* Now handle valid shifts. */
30481 else if (INTVAL (amount) < 32)
30483 /* Shifts by a constant less than 32. */
30484 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
30486 /* Clearing the out register in DImode first avoids lots
30487 of spilling and results in less stack usage.
30488 Later this redundant insn is completely removed.
30489 Do that only if "in" and "out" are different registers. */
30490 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
30491 emit_insn (SET (out, const0_rtx));
30492 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
30493 emit_insn (SET (out_down,
30494 ORR (REV_LSHIFT (code, in_up, reverse_amount),
30496 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
30500 /* Shifts by a constant greater than 31. */
30501 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
30503 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
30504 emit_insn (SET (out, const0_rtx));
30505 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
30506 if (code == ASHIFTRT)
30507 emit_insn (gen_ashrsi3 (out_up, in_up,
30510 emit_insn (SET (out_up, const0_rtx));
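/* Worked examples (editorial note) for the two constant cases above,
   taking a 64-bit logical right shift (code == LSHIFTRT):
     amount == 8:   out_low  = (in_low >> 8) | (in_high << 24);
                    out_high = in_high >> 8;
     amount == 40:  out_low  = in_high >> 8;
                    out_high = 0;  */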
30515 /* We have a shift-by-register. */
30516 rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
30518 /* This alternative requires the scratch registers. */
30519 gcc_assert (scratch1 && REG_P (scratch1));
30520 gcc_assert (scratch2 && REG_P (scratch2));
30522 /* We will need the values "amount-32" and "32-amount" later.
30523 Swapping them around now allows the later code to be more general. */
30527 emit_insn (SUB_32 (scratch1, amount));
30528 emit_insn (RSB_32 (scratch2, amount));
30531 emit_insn (RSB_32 (scratch1, amount));
30532 /* Also set CC = amount > 32. */
30533 emit_insn (SUB_S_32 (scratch2, amount));
30536 emit_insn (RSB_32 (scratch1, amount));
30537 emit_insn (SUB_32 (scratch2, amount));
30540 gcc_unreachable ();
30543 /* Emit code like this:
30546 ASHIFT:   out_down = in_down << amount;
30547           out_down = (in_up << (amount - 32)) | out_down;
30548           out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
30549           out_up   = in_up << amount;
30552 ASHIFTRT: out_down = in_down >> amount;
30553           out_down = (in_up << (32 - amount)) | out_down;
30555           out_down = ((signed)in_up >> (amount - 32)) | out_down;
30556           out_up   = in_up >> amount;
30559 LSHIFTRT: out_down = in_down >> amount;
30560           out_down = (in_up << (32 - amount)) | out_down;
30562           out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
30563           out_up   = in_up >> amount;
30565 The ARM and Thumb2 variants are the same but implemented slightly
30566 differently. If this were only called during expand we could just
30567 use the Thumb2 case and let combine do the right thing, but this
30568 can also be called from post-reload splitters. */
30570 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
30572 if (!TARGET_THUMB2)
30574 /* Emit code for ARM mode. */
30575 emit_insn (SET (out_down,
30576 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
30577 if (code == ASHIFTRT)
30579 rtx_code_label *done_label = gen_label_rtx ();
30580 emit_jump_insn (BRANCH (LT, done_label));
30581 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
30583 emit_label (done_label);
30586 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
30591 /* Emit code for Thumb2 mode.
30592 Thumb2 can't do shift and or in one insn. */
30593 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
30594 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
30596 if (code == ASHIFTRT)
30598 rtx_code_label *done_label = gen_label_rtx ();
30599 emit_jump_insn (BRANCH (LT, done_label));
30600 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
30601 emit_insn (SET (out_down, ORR (out_down, scratch2)));
30602 emit_label (done_label);
30606 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
30607 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
30611 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
30625 /* Returns true if the pattern is a valid symbolic address, which is either a
30626 symbol_ref or (symbol_ref + addend).
30628 According to the ARM ELF ABI, the initial addend of REL-type relocations
30629 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
30630 literal field of the instruction as a 16-bit signed value in the range
30631 -32768 <= A < 32768. */
30634 arm_valid_symbolic_address_p (rtx addr)
30636 rtx xop0, xop1 = NULL_RTX;
30639 if (target_word_relocations)
30642 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
30645 /* (const (plus: symbol_ref const_int)) */
30646 if (GET_CODE (addr) == CONST)
30647 tmp = XEXP (addr, 0);
30649 if (GET_CODE (tmp) == PLUS)
30651 xop0 = XEXP (tmp, 0);
30652 xop1 = XEXP (tmp, 1);
30654 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
30655 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
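/* For example, (const (plus (symbol_ref "sym") (const_int 32767))) is
   accepted above, whereas an addend of 32768 is rejected because a
   REL-type relocation can only encode a 16-bit signed addend.  */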
30661 /* Return true if *COMPARISON is a valid comparison operation, and
30662 put the operands into a form that is valid for it. */
30664 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
30666 enum rtx_code code = GET_CODE (*comparison);
30668 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
30669 ? GET_MODE (*op2) : GET_MODE (*op1);
30671 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
30673 if (code == UNEQ || code == LTGT)
30676 code_int = (int)code;
30677 arm_canonicalize_comparison (&code_int, op1, op2, 0);
30678 PUT_CODE (*comparison, (enum rtx_code)code_int);
30683 if (!arm_add_operand (*op1, mode))
30684 *op1 = force_reg (mode, *op1);
30685 if (!arm_add_operand (*op2, mode))
30686 *op2 = force_reg (mode, *op2);
30690 /* gen_compare_reg() will sort out any invalid operands. */
30694 if (!TARGET_VFP_FP16INST)
30696 /* FP16 comparisons are done in SF mode. */
30698 *op1 = convert_to_mode (mode, *op1, 1);
30699 *op2 = convert_to_mode (mode, *op2, 1);
30700 /* Fall through. */
30703 if (!vfp_compare_operand (*op1, mode))
30704 *op1 = force_reg (mode, *op1);
30705 if (!vfp_compare_operand (*op2, mode))
30706 *op2 = force_reg (mode, *op2);
30716 /* Maximum number of instructions to set a block of memory. */
30718 arm_block_set_max_insns (void)
30720 if (optimize_function_for_size_p (cfun))
30723 return current_tune->max_insns_inline_memset;
30726 /* Return TRUE if it's profitable to set a block of memory for the
30727 non-vectorized case. VAL is the value to set the memory
30728 with. LENGTH is the number of bytes to set. ALIGN is the
30729 alignment of the destination memory in bytes. UNALIGNED_P
30730 is TRUE if we can only set the memory with instructions
30731 meeting alignment requirements. USE_STRD_P is TRUE if we
30732 can use strd to set the memory. */
30734 arm_block_set_non_vect_profit_p (rtx val,
30735 unsigned HOST_WIDE_INT length,
30736 unsigned HOST_WIDE_INT align,
30737 bool unaligned_p, bool use_strd_p)
30740 /* For leftovers of 0-7 bytes, we can set the memory block using
30741 strb/strh/str with the minimum number of instructions. */
30742 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
30746 num = arm_const_inline_cost (SET, val);
30747 num += length / align + length % align;
30749 else if (use_strd_p)
30751 num = arm_const_double_inline_cost (val);
30752 num += (length >> 3) + leftover[length & 7];
30756 num = arm_const_inline_cost (SET, val);
30757 num += (length >> 2) + leftover[length & 3];
30760 /* We may be able to combine the last STRH/STRB pair into a single STR
30761 by shifting one byte back. */
30762 if (unaligned_access && length > 3 && (length & 3) == 3)
30765 return (num <= arm_block_set_max_insns ());
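/* Worked example (editorial note): with USE_STRD_P and LENGTH == 17
   the estimate is arm_const_double_inline_cost (val) plus
   (17 >> 3) == 2 strd instructions plus leftover[17 & 7] == 1 extra
   store; the block is only set inline if this total does not exceed
   arm_block_set_max_insns ().  */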
30768 /* Return TRUE if it's profitable to set a block of memory for the
30769 vectorized case. LENGTH is the number of bytes to set.
30770 ALIGN is the alignment of destination memory in bytes.
30771 MODE is the vector mode used to set the memory. */
30773 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
30774 unsigned HOST_WIDE_INT align,
30778 bool unaligned_p = ((align & 3) != 0);
30779 unsigned int nelt = GET_MODE_NUNITS (mode);
30781 /* Instruction loading constant value. */
30783 /* Instructions storing the memory. */
30784 num += (length + nelt - 1) / nelt;
30785 /* Instructions adjusting the address expression. We only need to adjust
30786 the address expression if the destination is 4-byte aligned and the
30787 leftover bytes can only be stored by a misaligned store instruction. */
30788 if (!unaligned_p && (length & 3) != 0)
30791 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
30792 if (!unaligned_p && mode == V16QImode)
30795 return (num <= arm_block_set_max_insns ());
30798 /* Set a block of memory using vectorization instructions for the
30799 unaligned case. We fill the first LENGTH bytes of the memory
30800 area starting from DSTBASE with byte constant VALUE. ALIGN is
30801 the alignment requirement of memory. Return TRUE if succeeded. */
30803 arm_block_set_unaligned_vect (rtx dstbase,
30804 unsigned HOST_WIDE_INT length,
30805 unsigned HOST_WIDE_INT value,
30806 unsigned HOST_WIDE_INT align)
30808 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
30811 rtx (*gen_func) (rtx, rtx);
30813 unsigned HOST_WIDE_INT v = value;
30814 unsigned int offset = 0;
30815 gcc_assert ((align & 0x3) != 0);
30816 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30817 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30818 if (length >= nelt_v16)
30821 gen_func = gen_movmisalignv16qi;
30826 gen_func = gen_movmisalignv8qi;
30828 nelt_mode = GET_MODE_NUNITS (mode);
30829 gcc_assert (length >= nelt_mode);
30830 /* Skip if it isn't profitable. */
30831 if (!arm_block_set_vect_profit_p (length, align, mode))
30834 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30835 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30837 v = sext_hwi (v, BITS_PER_WORD);
30839 reg = gen_reg_rtx (mode);
30840 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
30841 /* Emit instruction loading the constant value. */
30842 emit_move_insn (reg, val_vec);
30844 /* Handle nelt_mode bytes in a vector. */
30845 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
30847 emit_insn ((*gen_func) (mem, reg));
30848 if (i + 2 * nelt_mode <= length)
30850 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
30851 offset += nelt_mode;
30852 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30856 /* If at least nelt_v8 bytes are left over, we must be in V16QImode. */
30858 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
30860 /* Handle (8, 16) bytes leftover. */
30861 if (i + nelt_v8 < length)
30863 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
30864 offset += length - i;
30865 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30867 /* We are shifting bytes back, set the alignment accordingly. */
30868 if ((length & 1) != 0 && align >= 2)
30869 set_mem_align (mem, BITS_PER_UNIT);
30871 emit_insn (gen_movmisalignv16qi (mem, reg));
30873 /* Handle (0, 8] bytes leftover. */
30874 else if (i < length && i + nelt_v8 >= length)
30876 if (mode == V16QImode)
30877 reg = gen_lowpart (V8QImode, reg);
30879 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
30880 + (nelt_mode - nelt_v8))));
30881 offset += (length - i) + (nelt_mode - nelt_v8);
30882 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
30884 /* We are shifting bytes back, set the alignment accordingly. */
30885 if ((length & 1) != 0 && align >= 2)
30886 set_mem_align (mem, BITS_PER_UNIT);
30888 emit_insn (gen_movmisalignv8qi (mem, reg));
30894 /* Set a block of memory using vectorization instructions for the
30895 aligned case. We fill the first LENGTH bytes of the memory area
30896 starting from DSTBASE with byte constant VALUE. ALIGN is the
30897 alignment requirement of memory. Return TRUE if succeeded. */
30899 arm_block_set_aligned_vect (rtx dstbase,
30900 unsigned HOST_WIDE_INT length,
30901 unsigned HOST_WIDE_INT value,
30902 unsigned HOST_WIDE_INT align)
30904 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
30905 rtx dst, addr, mem;
30908 unsigned int offset = 0;
30910 gcc_assert ((align & 0x3) == 0);
30911 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30912 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30913 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
30918 nelt_mode = GET_MODE_NUNITS (mode);
30919 gcc_assert (length >= nelt_mode);
30920 /* Skip if it isn't profitable. */
30921 if (!arm_block_set_vect_profit_p (length, align, mode))
30924 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30926 reg = gen_reg_rtx (mode);
30927 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
30928 /* Emit instruction loading the constant value. */
30929 emit_move_insn (reg, val_vec);
30932 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
30933 if (mode == V16QImode)
30935 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30936 emit_insn (gen_movmisalignv16qi (mem, reg));
30938 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
30939 if (i + nelt_v8 < length && i + nelt_v16 > length)
30941 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30942 offset += length - nelt_mode;
30943 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30944 /* We are shifting bytes back, set the alignment accordingly. */
30945 if ((length & 0x3) == 0)
30946 set_mem_align (mem, BITS_PER_UNIT * 4);
30947 else if ((length & 0x1) == 0)
30948 set_mem_align (mem, BITS_PER_UNIT * 2);
30950 set_mem_align (mem, BITS_PER_UNIT);
30952 emit_insn (gen_movmisalignv16qi (mem, reg));
30955 /* Fall through for bytes leftover. */
30957 nelt_mode = GET_MODE_NUNITS (mode);
30958 reg = gen_lowpart (V8QImode, reg);
30961 /* Handle 8 bytes in a vector. */
30962 for (; (i + nelt_mode <= length); i += nelt_mode)
30964 addr = plus_constant (Pmode, dst, i);
30965 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
30966 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
30967 emit_move_insn (mem, reg);
30969 emit_insn (gen_unaligned_storev8qi (mem, reg));
30972 /* Handle single word leftover by shifting 4 bytes back. We can
30973 use aligned access for this case. */
30974 if (i + UNITS_PER_WORD == length)
30976 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30977 offset += i - UNITS_PER_WORD;
30978 mem = adjust_automodify_address (dstbase, mode, addr, offset);
30979 /* We are shifting 4 bytes back, set the alignment accordingly. */
30980 if (align > UNITS_PER_WORD)
30981 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30983 emit_insn (gen_unaligned_storev8qi (mem, reg));
30985 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30986 We have to use unaligned access for this case. */
30987 else if (i < length)
30989 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30990 offset += length - nelt_mode;
30991 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30992 /* We are shifting bytes back, set the alignment accordingly. */
30993 if ((length & 1) == 0)
30994 set_mem_align (mem, BITS_PER_UNIT * 2);
30996 set_mem_align (mem, BITS_PER_UNIT);
30998 emit_insn (gen_movmisalignv8qi (mem, reg));
31004 /* Set a block of memory using plain strh/strb instructions, using
31005 only instructions allowed by ALIGN on the target processor. We fill the
31006 first LENGTH bytes of the memory area starting from DSTBASE
31007 with byte constant VALUE. ALIGN is the alignment requirement of memory. */
31010 arm_block_set_unaligned_non_vect (rtx dstbase,
31011 unsigned HOST_WIDE_INT length,
31012 unsigned HOST_WIDE_INT value,
31013 unsigned HOST_WIDE_INT align)
31016 rtx dst, addr, mem;
31017 rtx val_exp, val_reg, reg;
31019 HOST_WIDE_INT v = value;
31021 gcc_assert (align == 1 || align == 2);
31024 v |= (value << BITS_PER_UNIT);
31026 v = sext_hwi (v, BITS_PER_WORD);
31027 val_exp = GEN_INT (v);
31028 /* Skip if it isn't profitable. */
31029 if (!arm_block_set_non_vect_profit_p (val_exp, length,
31030 align, true, false))
31033 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31034 mode = (align == 2 ? HImode : QImode);
31035 val_reg = force_reg (SImode, val_exp);
31036 reg = gen_lowpart (mode, val_reg);
31038 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
31040 addr = plus_constant (Pmode, dst, i);
31041 mem = adjust_automodify_address (dstbase, mode, addr, i);
31042 emit_move_insn (mem, reg);
31045 /* Handle single byte leftover. */
31046 if (i + 1 == length)
31048 reg = gen_lowpart (QImode, val_reg);
31049 addr = plus_constant (Pmode, dst, i);
31050 mem = adjust_automodify_address (dstbase, QImode, addr, i);
31051 emit_move_insn (mem, reg);
31055 gcc_assert (i == length);
31059 /* Set a block of memory using plain strd/str/strh/strb instructions,
31060 to permit unaligned copies on processors which support unaligned
31061 semantics for those instructions. We fill the first LENGTH bytes
31062 of the memory area starting from DSTBASE with byte constant VALUE.
31063 ALIGN is the alignment requirement of memory. */
31065 arm_block_set_aligned_non_vect (rtx dstbase,
31066 unsigned HOST_WIDE_INT length,
31067 unsigned HOST_WIDE_INT value,
31068 unsigned HOST_WIDE_INT align)
31071 rtx dst, addr, mem;
31072 rtx val_exp, val_reg, reg;
31073 unsigned HOST_WIDE_INT v;
31076 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
31077 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
31079 v = (value | (value << 8) | (value << 16) | (value << 24));
31080 if (length < UNITS_PER_WORD)
31081 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
31084 v |= (v << BITS_PER_WORD);
31086 v = sext_hwi (v, BITS_PER_WORD);
31088 val_exp = GEN_INT (v);
31089 /* Skip if it isn't profitable. */
31090 if (!arm_block_set_non_vect_profit_p (val_exp, length,
31091 align, false, use_strd_p))
31096 /* Try without strd. */
31097 v = (v >> BITS_PER_WORD);
31098 v = sext_hwi (v, BITS_PER_WORD);
31099 val_exp = GEN_INT (v);
31100 use_strd_p = false;
31101 if (!arm_block_set_non_vect_profit_p (val_exp, length,
31102 align, false, use_strd_p))
31107 dst = copy_addr_to_reg (XEXP (dstbase, 0));
31108 /* Handle double words using strd if possible. */
31111 val_reg = force_reg (DImode, val_exp);
31113 for (; (i + 8 <= length); i += 8)
31115 addr = plus_constant (Pmode, dst, i);
31116 mem = adjust_automodify_address (dstbase, DImode, addr, i);
31117 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
31118 emit_move_insn (mem, reg);
31120 emit_insn (gen_unaligned_storedi (mem, reg));
31124 val_reg = force_reg (SImode, val_exp);
31126 /* Handle words. */
31127 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
31128 for (; (i + 4 <= length); i += 4)
31130 addr = plus_constant (Pmode, dst, i);
31131 mem = adjust_automodify_address (dstbase, SImode, addr, i);
31132 if ((align & 3) == 0)
31133 emit_move_insn (mem, reg);
31135 emit_insn (gen_unaligned_storesi (mem, reg));
31138 /* Merge last pair of STRH and STRB into a STR if possible. */
31139 if (unaligned_access && i > 0 && (i + 3) == length)
31141 addr = plus_constant (Pmode, dst, i - 1);
31142 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
31143 /* We are shifting one byte back, set the alignment accordingly. */
31144 if ((align & 1) == 0)
31145 set_mem_align (mem, BITS_PER_UNIT);
31147 /* Most likely this is an unaligned access, and we can't tell at
31148 compilation time. */
31149 emit_insn (gen_unaligned_storesi (mem, reg));
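/* Worked example (editorial note): for LENGTH == 7 the word loop above
   stores bytes 0-3 (so i == 4), and instead of an strh/strb pair for
   bytes 4-6 a single str is issued at offset i - 1 == 3, harmlessly
   rewriting byte 3 with the same value.  */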
31153 /* Handle half word leftover. */
31154 if (i + 2 <= length)
31156 reg = gen_lowpart (HImode, val_reg);
31157 addr = plus_constant (Pmode, dst, i);
31158 mem = adjust_automodify_address (dstbase, HImode, addr, i);
31159 if ((align & 1) == 0)
31160 emit_move_insn (mem, reg);
31162 emit_insn (gen_unaligned_storehi (mem, reg));
31167 /* Handle single byte leftover. */
31168 if (i + 1 == length)
31170 reg = gen_lowpart (QImode, val_reg);
31171 addr = plus_constant (Pmode, dst, i);
31172 mem = adjust_automodify_address (dstbase, QImode, addr, i);
31173 emit_move_insn (mem, reg);
31179 /* Set a block of memory using vectorization instructions for both
31180 aligned and unaligned cases. We fill the first LENGTH bytes of
31181 the memory area starting from DSTBASE with byte constant VALUE.
31182 ALIGN is the alignment requirement of memory. */
31184 arm_block_set_vect (rtx dstbase,
31185 unsigned HOST_WIDE_INT length,
31186 unsigned HOST_WIDE_INT value,
31187 unsigned HOST_WIDE_INT align)
31189 /* Check whether we need to use unaligned store instruction. */
31190 if (((align & 3) != 0 || (length & 3) != 0)
31191 /* Check whether unaligned store instruction is available. */
31192 && (!unaligned_access || BYTES_BIG_ENDIAN))
31195 if ((align & 3) == 0)
31196 return arm_block_set_aligned_vect (dstbase, length, value, align);
31198 return arm_block_set_unaligned_vect (dstbase, length, value, align);
31201 /* Expand a string store (setmem) operation. First we try to do that with
31202 vectorization instructions, then with ARM unaligned access and
31203 double-word stores if profitable. OPERANDS[0] is the destination,
31204 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
31205 initialize the memory with, and OPERANDS[3] is the known alignment of the destination. */
31208 arm_gen_setmem (rtx *operands)
31210 rtx dstbase = operands[0];
31211 unsigned HOST_WIDE_INT length;
31212 unsigned HOST_WIDE_INT value;
31213 unsigned HOST_WIDE_INT align;
31215 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
31218 length = UINTVAL (operands[1]);
31222 value = (UINTVAL (operands[2]) & 0xFF);
31223 align = UINTVAL (operands[3]);
31224 if (TARGET_NEON && length >= 8
31225 && current_tune->string_ops_prefer_neon
31226 && arm_block_set_vect (dstbase, length, value, align))
31229 if (!unaligned_access && (align & 3) != 0)
31230 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
31232 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
31237 arm_macro_fusion_p (void)
31239 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
31242 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
31243 for MOVW / MOVT macro fusion. */
31246 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
31248 /* We are trying to fuse
31249 movw imm / movt imm
31250 instructions as a group that gets scheduled together. */
31252 rtx set_dest = SET_DEST (curr_set);
31254 if (GET_MODE (set_dest) != SImode)
31257 /* We are trying to match:
31258 prev (movw) == (set (reg r0) (const_int imm16))
31259 curr (movt) == (set (zero_extract (reg r0)
31262 (const_int imm16_1))
31264 prev (movw) == (set (reg r1)
31265 (high (symbol_ref ("SYM"))))
31266 curr (movt) == (set (reg r0)
31268 (symbol_ref ("SYM")))) */
31270 if (GET_CODE (set_dest) == ZERO_EXTRACT)
31272 if (CONST_INT_P (SET_SRC (curr_set))
31273 && CONST_INT_P (SET_SRC (prev_set))
31274 && REG_P (XEXP (set_dest, 0))
31275 && REG_P (SET_DEST (prev_set))
31276 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
31280 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
31281 && REG_P (SET_DEST (curr_set))
31282 && REG_P (SET_DEST (prev_set))
31283 && GET_CODE (SET_SRC (prev_set)) == HIGH
31284 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
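/* Illustrative example (editorial note): the first form above matches a
   pair such as
     movw r0, #0x1234	(set (reg r0) (const_int 0x1234))
     movt r0, #0x5678	(set (zero_extract (reg r0) ...) (const_int 0x5678))
   which is kept back-to-back so that cores fusing movw/movt benefit.  */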
31291 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
31293 rtx prev_set = single_set (prev);
31294 rtx curr_set = single_set (curr);
31300 if (any_condjump_p (curr))
31303 if (!arm_macro_fusion_p ())
31306 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
31307 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
31313 /* Return true iff the instruction fusion described by OP is enabled. */
31315 arm_fusion_enabled_p (tune_params::fuse_ops op)
31317 return current_tune->fusible_ops & op;
31320 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
31321 scheduled for speculative execution. Reject the long-running division
31322 and square-root instructions. */
31325 arm_sched_can_speculate_insn (rtx_insn *insn)
31327 switch (get_attr_type (insn))
31335 case TYPE_NEON_FP_SQRT_S:
31336 case TYPE_NEON_FP_SQRT_D:
31337 case TYPE_NEON_FP_SQRT_S_Q:
31338 case TYPE_NEON_FP_SQRT_D_Q:
31339 case TYPE_NEON_FP_DIV_S:
31340 case TYPE_NEON_FP_DIV_D:
31341 case TYPE_NEON_FP_DIV_S_Q:
31342 case TYPE_NEON_FP_DIV_D_Q:
31349 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
31351 static unsigned HOST_WIDE_INT
31352 arm_asan_shadow_offset (void)
31354 return HOST_WIDE_INT_1U << 29;
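/* Editorial note: with the usual AddressSanitizer mapping
   shadow = (address >> 3) + offset, this places the shadow byte for
   ADDR at (ADDR >> 3) + 0x20000000 on ARM.  */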
31358 /* This is a temporary fix for PR60655. Ideally we need
31359 to handle most of these cases in the generic part but
31360 currently we reject minus (..) (sym_ref). We try to
31361 ameliorate the case with minus (sym_ref1) (sym_ref2)
31362 where they are in the same section. */
31365 arm_const_not_ok_for_debug_p (rtx p)
31367 tree decl_op0 = NULL;
31368 tree decl_op1 = NULL;
31370 if (GET_CODE (p) == UNSPEC)
31372 if (GET_CODE (p) == MINUS)
31374 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
31376 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
31378 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
31379 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
31381 if ((VAR_P (decl_op1)
31382 || TREE_CODE (decl_op1) == CONST_DECL)
31383 && (VAR_P (decl_op0)
31384 || TREE_CODE (decl_op0) == CONST_DECL))
31385 return (get_variable_section (decl_op1, false)
31386 != get_variable_section (decl_op0, false));
31388 if (TREE_CODE (decl_op1) == LABEL_DECL
31389 && TREE_CODE (decl_op0) == LABEL_DECL)
31390 return (DECL_CONTEXT (decl_op1)
31391 != DECL_CONTEXT (decl_op0));
31401 /* Return TRUE if X is a reference to a value in a constant pool. */
31403 arm_is_constant_pool_ref (rtx x)
31406 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
31407 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
31410 /* Remember the last target of arm_set_current_function. */
31411 static GTY(()) tree arm_previous_fndecl;
31413 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
31416 save_restore_target_globals (tree new_tree)
31418 /* If we have a previous state, use it. */
31419 if (TREE_TARGET_GLOBALS (new_tree))
31420 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
31421 else if (new_tree == target_option_default_node)
31422 restore_target_globals (&default_target_globals);
31425 /* Call target_reinit and save the state for TARGET_GLOBALS. */
31426 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
31429 arm_option_params_internal ();
31432 /* Invalidate arm_previous_fndecl. */
31435 arm_reset_previous_fndecl (void)
31437 arm_previous_fndecl = NULL_TREE;
31440 /* Establish appropriate back-end context for processing the function
31441 FNDECL. The argument might be NULL to indicate processing at top
31442 level, outside of any function scope. */
31445 arm_set_current_function (tree fndecl)
31447 if (!fndecl || fndecl == arm_previous_fndecl)
31450 tree old_tree = (arm_previous_fndecl
31451 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
31454 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
31456 /* If current function has no attributes but previous one did,
31457 use the default node. */
31458 if (! new_tree && old_tree)
31459 new_tree = target_option_default_node;
31461 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
31462 the default have been handled by save_restore_target_globals from
31463 arm_pragma_target_parse. */
31464 if (old_tree == new_tree)
31467 arm_previous_fndecl = fndecl;
31469 /* First set the target options. */
31470 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
31472 save_restore_target_globals (new_tree);
31475 /* Implement TARGET_OPTION_PRINT. */
31478 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
31480 int flags = ptr->x_target_flags;
31481 const char *fpu_name;
31483 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
31484 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
31486 fprintf (file, "%*sselected isa %s\n", indent, "",
31487 TARGET_THUMB2_P (flags) ? "thumb2" :
31488 TARGET_THUMB_P (flags) ? "thumb1" :
31491 if (ptr->x_arm_arch_string)
31492 fprintf (file, "%*sselected architecture %s\n", indent, "",
31493 ptr->x_arm_arch_string);
31495 if (ptr->x_arm_cpu_string)
31496 fprintf (file, "%*sselected CPU %s\n", indent, "",
31497 ptr->x_arm_cpu_string);
31499 if (ptr->x_arm_tune_string)
31500 fprintf (file, "%*sselected tune %s\n", indent, "",
31501 ptr->x_arm_tune_string);
31503 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
31506 /* Hook to determine if one function can safely inline another. */
31509 arm_can_inline_p (tree caller, tree callee)
31511 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
31512 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
31513 bool can_inline = true;
31515 struct cl_target_option *caller_opts
31516 = TREE_TARGET_OPTION (caller_tree ? caller_tree
31517 : target_option_default_node);
31519 struct cl_target_option *callee_opts
31520 = TREE_TARGET_OPTION (callee_tree ? callee_tree
31521 : target_option_default_node);
31523 if (callee_opts == caller_opts)
31526 /* Callee's ISA features should be a subset of the caller's. */
31527 struct arm_build_target caller_target;
31528 struct arm_build_target callee_target;
31529 caller_target.isa = sbitmap_alloc (isa_num_bits);
31530 callee_target.isa = sbitmap_alloc (isa_num_bits);
31532 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
31534 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
31536 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
31537 can_inline = false;
31539 sbitmap_free (caller_target.isa);
31540 sbitmap_free (callee_target.isa);
31542 /* OK to inline between different modes.
31543 Function with mode specific instructions, e.g using asm,
31544 must be explicitly protected with noinline. */
31548 /* Hook to fix function's alignment affected by target attribute. */
31551 arm_relayout_function (tree fndecl)
31553 if (DECL_USER_ALIGN (fndecl))
31556 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
31559 callee_tree = target_option_default_node;
31561 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
31564 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
31567 /* Inner function to process the attribute((target(...))), take an argument and
31568 set the current options from the argument. If we have a list, recursively
31569 go over the list. */
31572 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
31574 if (TREE_CODE (args) == TREE_LIST)
31578 for (; args; args = TREE_CHAIN (args))
31579 if (TREE_VALUE (args)
31580 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
31585 else if (TREE_CODE (args) != STRING_CST)
31587 error ("attribute %<target%> argument not a string");
31591 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
31594 while ((q = strtok (argstr, ",")) != NULL)
31597 if (!strcmp (q, "thumb"))
31599 opts->x_target_flags |= MASK_THUMB;
31600 if (TARGET_FDPIC && !arm_arch_thumb2)
31601 sorry ("FDPIC mode is not supported in Thumb-1 mode");
31604 else if (!strcmp (q, "arm"))
31605 opts->x_target_flags &= ~MASK_THUMB;
31607 else if (!strcmp (q, "general-regs-only"))
31608 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
31610 else if (!strncmp (q, "fpu=", 4))
31613 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
31614 &fpu_index, CL_TARGET))
31616 error ("invalid fpu for target attribute or pragma %qs", q);
31619 if (fpu_index == TARGET_FPU_auto)
31621 /* This doesn't really make sense until we support
31622 general dynamic selection of the architecture and all
31624 sorry ("auto fpu selection not currently permitted here");
31627 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
31629 else if (!strncmp (q, "arch=", 5))
31631 char *arch = q + 5;
31632 const arch_option *arm_selected_arch
31633 = arm_parse_arch_option_name (all_architectures, "arch", arch);
31635 if (!arm_selected_arch)
31637 error ("invalid architecture for target attribute or pragma %qs",
31642 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
31644 else if (q[0] == '+')
31646 opts->x_arm_arch_string
31647 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
31651 error ("unknown target attribute or pragma %qs", q);
31659 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
31662 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
31663 struct gcc_options *opts_set)
31665 struct cl_target_option cl_opts;
31667 if (!arm_valid_target_attribute_rec (args, opts))
31670 cl_target_option_save (&cl_opts, opts);
31671 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
31672 arm_option_check_internal (opts);
31673 /* Do any overrides, such as global options arch=xxx.
31674 We do this since arm_active_target was overridden. */
31675 arm_option_reconfigure_globals ();
31676 arm_options_perform_arch_sanity_checks ();
31677 arm_option_override_internal (opts, opts_set);
31679 return build_target_option_node (opts);
31683 add_attribute (const char * mode, tree *attributes)
31685 size_t len = strlen (mode);
31686 tree value = build_string (len, mode);
31688 TREE_TYPE (value) = build_array_type (char_type_node,
31689 build_index_type (size_int (len)));
31691 *attributes = tree_cons (get_identifier ("target"),
31692 build_tree_list (NULL_TREE, value),
31696 /* For testing. Insert thumb or arm modes alternately on functions. */
31699 arm_insert_attributes (tree fndecl, tree * attributes)
31703 if (! TARGET_FLIP_THUMB)
31706 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
31707 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
31710 /* Nested definitions must inherit mode. */
31711 if (current_function_decl)
31713 mode = TARGET_THUMB ? "thumb" : "arm";
31714 add_attribute (mode, attributes);
31718 /* If there is already a setting don't change it. */
31719 if (lookup_attribute ("target", *attributes) != NULL)
31722 mode = thumb_flipper ? "thumb" : "arm";
31723 add_attribute (mode, attributes);
31725 thumb_flipper = !thumb_flipper;
31728 /* Hook to validate attribute((target("string"))). */
31731 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
31732 tree args, int ARG_UNUSED (flags))
31735 struct gcc_options func_options;
31736 tree cur_tree, new_optimize;
31737 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
31739 /* Get the optimization options of the current function. */
31740 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
31742 /* If the function changed the optimization levels as well as setting target
31743 options, start with the optimizations specified. */
31744 if (!func_optimize)
31745 func_optimize = optimization_default_node;
31747 /* Init func_options. */
31748 memset (&func_options, 0, sizeof (func_options));
31749 init_options_struct (&func_options, NULL);
31750 lang_hooks.init_options_struct (&func_options);
31752 /* Initialize func_options to the defaults. */
31753 cl_optimization_restore (&func_options,
31754 TREE_OPTIMIZATION (func_optimize));
31756 cl_target_option_restore (&func_options,
31757 TREE_TARGET_OPTION (target_option_default_node));
31759 /* Set func_options flags with new target mode. */
31760 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
31761 &global_options_set);
31763 if (cur_tree == NULL_TREE)
31766 new_optimize = build_optimization_node (&func_options);
31768 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
31770 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
31775 /* Match an ISA feature bitmap to a named FPU. We always use the
31776 first entry that exactly matches the feature set, so that we
31777 effectively canonicalize the FPU name for the assembler. */
31779 arm_identify_fpu_from_isa (sbitmap isa)
31781 auto_sbitmap fpubits (isa_num_bits);
31782 auto_sbitmap cand_fpubits (isa_num_bits);
31784 bitmap_and (fpubits, isa, isa_all_fpubits_internal);
31786 /* If there are no ISA feature bits relating to the FPU, we must be
31787 doing soft-float. */
31788 if (bitmap_empty_p (fpubits))
31791 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31793 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
31794 if (bitmap_equal_p (fpubits, cand_fpubits))
31795 return all_fpus[i].name;
31797 /* We must find an entry, or things have gone wrong. */
31798 gcc_unreachable ();
31801 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
31802 by the function fndecl. */
31804 arm_declare_function_name (FILE *stream, const char *name, tree decl)
31806 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
31808 struct cl_target_option *targ_options;
31810 targ_options = TREE_TARGET_OPTION (target_parts);
31812 targ_options = TREE_TARGET_OPTION (target_option_current_node);
31813 gcc_assert (targ_options);
31815 /* Only update the assembler .arch string if it is distinct from the last
31816 such string we printed. arch_to_print is set conditionally in case
31817 targ_options->x_arm_arch_string is NULL which can be the case
31818 when cc1 is invoked directly without passing -march option. */
31819 std::string arch_to_print;
31820 if (targ_options->x_arm_arch_string)
31821 arch_to_print = targ_options->x_arm_arch_string;
31823 if (arch_to_print != arm_last_printed_arch_string)
31825 std::string arch_name
31826 = arch_to_print.substr (0, arch_to_print.find ("+"));
31827 asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
31828 const arch_option *arch
31829 = arm_parse_arch_option_name (all_architectures, "-march",
31830 targ_options->x_arm_arch_string);
31831 auto_sbitmap opt_bits (isa_num_bits);
31834 if (arch->common.extensions)
31836 for (const struct cpu_arch_extension *opt = arch->common.extensions;
31842 arm_initialize_isa (opt_bits, opt->isa_bits);
31843 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
31844 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
31845 asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
31851 arm_last_printed_arch_string = arch_to_print;
31854 fprintf (stream, "\t.syntax unified\n");
31858 if (is_called_in_ARM_mode (decl)
31859 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
31860 && cfun->is_thunk))
31861 fprintf (stream, "\t.code 32\n");
31862 else if (TARGET_THUMB1)
31863 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
31865 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
31868 fprintf (stream, "\t.arm\n");
31870 std::string fpu_to_print
31871 = TARGET_SOFT_FLOAT
31872 ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);
31874 if (fpu_to_print != arm_last_printed_arch_string)
31876 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
31877 arm_last_printed_fpu_string = fpu_to_print;
31880 if (TARGET_POKE_FUNCTION_NAME)
31881 arm_poke_function_name (stream, (const char *) name);
31884 /* If MEM is in the form of [base+offset], extract the two parts of the
31885 address and store them in BASE and OFFSET; otherwise return false
31886 after clearing BASE and OFFSET. */
31889 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
31893 gcc_assert (MEM_P (mem));
31895 addr = XEXP (mem, 0);
31897 /* Strip off const from addresses like (const (addr)). */
31898 if (GET_CODE (addr) == CONST)
31899 addr = XEXP (addr, 0);
31901 if (GET_CODE (addr) == REG)
31904 *offset = const0_rtx;
31908 if (GET_CODE (addr) == PLUS
31909 && GET_CODE (XEXP (addr, 0)) == REG
31910 && CONST_INT_P (XEXP (addr, 1)))
31912 *base = XEXP (addr, 0);
31913 *offset = XEXP (addr, 1);
31918 *offset = NULL_RTX;
31923 /* If INSN is a load or store whose address is in the form [base+offset],
31924 extract the two parts and store them in BASE and OFFSET. Set IS_LOAD
31925 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
31926 otherwise return FALSE. */
31929 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
31933 gcc_assert (INSN_P (insn));
31934 x = PATTERN (insn);
31935 if (GET_CODE (x) != SET)
31939 dest = SET_DEST (x);
31940 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
31943 extract_base_offset_in_addr (dest, base, offset);
31945 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
31948 extract_base_offset_in_addr (src, base, offset);
31953 return (*base != NULL_RTX && *offset != NULL_RTX);
31956 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
31958 Currently we only support fusing ldr and str instructions, so FUSION_PRI
31959 and PRI are only calculated for these instructions. For other
31960 instructions, FUSION_PRI and PRI are simply set to MAX_PRI. In the future,
31961 other kinds of instruction fusion can be supported by returning different priorities.
31963 It's important that irrelevant instructions get the largest FUSION_PRI. */
31966 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
31967 int *fusion_pri, int *pri)
31973 gcc_assert (INSN_P (insn));
31976 if (!fusion_load_store (insn, &base, &offset, &is_load))
31983 /* Load goes first. */
31985 *fusion_pri = tmp - 1;
31987 *fusion_pri = tmp - 2;
31991 /* INSN with smaller base register goes first. */
31992 tmp -= ((REGNO (base) & 0xff) << 20);
31994 /* INSN with smaller offset goes first. */
31995 off_val = (int)(INTVAL (offset));
31997 tmp -= (off_val & 0xfffff);
31999 tmp += ((- off_val) & 0xfffff);
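/* Worked example (editorial note): for "ldr r4, [r2, #4]" and
   "ldr r5, [r2, #8]" both loads get the same FUSION_PRI (loads rank
   above stores), while the equal base regno and the smaller offset give
   the first load the larger PRI, keeping the pair adjacent for fusion
   into ldrd.  */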
32006 /* Construct and return a PARALLEL RTX vector with elements numbering the
32007 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
32008 the vector - from the perspective of the architecture. This does not
32009 line up with GCC's perspective on lane numbers, so we end up with
32010 different masks depending on our target endian-ness. The diagram
32011 below may help. We must draw the distinction when building masks
32012 which select one half of the vector. An instruction selecting
32013 architectural low-lanes for a big-endian target, must be described using
32014 a mask selecting GCC high-lanes.
32016 Big-Endian Little-Endian
32018 GCC 0 1 2 3 3 2 1 0
32019 | x | x | x | x | | x | x | x | x |
32020 Architecture 3 2 1 0 3 2 1 0
32022 Low Mask: { 2, 3 } { 0, 1 }
32023 High Mask: { 0, 1 } { 2, 3 }
32027 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
32029 int nunits = GET_MODE_NUNITS (mode);
32030 rtvec v = rtvec_alloc (nunits / 2);
32031 int high_base = nunits / 2;
32037 if (BYTES_BIG_ENDIAN)
32038 base = high ? low_base : high_base;
32040 base = high ? high_base : low_base;
32042 for (i = 0; i < nunits / 2; i++)
32043 RTVEC_ELT (v, i) = GEN_INT (base + i);
32045 t1 = gen_rtx_PARALLEL (mode, v);
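/* Worked example (editorial note, following the diagram above): for
   V4SI with HIGH == true this returns
   (parallel [(const_int 2) (const_int 3)]) on little-endian but
   (parallel [(const_int 0) (const_int 1)]) on big-endian, where GCC
   lanes 0-1 are the architectural high half.  */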
32049 /* Check OP for validity as a PARALLEL RTX vector with elements
32050 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
32051 from the perspective of the architecture. See the diagram above
32052 arm_simd_vect_par_cnst_half for more details. */
32055 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
32058 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
32059 HOST_WIDE_INT count_op = XVECLEN (op, 0);
32060 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
32063 if (!VECTOR_MODE_P (mode))
32066 if (count_op != count_ideal)
32069 for (i = 0; i < count_ideal; i++)
32071 rtx elt_op = XVECEXP (op, 0, i);
32072 rtx elt_ideal = XVECEXP (ideal, 0, i);
32074 if (!CONST_INT_P (elt_op)
32075 || INTVAL (elt_ideal) != INTVAL (elt_op))
32081 /* Can output mi_thunk for all cases except for a non-zero vcall_offset in Thumb-1 mode. */
32084 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
32087 /* For now, we punt and do not handle this for TARGET_THUMB1. */
32088 if (vcall_offset && TARGET_THUMB1)
32091 /* Otherwise ok. */
32095 /* Generate RTL for a conditional branch with rtx comparison CODE in
32096 mode CC_MODE. The destination of the unlikely conditional branch is LABEL_REF. */
32100 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
32104 x = gen_rtx_fmt_ee (code, VOIDmode,
32105 gen_rtx_REG (cc_mode, CC_REGNUM),
32108 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
32109 gen_rtx_LABEL_REF (VOIDmode, label_ref),
32111 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
32114 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
32116 For pure-code sections there is no letter code for this attribute, so
32117 output all the section flags numerically when this is needed. */
32120 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
32123 if (flags & SECTION_ARM_PURECODE)
32127 if (!(flags & SECTION_DEBUG))
32129 if (flags & SECTION_EXCLUDE)
32130 *num |= 0x80000000;
32131 if (flags & SECTION_WRITE)
32133 if (flags & SECTION_CODE)
32135 if (flags & SECTION_MERGE)
32137 if (flags & SECTION_STRINGS)
32139 if (flags & SECTION_TLS)
32141 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
32150 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
32152 If pure-code is passed as an option, make sure all functions are in
32153 sections that have the SHF_ARM_PURECODE attribute. */
32156 arm_function_section (tree decl, enum node_frequency freq,
32157 bool startup, bool exit)
32159 const char * section_name;
32162 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
32163 return default_function_section (decl, freq, startup, exit);
32165 if (!target_pure_code)
32166 return default_function_section (decl, freq, startup, exit);
32169 section_name = DECL_SECTION_NAME (decl);
32171 /* If a function is not in a named section then it falls under the 'default'
32172 text section, also known as '.text'. We can preserve previous behavior as
32173 the default text section already has the SHF_ARM_PURECODE section
32177 section *default_sec = default_function_section (decl, freq, startup,
32180 /* If default_sec is not null, then it must be a special section like for
32181 example .text.startup. We set the pure-code attribute and return the
32182 same section to preserve existing behavior. */
32184 default_sec->common.flags |= SECTION_ARM_PURECODE;
32185 return default_sec;
32188 /* Otherwise look whether a section has already been created with
32190 sec = get_named_section (decl, section_name, 0);
32192 /* If that is not the case passing NULL as the section's name to
32193 'get_named_section' will create a section with the declaration's
32195 sec = get_named_section (decl, NULL, 0);
32197 /* Set the SHF_ARM_PURECODE attribute. */
32198 sec->common.flags |= SECTION_ARM_PURECODE;
32203 /* Implements the TARGET_SECTION_FLAGS hook.
32205 If DECL is a function declaration and pure-code is passed as an option
32206 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
32207 section's name and RELOC indicates whether the declaration's initializer may
32208 contain runtime relocations. */
32210 static unsigned int
32211 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
32213 unsigned int flags = default_section_type_flags (decl, name, reloc);
32215 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
32216 flags |= SECTION_ARM_PURECODE;
32221 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
32224 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
32226 rtx *quot_p, rtx *rem_p)
32228 if (mode == SImode)
32229 gcc_assert (!TARGET_IDIV);
32231 scalar_int_mode libval_mode
32232 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
32234 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
32236 op0, GET_MODE (op0),
32237 op1, GET_MODE (op1));
32239 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
32240 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
32241 GET_MODE_SIZE (mode));
32243 gcc_assert (quotient);
32244 gcc_assert (remainder);
32246 *quot_p = quotient;
32247 *rem_p = remainder;
32250 /* This function checks for the availability of the coprocessor builtin passed
32251 in BUILTIN for the current target. Returns true if it is available and
32252 false otherwise. If a BUILTIN is passed for which this function has not
32253 been implemented it will cause an exception. */
32256 arm_coproc_builtin_available (enum unspecv builtin)
32258 /* None of these builtins are available in Thumb mode if the target only
32259 supports Thumb-1. */
32277 case VUNSPEC_LDC2L:
32279 case VUNSPEC_STC2L:
32282 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
32289 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
32291 if (arm_arch6 || arm_arch5te)
32294 case VUNSPEC_MCRR2:
32295 case VUNSPEC_MRRC2:
32300 gcc_unreachable ();
32305 /* This function returns true if OP is a valid memory operand for the ldc and
32306 stc coprocessor instructions and false otherwise. */
32309 arm_coproc_ldc_stc_legitimate_address (rtx op)
32311 HOST_WIDE_INT range;
32312 /* Has to be a memory operand. */
32318 /* We accept registers. */
32322 switch (GET_CODE (op))
32326 /* Or registers with an offset. */
32327 if (!REG_P (XEXP (op, 0)))
32332 /* The offset must be an immediate though. */
32333 if (!CONST_INT_P (op))
32336 range = INTVAL (op);
32338 /* Within the range of [-1020,1020]. */
32339 if (!IN_RANGE (range, -1020, 1020))
32342 /* And a multiple of 4. */
32343 return (range % 4) == 0;
32349 return REG_P (XEXP (op, 0));
32351 gcc_unreachable ();
32356 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
32358 In VFPv1, VFP registers could only be accessed in the mode they were
32359 set, so subregs would be invalid there. However, we don't support
32360 VFPv1 at the moment, and the restriction was lifted in VFPv2.
32362 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
32363 VFP registers in little-endian order. We can't describe that accurately to
32364 GCC, so avoid taking subregs of such values.
32366 The only exception is going from a 128-bit to a 64-bit type. In that
32367 case the data layout happens to be consistent for big-endian, so we
32368 explicitly allow that case. */
32371 arm_can_change_mode_class (machine_mode from, machine_mode to,
32372 reg_class_t rclass)
32375 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
32376 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
32377 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
32378 && reg_classes_intersect_p (VFP_REGS, rclass))

/* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
   strcpy from constants will be faster.  */

static HOST_WIDE_INT
arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);

  if (TREE_CODE (exp) == STRING_CST && !optimize_size)
    return MAX (align, BITS_PER_WORD * factor);
  return align;
}
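
/* Worked example: BITS_PER_WORD is 32 on ARM, so a string constant that
   arrives here with byte alignment is raised to MAX (8, 32) = 32 bits, or
   to 64 bits when tuning for XScale in ARM state (FACTOR == 2).  With -Os
   the incoming alignment is returned unchanged.  */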

/* Emit a speculation barrier on target architectures that do not have
   DSB/ISB directly.  Such systems probably don't need a barrier
   themselves, but if the code is ever run on a later architecture, it
   might become a problem.  */
static void
arm_emit_speculation_barrier_function ()
{
  emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
}

/* Have we recorded an explicit access to the Q bit of APSR?  */

bool
arm_q_bit_access (void)
{
  if (cfun && cfun->decl)
    return lookup_attribute ("acle qbit",
			     DECL_ATTRIBUTES (cfun->decl));
  return true;
}

/* Have we recorded an explicit access to the GE bits of PSTATE?  */

bool
arm_ge_bits_access (void)
{
  if (cfun && cfun->decl)
    return lookup_attribute ("acle gebits",
			     DECL_ATTRIBUTES (cfun->decl));
  return true;
}
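
/* These attributes are internal markers, not user-visible ones: the
   assumption here is that the builtin expansion machinery attaches
   "acle qbit" or "acle gebits" to the current function declaration when it
   expands an ACLE intrinsic that explicitly touches the Q flag or the GE
   bits, so later code can tell whether clobbering those APSR bits would be
   observable.  When there is no function context we conservatively answer
   "yes".  */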

#if CHECKING_P
namespace selftest {

/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  We primarily check for
   inconsistencies in the option extensions at present (extensions
   that duplicate others but aren't marked as aliases).  Furthermore,
   for correct canonicalization later options must never be a subset
   of an earlier option.  Any extension should also only specify other
   feature bits and never an architecture bit.  The architecture is inferred
   from the declaration of the extension.  */
static void
arm_test_cpu_arch_data (void)
{
  const arch_option *arch;
  const cpu_option *cpu;
  auto_sbitmap target_isa (isa_num_bits);
  auto_sbitmap isa1 (isa_num_bits);
  auto_sbitmap isa2 (isa_num_bits);

  for (arch = all_architectures; arch->common.name != NULL; ++arch)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (arch->common.extensions == NULL)
	continue;

      arm_initialize_isa (target_isa, arch->common.isa_bits);

      for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
	{
	  if (ext1->alias)
	    continue;

	  arm_initialize_isa (isa1, ext1->isa_bits);
	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
	    {
	      if (ext2->alias || ext1->remove != ext2->remove)
		continue;

	      arm_initialize_isa (isa2, ext2->isa_bits);
	      /* If the option is a subset of the parent option, it doesn't
		 add anything and so isn't useful.  */
	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

	      /* If the extension specifies any architectural bits then
		 disallow it.  Extensions should only specify feature bits.  */
	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
	    }
	}
    }

  for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (cpu->common.extensions == NULL)
	continue;

      arm_initialize_isa (target_isa, cpu->common.isa_bits);

      for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
	{
	  if (ext1->alias)
	    continue;

	  arm_initialize_isa (isa1, ext1->isa_bits);
	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
	    {
	      if (ext2->alias || ext1->remove != ext2->remove)
		continue;

	      arm_initialize_isa (isa2, ext2->isa_bits);
	      /* If the option is a subset of the parent option, it doesn't
		 add anything and so isn't useful.  */
	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

	      /* If the extension specifies any architectural bits then
		 disallow it.  Extensions should only specify feature bits.  */
	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
	    }
	}
    }
}
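
/* A hypothetical arm-cpus.in bug these assertions would catch: an extension
   "+bar" declared after "+foo" whose feature set is a subset of "+foo"'s,
   without being marked as an alias of it, trips the bitmap_subset_p check;
   an extension listing one of the base architecture bits trips the
   bitmap_intersect_p check.  ("+foo" and "+bar" are made-up names, used
   purely for illustration.)  */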

/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  Here we check for consistency between the
   fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
   a feature bit that is not defined by any FPU flag.  */
static void
arm_test_fpu_data (void)
{
  auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap tmpset (isa_num_bits);

  static const enum isa_feature fpu_bitlist_internal[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);

  /* Strip out the feature bits of each FPU in turn; whatever remains is a
     bit in ISA_ALL_FPU_INTERNAL that no FPU defines.  */
  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
      bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
      bitmap_clear (isa_all_fpubits_internal);
      bitmap_copy (isa_all_fpubits_internal, tmpset);
    }

  if (!bitmap_empty_p (isa_all_fpubits_internal))
    {
      fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
	       " group that are not defined by any FPU.\n"
	       "       Check your arm-cpus.in.\n");
      ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
    }
}

/* Run all the target-specific selftests.  */
static void
arm_run_selftests (void)
{
  arm_test_cpu_arch_data ();
  arm_test_fpu_data ();
}
} /* Namespace selftest.  */

#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
#endif /* CHECKING_P */

/* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
   Unlike the arm version, we do NOT implement asm flag outputs.  */

rtx_insn *
thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
		      vec<const char *> &constraints,
		      vec<rtx> &/*clobbers*/, HARD_REG_SET &/*clobbered_regs*/)
{
  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
    if (strncmp (constraints[i], "=@cc", 4) == 0)
      {
	sorry ("asm flags not supported in thumb1 mode");
	break;
      }
  return NULL;
}
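
/* For example, an asm flag output using one of the "=@cc<cond>"
   constraints, such as

     int gt;
     asm ("cmp %1, %2" : "=@ccgt" (gt) : "r" (x), "r" (y));

   compiles in ARM/Thumb-2 state but reaches the sorry () above when the
   target only supports Thumb-1.  */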

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-arm.h"