/* Output routines for GCC for ARM.
   Copyright (C) 1991-2013 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hash-table.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "target-def.h"
#include "langhooks.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
			       rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
						    enum machine_mode, int *,
						    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (enum machine_mode);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
static bool arm_legitimate_constant_p (enum machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree arm_builtin_decl (unsigned, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static rtx emit_multi_reg_push (unsigned long);
static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
				  tree, bool);
static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
			     const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
				      const_tree, bool);
static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (enum machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);
static void arm_file_end (void);
static void arm_file_start (void);
static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
#endif
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool arm_array_mode_supported_p (enum machine_mode,
					unsigned HOST_WIDE_INT);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
					     const unsigned char *sel);
static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
				   enum vect_cost_for_stmt kind,
				   struct _stmt_vec_info *stmt_info,
				   int misalign,
				   enum vect_cost_model_location where);
static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.
  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL,           0, 0, false, false, false, NULL, false }
};
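/* As an illustrative (and purely hypothetical) use of the attributes in
   the table above, user code might contain declarations such as:

     void far_handler (void) __attribute__ ((long_call));
     void rx_isr (void) __attribute__ ((isr ("IRQ")));

   The function names here are invented for the example; only the
   attribute spellings come from the table.  */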
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */
#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
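/* Worked example: with these bounds an anchor at address A can reach any
   offset in [A - 4088, A + 4095].  That block spans 4088 + 1 + 4095
   = 8184 bytes, and 8184 = 1023 * 8, so successive anchors remain
   naturally eight-byte aligned.  */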
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset

struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;
/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;
/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
/* spare              (1 << 11) */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_THUMB_DIV  (1 << 18)       /* Hardware divide (Thumb mode).  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */
#define FL_ARM_DIV    (1 << 23)       /* Hardware divide (ARM mode).  */
#define FL_ARCH8      (1 << 24)       /* Architecture 8.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */
#define FL_IWMMXT2    (1 << 30)       /* "Intel Wireless MMX2 technology".  */
/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE		(FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
			 | FL_CO_PROC)
#define FL_FOR_ARCH2	FL_NOTM
#define FL_FOR_ARCH3	(FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M	(FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4	(FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T	(FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5	(FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T	(FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E	(FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE	(FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ	FL_FOR_ARCH5TE
#define FL_FOR_ARCH6	(FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J	FL_FOR_ARCH6
#define FL_FOR_ARCH6K	(FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z	FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK	FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2	(FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M	(FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7	((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A	(FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R	(FL_FOR_ARCH7A | FL_THUMB_DIV)
#define FL_FOR_ARCH7M	(FL_FOR_ARCH7 | FL_THUMB_DIV)
#define FL_FOR_ARCH7EM	(FL_FOR_ARCH7M | FL_ARCH7EM)
#define FL_FOR_ARCH8A	(FL_FOR_ARCH7 | FL_ARCH6K | FL_ARCH8 | FL_THUMB_DIV \
			 | FL_ARM_DIV | FL_NOTM)
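/* For example, FL_FOR_ARCH4T expands transitively to
   (FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_THUMB): each
   architecture inherits the capability bits of its predecessor and adds
   its own.  */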
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;
/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;
/* Nonzero if we should use Neon to handle 64-bit operations rather
   than core registers.  */
int prefer_neon_for_64bits = 0;
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset to start at the start of
   the next function.  */
static int after_arm_reorg = 0;
enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */

/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  enum base_architecture base_arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, \
  l1_size, \
  l1_line_size
/* arm generic vectorizer costs.  */
static const
struct cpu_vec_costs arm_default_vec_cost = {
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_unalign_store_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,
  3,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,						/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,						/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  1,						/* Constant limit.  */
  3,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,						/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  2,						/* Constant limit.  */
  3,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,						/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,						/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};
const struct tune_params arm_v6t2_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_cortex_a15_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  true,						/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,						/* Constant limit.  */
  1,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,					/* Prefer constant pool.  */
  arm_cortex_a5_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {false, false},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  cortex_a9_sched_adjust_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  false,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1 and cortex-m0plus.  */
const struct tune_params arm_v6m_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {false, false},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};

const struct tune_params arm_fa726te_tune =
{
  arm_9e_rtx_costs,
  fa726te_sched_adjust_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,						/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false						/* Prefer Neon for 64-bits bitops.  */
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH,	  \
    FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};
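/* As an illustration of the X-macro pattern above, a (hypothetical)
   entry in arm-cores.def such as

     ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)

   would expand to the initializer

     {"cortex-a9", cortexa9, "7A", BASE_ARCH_7A,
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a9_tune},  */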
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the CPU.  */

#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH
  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};
/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";
/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
  { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
#include "arm-fpus.def"
#undef ARM_FPU
};
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
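/* For example, emit_set_insn (reg, const0_rtx) emits the single insn
   (set (reg) (const_int 0)) into the instruction stream and returns it.  */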
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
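/* For example, bit_count (0x2a) returns 3: the loop clears one set bit
   per iteration (0b101010 -> 0b101000 -> 0b100000 -> 0).  */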
typedef struct
{
  enum machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
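/* For example, arm_set_fixed_optab_libfunc (add_optab, SAmode, "add",
   "sa", 3) registers "__gnu_addsa3", the name used by libgcc's
   fixed-bit routines for SAmode addition.  */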
static void
arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
			    enum machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
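/* For example, a conversion between two signed fract modes, say QQmode
   ("qq") to HQmode ("hq"), satisfies all three conditions and picks up
   the "2" suffix, registering "__gnu_fractqqhq2"; a conversion involving
   a non-fixed-point mode such as SFmode keeps the empty suffix.  */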
/* Set up library functions unique to ARM.  */

static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (2 * UNITS_PER_WORD);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;
  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
	{ QQmode, "qq" },   { UQQmode, "uqq" }, { HQmode, "hq" },
	{ UHQmode, "uhq" }, { SQmode, "sq" },   { USQmode, "usq" },
	{ DQmode, "dq" },   { UDQmode, "udq" }, { TQmode, "tq" },
	{ UTQmode, "utq" }, { HAmode, "ha" },   { UHAmode, "uha" },
	{ SAmode, "sa" },   { USAmode, "usa" }, { DAmode, "da" },
	{ UDAmode, "uda" }, { TAmode, "ta" },   { UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
	{ QQmode, "qq" },   { UQQmode, "uqq" }, { HQmode, "hq" },
	{ UHQmode, "uhq" }, { SQmode, "sq" },   { USQmode, "usq" },
	{ DQmode, "dq" },   { UDQmode, "udq" }, { TQmode, "tq" },
	{ UTQmode, "utq" }, { HAmode, "ha" },   { UHAmode, "uha" },
	{ SAmode, "sa" },   { USAmode, "usa" }, { DAmode, "da" },
	{ UDAmode, "uda" }, { TAmode, "ta" },   { UTAmode, "uta" },
	{ QImode, "qi" },   { HImode, "hi" },   { SImode, "si" },
	{ DImode, "di" },   { TImode, "ti" },   { SFmode, "sf" },
	{ DFmode, "df" }
      };
    unsigned int i, j;
    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				     "add", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				     "ssadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				     "usadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				     "sub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				     "sssub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				     "ussub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				     "mul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				     "ssmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				     "usmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				     "div", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				     "udiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				     "ssdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				     "usdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				     "neg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				     "ssneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				     "usneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				     "ashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				     "ashr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				     "lshr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				     "ssashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				     "usashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				     "cmp", fixed_arith_modes[i].name, 2);
      }
    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
	{
	  if (i == j
	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	    continue;

	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfract_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (fractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	}
  }
  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;
/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
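/* The net effect on AAPCS targets is that <stdarg.h>'s va_list behaves
   as if it were declared

     typedef struct __va_list { void *__ap; } va_list;

   which is why both the tag and field names built above are
   ABI-visible.  */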
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    arm_selected_cpu = &all_cores[(int) arm_cpu_option];

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
1657 if (arm_selected_arch)
1659 if (arm_selected_cpu)
1661 /* Check for conflict between mcpu and march. */
1662 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1664 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1665 arm_selected_cpu->name, arm_selected_arch->name);
1666 /* -march wins for code generation.
1667 -mcpu wins for default tuning. */
1668 if (!arm_selected_tune)
1669 arm_selected_tune = arm_selected_cpu;
1671 arm_selected_cpu = arm_selected_arch;
1675 arm_selected_arch = NULL;
1678 /* Pick a CPU based on the architecture. */
1679 arm_selected_cpu = arm_selected_arch;
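/* Illustrative example of the rules above (the option values are only an
   example): "-mcpu=arm926ej-s -march=armv4t" warns because the two flag
   sets differ; armv4t then drives code generation while arm926ej-s still
   supplies the default tuning.  */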
1682 /* If the user did not specify a processor, choose one for them. */
1683 if (!arm_selected_cpu)
1685 const struct processors * sel;
1686 unsigned int sought;
1688 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1689 if (!arm_selected_cpu->name)
1691 #ifdef SUBTARGET_CPU_DEFAULT
1692 /* Use the subtarget default CPU if none was specified by
1694 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1696 /* Default to ARM6. */
1697 if (!arm_selected_cpu->name)
1698 arm_selected_cpu = &all_cores[arm6];
1701 sel = arm_selected_cpu;
1702 insn_flags = sel->flags;
1704 /* Now check to see if the user has specified some command line
1705 switches that require certain abilities from the cpu. */
1708 if (TARGET_INTERWORK || TARGET_THUMB)
1710 sought |= (FL_THUMB | FL_MODE32);
1712 /* There are no ARM processors that support both APCS-26 and
1713 interworking. Therefore we force FL_MODE26 to be removed
1714 from insn_flags here (if it was set), so that the search
1715 below will always be able to find a compatible processor. */
1716 insn_flags &= ~FL_MODE26;
1719 if (sought != 0 && ((sought & insn_flags) != sought))
1721 /* Try to locate a CPU type that supports all of the abilities
1722 of the default CPU, plus the extra abilities requested by
1724 for (sel = all_cores; sel->name != NULL; sel++)
1725 if ((sel->flags & sought) == (sought | insn_flags))
1728 if (sel->name == NULL)
1730 unsigned current_bit_count = 0;
1731 const struct processors * best_fit = NULL;
1733 /* Ideally we would like to issue an error message here
1734 saying that it was not possible to find a CPU compatible
1735 with the default CPU, but which also supports the command
1736 line options specified by the programmer, and so they
1737 ought to use the -mcpu=<name> command line option to
1738 override the default CPU type.
1740 If we cannot find a cpu that has both the
1741 characteristics of the default cpu and the given
1742 command line options we scan the array again looking
1743 for a best match. */
1744 for (sel = all_cores; sel->name != NULL; sel++)
1745 if ((sel->flags & sought) == sought)
1749 count = bit_count (sel->flags & insn_flags);
1751 if (count >= current_bit_count)
1754 current_bit_count = count;
1758 gcc_assert (best_fit);
1762 arm_selected_cpu = sel;
1766 gcc_assert (arm_selected_cpu);
1767 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1768 if (!arm_selected_tune)
1769 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1771 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1772 insn_flags = arm_selected_cpu->flags;
1773 arm_base_arch = arm_selected_cpu->base_arch;
1775 arm_tune = arm_selected_tune->core;
1776 tune_flags = arm_selected_tune->flags;
1777 current_tune = arm_selected_tune->tune;
1779 /* Make sure that the processor choice does not conflict with any of the
1780 other command line choices. */
1781 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1782 error ("target CPU does not support ARM mode");
1784 /* BPABI targets use linker tricks to allow interworking on cores
1785 without thumb support. */
1786 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1788 warning (0, "target CPU does not support interworking" );
1789 target_flags &= ~MASK_INTERWORK;
1792 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1794 warning (0, "target CPU does not support THUMB instructions");
1795 target_flags &= ~MASK_THUMB;
1798 if (TARGET_APCS_FRAME && TARGET_THUMB)
1800 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1801 target_flags &= ~MASK_APCS_FRAME;
1804 /* Callee super interworking implies thumb interworking. Adding
1805 this to the flags here simplifies the logic elsewhere. */
1806 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1807 target_flags |= MASK_INTERWORK;
1809 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1810 from here where no function is being compiled currently. */
1811 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1812 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1814 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1815 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1817 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1819 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1820 target_flags |= MASK_APCS_FRAME;
1823 if (TARGET_POKE_FUNCTION_NAME)
1824 target_flags |= MASK_APCS_FRAME;
1826 if (TARGET_APCS_REENT && flag_pic)
1827 error ("-fpic and -mapcs-reent are incompatible");
1829 if (TARGET_APCS_REENT)
1830 warning (0, "APCS reentrant code not supported. Ignored");
1832 /* If this target is normally configured to use APCS frames, warn if they
1833 are turned off and debugging is turned on. */
1835 && write_symbols != NO_DEBUG
1836 && !TARGET_APCS_FRAME
1837 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1838 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1840 if (TARGET_APCS_FLOAT)
1841 warning (0, "passing floating point arguments in fp regs not yet supported");
1843 if (TARGET_LITTLE_WORDS)
1844 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
1845 "will be removed in a future release");
1847 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1848 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1849 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1850 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1851 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1852 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1853 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1854 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1855 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1856 arm_arch6m = arm_arch6 && !arm_arch_notm;
1857 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1858 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1859 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
1860 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1861 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1863 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1864 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1865 thumb_code = TARGET_ARM == 0;
1866 thumb1_code = TARGET_THUMB1 != 0;
1867 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1868 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1869 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1870 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
1871 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1872 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1873 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1875 /* If we are not using the default (ARM mode) section anchor offset
1876 ranges, then set the correct ranges now. */
1879 /* Thumb-1 LDR instructions cannot have negative offsets.
1880 Permissible positive offset ranges are 5-bit (for byte loads),
1881 6-bit (for halfword loads), or 7-bit (for word loads).
1882 Empirical results suggest a 7-bit anchor range gives the best
1883 overall code size. */
1884 targetm.min_anchor_offset = 0;
1885 targetm.max_anchor_offset = 127;
1887 else if (TARGET_THUMB2)
1889 /* The minimum is set such that the total size of the block
1890 for a particular anchor is 248 + 1 + 4095 bytes, which is
1891 divisible by eight, ensuring natural spacing of anchors. */
1892 targetm.min_anchor_offset = -248;
1893 targetm.max_anchor_offset = 4095;
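/* E.g. with the Thumb-2 settings above, a single anchor covers
   [anchor - 248, anchor + 4095], a window of 248 + 1 + 4095 = 4344
   bytes, which is indeed a multiple of eight.  */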
1896 /* V5 code we generate is completely interworking capable, so we turn off
1897 TARGET_INTERWORK here to avoid many tests later on. */
1899 /* XXX However, we must pass the right pre-processor defines to CPP
1900 or GLD can get confused. This is a hack. */
1901 if (TARGET_INTERWORK)
1902 arm_cpp_interwork = 1;
1905 target_flags &= ~MASK_INTERWORK;
1907 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1908 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1910 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1911 error ("iwmmxt abi requires an iwmmxt capable cpu");
1913 if (!global_options_set.x_arm_fpu_index)
1915 const char *target_fpu_name;
1918 #ifdef FPUTYPE_DEFAULT
1919 target_fpu_name = FPUTYPE_DEFAULT;
1921 target_fpu_name = "vfp";
1924 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1929 arm_fpu_desc = &all_fpus[arm_fpu_index];
1931 switch (arm_fpu_desc->model)
1933 case ARM_FP_MODEL_VFP:
1934 arm_fpu_attr = FPU_VFP;
1941 if (TARGET_AAPCS_BASED)
1943 if (TARGET_CALLER_INTERWORKING)
1944 error ("AAPCS does not support -mcaller-super-interworking");
1946 if (TARGET_CALLEE_INTERWORKING)
1947 error ("AAPCS does not support -mcallee-super-interworking");
1950 /* iWMMXt and NEON are incompatible. */
1951 if (TARGET_IWMMXT && TARGET_NEON)
1952 error ("iWMMXt and NEON are incompatible");
1954 /* iWMMXt unsupported under Thumb mode. */
1955 if (TARGET_THUMB && TARGET_IWMMXT)
1956 error ("iWMMXt unsupported under Thumb mode");
1958 /* __fp16 support currently assumes the core has ldrh. */
1959 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1960 sorry ("__fp16 and no ldrh");
1962 /* If soft-float is specified then don't use FPU. */
1963 if (TARGET_SOFT_FLOAT)
1964 arm_fpu_attr = FPU_NONE;
1966 if (TARGET_AAPCS_BASED)
1968 if (arm_abi == ARM_ABI_IWMMXT)
1969 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1970 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1971 && TARGET_HARD_FLOAT
1973 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1975 arm_pcs_default = ARM_PCS_AAPCS;
1979 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1980 sorry ("-mfloat-abi=hard and VFP");
1982 if (arm_abi == ARM_ABI_APCS)
1983 arm_pcs_default = ARM_PCS_APCS;
1985 arm_pcs_default = ARM_PCS_ATPCS;
1988 /* For arm2/3 there is no need to do any scheduling if we are doing
1989 software floating-point. */
1990 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
1991 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1993 /* Use the cp15 method if it is available. */
1994 if (target_thread_pointer == TP_AUTO)
1996 if (arm_arch6k && !TARGET_THUMB1)
1997 target_thread_pointer = TP_CP15;
1999 target_thread_pointer = TP_SOFT;
2002 if (TARGET_HARD_TP && TARGET_THUMB1)
2003 error ("can not use -mtp=cp15 with 16-bit Thumb");
2005 /* Override the default structure alignment for AAPCS ABI. */
2006 if (!global_options_set.x_arm_structure_size_boundary)
2008 if (TARGET_AAPCS_BASED)
2009 arm_structure_size_boundary = 8;
2013 if (arm_structure_size_boundary != 8
2014 && arm_structure_size_boundary != 32
2015 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2017 if (ARM_DOUBLEWORD_ALIGN)
2019 "structure size boundary can only be set to 8, 32 or 64");
2021 warning (0, "structure size boundary can only be set to 8 or 32");
2022 arm_structure_size_boundary
2023 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2027 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2029 error ("RTP PIC is incompatible with Thumb");
2033 /* If stack checking is disabled, we can use r10 as the PIC register,
2034 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2035 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2037 if (TARGET_VXWORKS_RTP)
2038 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2039 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2042 if (flag_pic && TARGET_VXWORKS_RTP)
2043 arm_pic_register = 9;
2045 if (arm_pic_register_string != NULL)
2047 int pic_register = decode_reg_name (arm_pic_register_string);
2050 warning (0, "-mpic-register= is useless without -fpic");
2052 /* Prevent the user from choosing an obviously stupid PIC register. */
2053 else if (pic_register < 0 || call_used_regs[pic_register]
2054 || pic_register == HARD_FRAME_POINTER_REGNUM
2055 || pic_register == STACK_POINTER_REGNUM
2056 || pic_register >= PC_REGNUM
2057 || (TARGET_VXWORKS_RTP
2058 && (unsigned int) pic_register != arm_pic_register))
2059 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2061 arm_pic_register = pic_register;
2064 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2065 if (fix_cm3_ldrd == 2)
2067 if (arm_selected_cpu->core == cortexm3)
2073 /* Enable -munaligned-access by default for
2074 - all ARMv6 architecture-based processors
2075 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2076 - ARMv8 architecture-based processors.
2078 Disable -munaligned-access by default for
2079 - all pre-ARMv6 architecture-based processors
2080 - ARMv6-M architecture-based processors. */
2082 if (unaligned_access == 2)
2084 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2085 unaligned_access = 1;
2087 unaligned_access = 0;
2089 else if (unaligned_access == 1
2090 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2092 warning (0, "target CPU does not support unaligned accesses");
2093 unaligned_access = 0;
2096 if (TARGET_THUMB1 && flag_schedule_insns)
2098 /* Don't warn since it's on by default in -O2. */
2099 flag_schedule_insns = 0;
2104 /* If optimizing for size, bump the number of instructions that we
2105 are prepared to conditionally execute (even on a StrongARM). */
2106 max_insns_skipped = 6;
2109 max_insns_skipped = current_tune->max_insns_skipped;
2111 /* Hot/Cold partitioning is not currently supported, since we can't
2112 handle literal pool placement in that case. */
2113 if (flag_reorder_blocks_and_partition)
2115 inform (input_location,
2116 "-freorder-blocks-and-partition not supported on this architecture");
2117 flag_reorder_blocks_and_partition = 0;
2118 flag_reorder_blocks = 1;
2122 /* Hoisting PIC address calculations more aggressively provides a small,
2123 but measurable, size reduction for PIC code. Therefore, we decrease
2124 the bar for unrestricted expression hoisting to the cost of PIC address
2125 calculation, which is 2 instructions. */
2126 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2127 global_options.x_param_values,
2128 global_options_set.x_param_values);
2130 /* ARM EABI defaults to strict volatile bitfields. */
2131 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2132 && abi_version_at_least(2))
2133 flag_strict_volatile_bitfields = 1;
2135 /* Enable software prefetching at -O3 for CPUs that have prefetch, when we have
2136 deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2137 if (flag_prefetch_loop_arrays < 0
2140 && current_tune->num_prefetch_slots > 0)
2141 flag_prefetch_loop_arrays = 1;
2143 /* Set up parameters to be used in prefetching algorithm. Do not override the
2144 defaults unless we are tuning for a core we have researched values for. */
2145 if (current_tune->num_prefetch_slots > 0)
2146 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2147 current_tune->num_prefetch_slots,
2148 global_options.x_param_values,
2149 global_options_set.x_param_values);
2150 if (current_tune->l1_cache_line_size >= 0)
2151 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2152 current_tune->l1_cache_line_size,
2153 global_options.x_param_values,
2154 global_options_set.x_param_values);
2155 if (current_tune->l1_cache_size >= 0)
2156 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2157 current_tune->l1_cache_size,
2158 global_options.x_param_values,
2159 global_options_set.x_param_values);
2161 /* Use Neon to perform 64-bit operations rather than core registers. */
2163 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
2164 if (use_neon_for_64bits == 1)
2165 prefer_neon_for_64bits = true;
2167 /* Use the alternative scheduling-pressure algorithm by default. */
2168 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2169 global_options.x_param_values,
2170 global_options_set.x_param_values);
2172 /* Disable shrink-wrap when optimizing function for size, since it tends to
2173 generate additional returns. */
2174 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
2175 flag_shrink_wrap = false;
2176 /* TBD: Dwarf info for apcs frame is not handled yet. */
2177 if (TARGET_APCS_FRAME)
2178 flag_shrink_wrap = false;
2180 /* Register global variables with the garbage collector. */
2181 arm_add_gc_roots ();
2185 arm_add_gc_roots (void)
2187 gcc_obstack_init(&minipool_obstack);
2188 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2191 /* A table of known ARM exception types.
2192 For use with the interrupt function attribute. */
2196 const char *const arg;
2197 const unsigned long return_value;
2201 static const isr_attribute_arg isr_attribute_args [] =
2203 { "IRQ", ARM_FT_ISR },
2204 { "irq", ARM_FT_ISR },
2205 { "FIQ", ARM_FT_FIQ },
2206 { "fiq", ARM_FT_FIQ },
2207 { "ABORT", ARM_FT_ISR },
2208 { "abort", ARM_FT_ISR },
2209 { "ABORT", ARM_FT_ISR },
2210 { "abort", ARM_FT_ISR },
2211 { "UNDEF", ARM_FT_EXCEPTION },
2212 { "undef", ARM_FT_EXCEPTION },
2213 { "SWI", ARM_FT_EXCEPTION },
2214 { "swi", ARM_FT_EXCEPTION },
2215 { NULL, ARM_FT_NORMAL }
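/* For reference (illustrative usage, not from this file): one of the
   strings above is selected with the interrupt attribute, e.g.

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   Unrecognized strings fall through to ARM_FT_UNKNOWN in arm_isr_value
   below.  */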
2218 /* Returns the (interrupt) function type of the current
2219 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2221 static unsigned long
2222 arm_isr_value (tree argument)
2224 const isr_attribute_arg * ptr;
2228 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2230 /* No argument - default to IRQ. */
2231 if (argument == NULL_TREE)
2234 /* Get the value of the argument. */
2235 if (TREE_VALUE (argument) == NULL_TREE
2236 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2237 return ARM_FT_UNKNOWN;
2239 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2241 /* Check it against the list of known arguments. */
2242 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2243 if (streq (arg, ptr->arg))
2244 return ptr->return_value;
2246 /* An unrecognized interrupt type. */
2247 return ARM_FT_UNKNOWN;
2250 /* Computes the type of the current function. */
2252 static unsigned long
2253 arm_compute_func_type (void)
2255 unsigned long type = ARM_FT_UNKNOWN;
2259 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2261 /* Decide if the current function is volatile. Such functions
2262 never return, and many memory cycles can be saved by not storing
2263 register values that will never be needed again. This optimization
2264 was added to speed up context switching in a kernel application. */
2266 && (TREE_NOTHROW (current_function_decl)
2267 || !(flag_unwind_tables
2269 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2270 && TREE_THIS_VOLATILE (current_function_decl))
2271 type |= ARM_FT_VOLATILE;
2273 if (cfun->static_chain_decl != NULL)
2274 type |= ARM_FT_NESTED;
2276 attr = DECL_ATTRIBUTES (current_function_decl);
2278 a = lookup_attribute ("naked", attr);
2280 type |= ARM_FT_NAKED;
2282 a = lookup_attribute ("isr", attr);
2284 a = lookup_attribute ("interrupt", attr);
2287 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2289 type |= arm_isr_value (TREE_VALUE (a));
2294 /* Returns the type of the current function. */
2297 arm_current_func_type (void)
2299 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2300 cfun->machine->func_type = arm_compute_func_type ();
2302 return cfun->machine->func_type;
2306 arm_allocate_stack_slots_for_args (void)
2308 /* Naked functions should not allocate stack slots for arguments. */
2309 return !IS_NAKED (arm_current_func_type ());
2313 arm_warn_func_return (tree decl)
2315 /* Naked functions are implemented entirely in assembly, including the
2316 return sequence, so suppress warnings about this. */
2317 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
2321 /* Output assembler code for a block containing the constant parts
2322 of a trampoline, leaving space for the variable parts.
2324 On the ARM, (if r8 is the static chain regnum, and remembering that
2325 referencing pc adds an offset of 8) the trampoline looks like:
2328 .word static chain value
2329 .word function's address
2330 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2333 arm_asm_trampoline_template (FILE *f)
2337 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2338 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2340 else if (TARGET_THUMB2)
2342 /* The Thumb-2 trampoline is similar to the arm implementation.
2343 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2344 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2345 STATIC_CHAIN_REGNUM, PC_REGNUM);
2346 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2350 ASM_OUTPUT_ALIGN (f, 2);
2351 fprintf (f, "\t.code\t16\n");
2352 fprintf (f, ".Ltrampoline_start:\n");
2353 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2354 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2355 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2356 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2357 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2358 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2360 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2361 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
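/* A sketch of the initialized 32-bit trampoline as filled in by
   arm_trampoline_init below (byte offsets; register names illustrative):

     0:  ldr  <static chain reg>, [pc, ...]   @ loads the word at offset 8
     4:  ldr  pc, [pc, ...]                   @ loads the word at offset 12
     8:  .word <static chain value>
     12: .word <function address>

   Reading pc in ARM state yields the instruction address plus 8 (plus 4
   in Thumb state), which is how the small literal offsets above reach
   the two data words.  */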
2364 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2367 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2369 rtx fnaddr, mem, a_tramp;
2371 emit_block_move (m_tramp, assemble_trampoline_template (),
2372 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2374 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2375 emit_move_insn (mem, chain_value);
2377 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2378 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2379 emit_move_insn (mem, fnaddr);
2381 a_tramp = XEXP (m_tramp, 0);
2382 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2383 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2384 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
2387 /* Thumb trampolines should be entered in thumb mode, so set
2388 the bottom bit of the address. */
2391 arm_trampoline_adjust_address (rtx addr)
2394 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2395 NULL, 0, OPTAB_LIB_WIDEN);
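/* In other words, for Thumb entry points the result is simply
   "addr | 1", following the usual convention that bit 0 of a code
   address selects Thumb state on interworking branches.  */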
2399 /* Return 1 if it is possible to return using a single instruction.
2400 If SIBLING is non-null, this is a test for a return before a sibling
2401 call. SIBLING is the call insn, so we can examine its register usage. */
2404 use_return_insn (int iscond, rtx sibling)
2407 unsigned int func_type;
2408 unsigned long saved_int_regs;
2409 unsigned HOST_WIDE_INT stack_adjust;
2410 arm_stack_offsets *offsets;
2412 /* Never use a return instruction before reload has run. */
2413 if (!reload_completed)
2416 func_type = arm_current_func_type ();
2418 /* Naked, volatile and stack alignment functions need special
2420 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2423 /* So do interrupt functions that use the frame pointer and Thumb
2424 interrupt functions. */
2425 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2428 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
2429 && !optimize_function_for_size_p (cfun))
2432 offsets = arm_get_frame_offsets ();
2433 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2435 /* As do variadic functions. */
2436 if (crtl->args.pretend_args_size
2437 || cfun->machine->uses_anonymous_args
2438 /* Or if the function calls __builtin_eh_return () */
2439 || crtl->calls_eh_return
2440 /* Or if the function calls alloca */
2441 || cfun->calls_alloca
2442 /* Or if there is a stack adjustment. However, if the stack pointer
2443 is saved on the stack, we can use a pre-incrementing stack load. */
2444 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2445 && stack_adjust == 4)))
2448 saved_int_regs = offsets->saved_regs_mask;
2450 /* Unfortunately, the insn
2452 ldmib sp, {..., sp, ...}
2454 triggers a bug on most SA-110 based devices, such that the stack
2455 pointer won't be correctly restored if the instruction takes a
2456 page fault. We work around this problem by popping r3 along with
2457 the other registers, since that is never slower than executing
2458 another instruction.
2460 We test for !arm_arch5 here, because code for any architecture
2461 less than this could potentially be run on one of the buggy chips. */
2463 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2465 /* Validate that r3 is a call-clobbered register (always true in
2466 the default abi) ... */
2467 if (!call_used_regs[3])
2470 /* ... that it isn't being used for a return value ... */
2471 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2474 /* ... or for a tail-call argument ... */
2477 gcc_assert (CALL_P (sibling));
2479 if (find_regno_fusage (sibling, USE, 3))
2483 /* ... and that there are no call-saved registers in r0-r2
2484 (always true in the default ABI). */
2485 if (saved_int_regs & 0x7)
2489 /* Can't be done if interworking with Thumb, and any registers have been
2491 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2494 /* On StrongARM, conditional returns are expensive if they aren't
2495 taken and multiple registers have been stacked. */
2496 if (iscond && arm_tune_strongarm)
2498 /* Conditional return when just the LR is stored is a simple
2499 conditional-load instruction, that's not expensive. */
2500 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2504 && arm_pic_register != INVALID_REGNUM
2505 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2509 /* If there are saved registers but the LR isn't saved, then we need
2510 two instructions for the return. */
2511 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2514 /* Can't be done if any of the VFP regs are pushed,
2515 since this also requires an insn. */
2516 if (TARGET_HARD_FLOAT && TARGET_VFP)
2517 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2518 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2521 if (TARGET_REALLY_IWMMXT)
2522 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2523 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2529 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
2530 shrink-wrapping if possible. This is the case if we need to emit a
2531 prologue, which we can test by looking at the offsets. */
2533 use_simple_return_p (void)
2535 arm_stack_offsets *offsets;
2537 offsets = arm_get_frame_offsets ();
2538 return offsets->outgoing_args != 0;
2541 /* Return TRUE if int I is a valid immediate ARM constant. */
2544 const_ok_for_arm (HOST_WIDE_INT i)
2548 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2549 be all zero, or all one. */
2550 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2551 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2552 != ((~(unsigned HOST_WIDE_INT) 0)
2553 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2556 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2558 /* Fast return for 0 and small values. We must do this for zero, since
2559 the code below can't handle that one case. */
2560 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2563 /* Get the number of trailing zeros. */
2564 lowbit = ffs((int) i) - 1;
2566 /* Only even shifts are allowed in ARM mode so round down to the
2567 nearest even number. */
2571 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2576 /* Allow rotated constants in ARM mode. */
2578 && ((i & ~0xc000003f) == 0
2579 || (i & ~0xf000000f) == 0
2580 || (i & ~0xfc000003) == 0))
2587 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2590 if (i == v || i == (v | (v << 8)))
2593 /* Allow repeated pattern 0xXY00XY00. */
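#if 0
/* A minimal standalone sketch of the ARM-mode rotation rule used above
   (an 8-bit value rotated right by an even amount).  Illustrative only;
   const_ok_for_arm remains the authoritative test.  E.g. 0xff000000 and
   0x00000ff0 pass, while 0x00000101 (a nine-bit span) does not.  */
static int
arm_mode_immediate_p (unsigned int x)
{
  int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotate X left by ROT and test for an 8-bit residue.  */
      unsigned int v = (x << rot) | (x >> ((32 - rot) & 31));
      if ((v & ~0xffu) == 0)
	return 1;
    }
  return 0;
}
#endif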
2603 /* Return true if I is a valid constant for the operation CODE. */
2605 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2607 if (const_ok_for_arm (i))
2613 /* See if we can use movw. */
2614 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2617 /* Otherwise, try mvn. */
2618 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2621 /* See if we can use addw or subw. */
2623 && ((i & 0xfffff000) == 0
2624 || ((-i) & 0xfffff000) == 0))
2626 /* else fall through. */
2646 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2648 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2654 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2658 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
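/* Worked example for the AND case (illustrative): "x & 0xffffff00" has
   no valid ARM immediate, but ~0xffffff00 == 0xff does, so the test
   above accepts it and the operation can be emitted as
   "bic rD, rS, #255".  */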
2665 /* Return true if I is a valid di mode constant for the operation CODE. */
2667 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
2669 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
2670 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
2671 rtx hi = GEN_INT (hi_val);
2672 rtx lo = GEN_INT (lo_val);
2680 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
2681 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
2683 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
2690 /* Emit a sequence of insns to handle a large constant.
2691 CODE is the code of the operation required, it can be any of SET, PLUS,
2692 IOR, AND, XOR, MINUS;
2693 MODE is the mode in which the operation is being performed;
2694 VAL is the integer to operate on;
2695 SOURCE is the other operand (a register, or a null-pointer for SET);
2696 SUBTARGETS means it is safe to create scratch registers if that will
2697 either produce a simpler sequence, or we will want to cse the values.
2698 Return value is the number of insns emitted. */
2700 /* ??? Tweak this for thumb2. */
2702 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2703 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2707 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2708 cond = COND_EXEC_TEST (PATTERN (insn));
2712 if (subtargets || code == SET
2713 || (REG_P (target) && REG_P (source)
2714 && REGNO (target) != REGNO (source)))
2716 /* After arm_reorg has been called, we can't fix up expensive
2717 constants by pushing them into memory so we must synthesize
2718 them in-line, regardless of the cost. This is only likely to
2719 be more costly on chips that have load delay slots and we are
2720 compiling without running the scheduler (so no splitting
2721 occurred before the final instruction emission).
2723 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2725 if (!after_arm_reorg
2727 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2729 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2734 /* Currently SET is the only monadic value for CODE, all
2735 the rest are dyadic. */
2736 if (TARGET_USE_MOVT)
2737 arm_emit_movpair (target, GEN_INT (val));
2739 emit_set_insn (target, GEN_INT (val));
2745 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2747 if (TARGET_USE_MOVT)
2748 arm_emit_movpair (temp, GEN_INT (val));
2750 emit_set_insn (temp, GEN_INT (val));
2752 /* For MINUS, the value is subtracted from, since we never
2753 have subtraction of a constant. */
2755 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2757 emit_set_insn (target,
2758 gen_rtx_fmt_ee (code, mode, source, temp));
2764 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
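/* A concrete example of the split (illustrative): in ARM mode a SET of
   0x00ff00ff has no single valid immediate (Thumb-2's replicated forms
   would catch it), so it is synthesized in two insns, e.g.

     mov  rD, #0x00ff0000
     orr  rD, rD, #0x000000ff

   arm_gen_constant below selects such sequences, or their negated or
   inverted variants, whichever is shortest.  */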
2768 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
2769 ARM/THUMB2 immediates, and add up to VAL.
2770 The function's return value gives the number of insns required. */
2772 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2773 struct four_ints *return_sequence)
2775 int best_consecutive_zeros = 0;
2779 struct four_ints tmp_sequence;
2781 /* If we aren't targeting ARM, the best place to start is always at
2782 the bottom, otherwise look more closely. */
2785 for (i = 0; i < 32; i += 2)
2787 int consecutive_zeros = 0;
2789 if (!(val & (3 << i)))
2791 while ((i < 32) && !(val & (3 << i)))
2793 consecutive_zeros += 2;
2796 if (consecutive_zeros > best_consecutive_zeros)
2798 best_consecutive_zeros = consecutive_zeros;
2799 best_start = i - consecutive_zeros;
2806 /* So long as it won't require any more insns to do so, it's
2807 desirable to emit a small constant (in bits 0...9) in the last
2808 insn. This way there is more chance that it can be combined with
2809 a later addressing insn to form a pre-indexed load or store
2810 operation. Consider:
2812 *((volatile int *)0xe0000100) = 1;
2813 *((volatile int *)0xe0000110) = 2;
2815 We want this to wind up as:
2819 str rB, [rA, #0x100]
2821 str rB, [rA, #0x110]
2823 rather than having to synthesize both large constants from scratch.
2825 Therefore, we calculate how many insns would be required to emit
2826 the constant starting from `best_start', and also starting from
2827 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2828 yield a shorter sequence, we may as well use zero. */
2829 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
2831 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2833 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2834 if (insns2 <= insns1)
2836 *return_sequence = tmp_sequence;
2844 /* As for optimal_immediate_sequence, but starting at bit-position I. */
2846 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2847 struct four_ints *return_sequence, int i)
2849 int remainder = val & 0xffffffff;
2852 /* Try to find a way of doing the job in either two or three
2855 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2856 location. We start at position I. This may be the MSB, or
2857 optimal_immediate_sequence may have positioned it at the largest block
2858 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2859 wrapping around to the top of the word when we drop off the bottom.
2860 In the worst case this code should produce no more than four insns.
2862 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2863 constants, shifted to any arbitrary location. We should always start
2868 unsigned int b1, b2, b3, b4;
2869 unsigned HOST_WIDE_INT result;
2872 gcc_assert (insns < 4);
2877 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2878 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2881 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2882 /* We can use addw/subw for the last 12 bits. */
2886 /* Use an 8-bit shifted/rotated immediate. */
2890 result = remainder & ((0x0ff << end)
2891 | ((i < end) ? (0xff >> (32 - end))
2898 /* Arm allows rotates by a multiple of two. Thumb-2 allows
2899 arbitrary shifts. */
2900 i -= TARGET_ARM ? 2 : 1;
2904 /* Next, see if we can do a better job with a thumb2 replicated
2907 We do it this way around to catch the cases like 0x01F001E0 where
2908 two 8-bit immediates would work, but a replicated constant would
2911 TODO: 16-bit constants that don't clear all the bits, but still win.
2912 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
2915 b1 = (remainder & 0xff000000) >> 24;
2916 b2 = (remainder & 0x00ff0000) >> 16;
2917 b3 = (remainder & 0x0000ff00) >> 8;
2918 b4 = remainder & 0xff;
2922 /* The 8-bit immediate already found clears b1 (and maybe b2),
2923 but must leave b3 and b4 alone. */
2925 /* First try to find a 32-bit replicated constant that clears
2926 almost everything. We can assume that we can't do it in one,
2927 or else we wouldn't be here. */
2928 unsigned int tmp = b1 & b2 & b3 & b4;
2929 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2931 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2932 + (tmp == b3) + (tmp == b4);
2934 && (matching_bytes >= 3
2935 || (matching_bytes == 2
2936 && const_ok_for_op (remainder & ~tmp2, code))))
2938 /* At least 3 of the bytes match, and the fourth has at
2939 least as many bits set, or two of the bytes match
2940 and it will only require one more insn to finish. */
2948 /* Second, try to find a 16-bit replicated constant that can
2949 leave three of the bytes clear. If b2 or b4 is already
2950 zero, then we can. If the 8-bit from above would not
2951 clear b2 anyway, then we still win. */
2952 else if (b1 == b3 && (!b2 || !b4
2953 || (remainder & 0x00ff0000 & ~result)))
2955 result = remainder & 0xff00ff00;
2961 /* The 8-bit immediate already found clears b2 (and maybe b3)
2962 and we don't get here unless b1 is already clear, but it will
2963 leave b4 unchanged. */
2965 /* If we can clear b2 and b4 at once, then we win, since the
2966 8-bits couldn't possibly reach that far. */
2969 result = remainder & 0x00ff00ff;
2975 return_sequence->i[insns++] = result;
2976 remainder &= ~result;
2978 if (code == SET || code == MINUS)
2986 /* Emit an instruction with the indicated PATTERN. If COND is
2987 non-NULL, conditionalize the execution of the instruction on COND
2991 emit_constant_insn (rtx cond, rtx pattern)
2994 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2995 emit_insn (pattern);
2998 /* As above, but extra parameter GENERATE which, if clear, suppresses
3002 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
3003 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3008 int final_invert = 0;
3010 int set_sign_bit_copies = 0;
3011 int clear_sign_bit_copies = 0;
3012 int clear_zero_bit_copies = 0;
3013 int set_zero_bit_copies = 0;
3014 int insns = 0, neg_insns, inv_insns;
3015 unsigned HOST_WIDE_INT temp1, temp2;
3016 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3017 struct four_ints *immediates;
3018 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3020 /* Find out which operations are safe for a given CODE. Also do a quick
3021 check for degenerate cases; these can occur when DImode operations
3034 if (remainder == 0xffffffff)
3037 emit_constant_insn (cond,
3038 gen_rtx_SET (VOIDmode, target,
3039 GEN_INT (ARM_SIGN_EXTEND (val))));
3045 if (reload_completed && rtx_equal_p (target, source))
3049 emit_constant_insn (cond,
3050 gen_rtx_SET (VOIDmode, target, source));
3059 emit_constant_insn (cond,
3060 gen_rtx_SET (VOIDmode, target, const0_rtx));
3063 if (remainder == 0xffffffff)
3065 if (reload_completed && rtx_equal_p (target, source))
3068 emit_constant_insn (cond,
3069 gen_rtx_SET (VOIDmode, target, source));
3078 if (reload_completed && rtx_equal_p (target, source))
3081 emit_constant_insn (cond,
3082 gen_rtx_SET (VOIDmode, target, source));
3086 if (remainder == 0xffffffff)
3089 emit_constant_insn (cond,
3090 gen_rtx_SET (VOIDmode, target,
3091 gen_rtx_NOT (mode, source)));
3098 /* We treat MINUS as (val - source), since (source - val) is always
3099 passed as (source + (-val)). */
3103 emit_constant_insn (cond,
3104 gen_rtx_SET (VOIDmode, target,
3105 gen_rtx_NEG (mode, source)));
3108 if (const_ok_for_arm (val))
3111 emit_constant_insn (cond,
3112 gen_rtx_SET (VOIDmode, target,
3113 gen_rtx_MINUS (mode, GEN_INT (val),
3124 /* If we can do it in one insn get out quickly. */
3125 if (const_ok_for_op (val, code))
3128 emit_constant_insn (cond,
3129 gen_rtx_SET (VOIDmode, target,
3131 ? gen_rtx_fmt_ee (code, mode, source,
3137 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3139 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3140 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3144 if (mode == SImode && i == 16)
3145 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3147 emit_constant_insn (cond,
3148 gen_zero_extendhisi2
3149 (target, gen_lowpart (HImode, source)));
3151 /* Extz only supports SImode, but we can coerce the operands
3153 emit_constant_insn (cond,
3154 gen_extzv_t2 (gen_lowpart (SImode, target),
3155 gen_lowpart (SImode, source),
3156 GEN_INT (i), const0_rtx));
3162 /* Calculate a few attributes that may be useful for specific
3164 /* Count number of leading zeros. */
3165 for (i = 31; i >= 0; i--)
3167 if ((remainder & (1 << i)) == 0)
3168 clear_sign_bit_copies++;
3173 /* Count number of leading 1's. */
3174 for (i = 31; i >= 0; i--)
3176 if ((remainder & (1 << i)) != 0)
3177 set_sign_bit_copies++;
3183 /* Count number of trailing zeros. */
3183 for (i = 0; i <= 31; i++)
3185 if ((remainder & (1 << i)) == 0)
3186 clear_zero_bit_copies++;
3191 /* Count number of trailing 1's. */
3192 for (i = 0; i <= 31; i++)
3194 if ((remainder & (1 << i)) != 0)
3195 set_zero_bit_copies++;
3203 /* See if we can do this by sign_extending a constant that is known
3204 to be negative. This is a good way of doing it, since the shift
3205 may well merge into a subsequent insn. */
3206 if (set_sign_bit_copies > 1)
3208 if (const_ok_for_arm
3209 (temp1 = ARM_SIGN_EXTEND (remainder
3210 << (set_sign_bit_copies - 1))))
3214 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3215 emit_constant_insn (cond,
3216 gen_rtx_SET (VOIDmode, new_src,
3218 emit_constant_insn (cond,
3219 gen_ashrsi3 (target, new_src,
3220 GEN_INT (set_sign_bit_copies - 1)));
3224 /* For an inverted constant, we will need to set the low bits,
3225 these will be shifted out of harm's way. */
3226 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3227 if (const_ok_for_arm (~temp1))
3231 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3232 emit_constant_insn (cond,
3233 gen_rtx_SET (VOIDmode, new_src,
3235 emit_constant_insn (cond,
3236 gen_ashrsi3 (target, new_src,
3237 GEN_INT (set_sign_bit_copies - 1)));
3243 /* See if we can calculate the value as the difference between two
3244 valid immediates. */
3245 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3247 int topshift = clear_sign_bit_copies & ~1;
3249 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3250 & (0xff000000 >> topshift));
3252 /* If temp1 is zero, then that means the 9 most significant
3253 bits of remainder were 1 and we've caused it to overflow.
3254 When topshift is 0 we don't need to do anything since we
3255 can borrow from 'bit 32'. */
3256 if (temp1 == 0 && topshift != 0)
3257 temp1 = 0x80000000 >> (topshift - 1);
3259 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3261 if (const_ok_for_arm (temp2))
3265 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3266 emit_constant_insn (cond,
3267 gen_rtx_SET (VOIDmode, new_src,
3269 emit_constant_insn (cond,
3270 gen_addsi3 (target, new_src,
3278 /* See if we can generate this by setting the bottom (or the top)
3279 16 bits, and then shifting these into the other half of the
3280 word. We only look for the simplest cases, to do more would cost
3281 too much. Be careful, however, not to generate this when the
3282 alternative would take fewer insns. */
3283 if (val & 0xffff0000)
3285 temp1 = remainder & 0xffff0000;
3286 temp2 = remainder & 0x0000ffff;
3288 /* Overlaps outside this range are best done using other methods. */
3289 for (i = 9; i < 24; i++)
3291 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3292 && !const_ok_for_arm (temp2))
3294 rtx new_src = (subtargets
3295 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3297 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3298 source, subtargets, generate);
3306 gen_rtx_ASHIFT (mode, source,
3313 /* Don't duplicate cases already considered. */
3314 for (i = 17; i < 24; i++)
3316 if (((temp1 | (temp1 >> i)) == remainder)
3317 && !const_ok_for_arm (temp1))
3319 rtx new_src = (subtargets
3320 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3322 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3323 source, subtargets, generate);
3328 gen_rtx_SET (VOIDmode, target,
3331 gen_rtx_LSHIFTRT (mode, source,
3342 /* If we have IOR or XOR, and the constant can be loaded in a
3343 single instruction, and we can find a temporary to put it in,
3344 then this can be done in two instructions instead of 3-4. */
3346 /* TARGET can't be NULL if SUBTARGETS is 0 */
3347 || (reload_completed && !reg_mentioned_p (target, source)))
3349 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3353 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3355 emit_constant_insn (cond,
3356 gen_rtx_SET (VOIDmode, sub,
3358 emit_constant_insn (cond,
3359 gen_rtx_SET (VOIDmode, target,
3360 gen_rtx_fmt_ee (code, mode,
3371 x = y | constant (which is composed of set_sign_bit_copies leading 1s
3372 followed by 0s, e.g. 0xfff00000)
3373 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3375 This can be done in 2 instructions by using shifts with mov or mvn.
3380 mvn r0, r0, lsr #12 */
3381 if (set_sign_bit_copies > 8
3382 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3386 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3387 rtx shift = GEN_INT (set_sign_bit_copies);
3391 gen_rtx_SET (VOIDmode, sub,
3393 gen_rtx_ASHIFT (mode,
3398 gen_rtx_SET (VOIDmode, target,
3400 gen_rtx_LSHIFTRT (mode, sub,
3407 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3409 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3411 For example, r0 = r0 | 0xfff
3416 if (set_zero_bit_copies > 8
3417 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3421 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3422 rtx shift = GEN_INT (set_zero_bit_copies);
3426 gen_rtx_SET (VOIDmode, sub,
3428 gen_rtx_LSHIFTRT (mode,
3433 gen_rtx_SET (VOIDmode, target,
3435 gen_rtx_ASHIFT (mode, sub,
3441 /* This will never be reached for Thumb2 because orn is a valid
3442 instruction. This is for Thumb1 and the 32-bit ARM cases.
3444 x = y | constant (such that ~constant is a valid constant)
3446 x = ~(~y & ~constant).
3448 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3452 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3453 emit_constant_insn (cond,
3454 gen_rtx_SET (VOIDmode, sub,
3455 gen_rtx_NOT (mode, source)));
3458 sub = gen_reg_rtx (mode);
3459 emit_constant_insn (cond,
3460 gen_rtx_SET (VOIDmode, sub,
3461 gen_rtx_AND (mode, source,
3463 emit_constant_insn (cond,
3464 gen_rtx_SET (VOIDmode, target,
3465 gen_rtx_NOT (mode, sub)));
3472 /* See if two shifts will do two or more insns' worth of work. */
3473 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3475 HOST_WIDE_INT shift_mask = ((0xffffffff
3476 << (32 - clear_sign_bit_copies))
3479 if ((remainder | shift_mask) != 0xffffffff)
3483 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3484 insns = arm_gen_constant (AND, mode, cond,
3485 remainder | shift_mask,
3486 new_src, source, subtargets, 1);
3491 rtx targ = subtargets ? NULL_RTX : target;
3492 insns = arm_gen_constant (AND, mode, cond,
3493 remainder | shift_mask,
3494 targ, source, subtargets, 0);
3500 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3501 rtx shift = GEN_INT (clear_sign_bit_copies);
3503 emit_insn (gen_ashlsi3 (new_src, source, shift));
3504 emit_insn (gen_lshrsi3 (target, new_src, shift));
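/* For example (illustrative, on cores without the uxth/ubfx shortcut
   above): val == 0x0000ffff gives clear_sign_bit_copies == 16, so
   "x & 0x0000ffff" becomes

     lsl  rD, rS, #16
     lsr  rD, rD, #16

   instead of an AND with a constant that would itself need
   synthesizing.  */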
3510 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3512 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3514 if ((remainder | shift_mask) != 0xffffffff)
3518 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3520 insns = arm_gen_constant (AND, mode, cond,
3521 remainder | shift_mask,
3522 new_src, source, subtargets, 1);
3527 rtx targ = subtargets ? NULL_RTX : target;
3529 insns = arm_gen_constant (AND, mode, cond,
3530 remainder | shift_mask,
3531 targ, source, subtargets, 0);
3537 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3538 rtx shift = GEN_INT (clear_zero_bit_copies);
3540 emit_insn (gen_lshrsi3 (new_src, source, shift));
3541 emit_insn (gen_ashlsi3 (target, new_src, shift));
3553 /* Calculate what the instruction sequences would be if we generated it
3554 normally, negated, or inverted. */
3556 /* AND cannot be split into multiple insns, so invert and use BIC. */
3559 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3562 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3567 if (can_invert || final_invert)
3568 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3573 immediates = &pos_immediates;
3575 /* Is the negated immediate sequence more efficient? */
3576 if (neg_insns < insns && neg_insns <= inv_insns)
3579 immediates = &neg_immediates;
3584 /* Is the inverted immediate sequence more efficient?
3585 We must allow for an extra NOT instruction for XOR operations, although
3586 there is some chance that the final 'mvn' will get optimized later. */
3587 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3590 immediates = &inv_immediates;
3598 /* Now output the chosen sequence as instructions. */
3601 for (i = 0; i < insns; i++)
3603 rtx new_src, temp1_rtx;
3605 temp1 = immediates->i[i];
3607 if (code == SET || code == MINUS)
3608 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3609 else if ((final_invert || i < (insns - 1)) && subtargets)
3610 new_src = gen_reg_rtx (mode);
3616 else if (can_negate)
3619 temp1 = trunc_int_for_mode (temp1, mode);
3620 temp1_rtx = GEN_INT (temp1);
3624 else if (code == MINUS)
3625 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3627 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3629 emit_constant_insn (cond,
3630 gen_rtx_SET (VOIDmode, new_src,
3636 can_negate = can_invert;
3640 else if (code == MINUS)
3648 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3649 gen_rtx_NOT (mode, source)));
3656 /* Canonicalize a comparison so that we are more likely to recognize it.
3657 This can be done for a few constant compares, where we can make the
3658 immediate value easier to load. */
3661 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
3662 bool op0_preserve_value)
3664 enum machine_mode mode;
3665 unsigned HOST_WIDE_INT i, maxval;
3667 mode = GET_MODE (*op0);
3668 if (mode == VOIDmode)
3669 mode = GET_MODE (*op1);
3671 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3673 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3674 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3675 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3676 for GTU/LEU in Thumb mode. */
3681 if (*code == GT || *code == LE
3682 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
3684 /* Missing comparison. First try to use an available comparison. */
3686 if (CONST_INT_P (*op1))
3694 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3696 *op1 = GEN_INT (i + 1);
3697 *code = *code == GT ? GE : LT;
3703 if (i != ~((unsigned HOST_WIDE_INT) 0)
3704 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3706 *op1 = GEN_INT (i + 1);
3707 *code = *code == GTU ? GEU : LTU;
3716 /* If that did not work, reverse the condition. */
3717 if (!op0_preserve_value)
3722 *code = (int)swap_condition ((enum rtx_code)*code);
3728 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3729 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3730 to facilitate possible combining with a cmp into 'ands'. */
3732 && GET_CODE (*op0) == ZERO_EXTEND
3733 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3734 && GET_MODE (XEXP (*op0, 0)) == QImode
3735 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3736 && subreg_lowpart_p (XEXP (*op0, 0))
3737 && *op1 == const0_rtx)
3738 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3741 /* Comparisons smaller than DImode. Only adjust comparisons against
3742 an out-of-range constant. */
3743 if (!CONST_INT_P (*op1)
3744 || const_ok_for_arm (INTVAL (*op1))
3745 || const_ok_for_arm (- INTVAL (*op1)))
3759 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3761 *op1 = GEN_INT (i + 1);
3762 *code = *code == GT ? GE : LT;
3770 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3772 *op1 = GEN_INT (i - 1);
3773 *code = *code == GE ? GT : LE;
3780 if (i != ~((unsigned HOST_WIDE_INT) 0)
3781 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3783 *op1 = GEN_INT (i + 1);
3784 *code = *code == GTU ? GEU : LTU;
3792 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3794 *op1 = GEN_INT (i - 1);
3795 *code = *code == GEU ? GTU : LEU;
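/* Worked example (illustrative): "r0 > 0x00ffffff" cannot be tested
   with a cmp immediate (neither 0x00ffffff nor its negation is
   encodable), but rewriting GT as GE against 0x01000000, a valid
   rotated immediate, needs only the constant to change.  */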
3806 /* Define how to find the value returned by a function. */
3809 arm_function_value (const_tree type, const_tree func,
3810 bool outgoing ATTRIBUTE_UNUSED)
3812 enum machine_mode mode;
3813 int unsignedp ATTRIBUTE_UNUSED;
3814 rtx r ATTRIBUTE_UNUSED;
3816 mode = TYPE_MODE (type);
3818 if (TARGET_AAPCS_BASED)
3819 return aapcs_allocate_return_reg (mode, type, func);
3821 /* Promote integer types. */
3822 if (INTEGRAL_TYPE_P (type))
3823 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3825 /* Promote small structs returned in a register to full-word size
3826 for big-endian AAPCS. */
3827 if (arm_return_in_msb (type))
3829 HOST_WIDE_INT size = int_size_in_bytes (type);
3830 if (size % UNITS_PER_WORD != 0)
3832 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3833 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3837 return arm_libcall_value_1 (mode);
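/* Worked example for the rounding above (illustrative): a 6-byte
   struct gives size % UNITS_PER_WORD == 2, so size is bumped to 8 and
   the value is returned in the 8-byte integer mode (DImode), keeping
   the data in the most significant end as arm_return_in_msb requires.  */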
3840 /* libcall hashtable helpers. */
3842 struct libcall_hasher : typed_noop_remove <rtx_def>
3844 typedef rtx_def value_type;
3845 typedef rtx_def compare_type;
3846 static inline hashval_t hash (const value_type *);
3847 static inline bool equal (const value_type *, const compare_type *);
3848 static inline void remove (value_type *);
3852 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
3854 return rtx_equal_p (p1, p2);
3858 libcall_hasher::hash (const value_type *p1)
3860 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
3863 typedef hash_table <libcall_hasher> libcall_table_type;
3866 add_libcall (libcall_table_type htab, rtx libcall)
3868 *htab.find_slot (libcall, INSERT) = libcall;
3872 arm_libcall_uses_aapcs_base (const_rtx libcall)
3874 static bool init_done = false;
3875 static libcall_table_type libcall_htab;
3881 libcall_htab.create (31);
3882 add_libcall (libcall_htab,
3883 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3884 add_libcall (libcall_htab,
3885 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3886 add_libcall (libcall_htab,
3887 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3888 add_libcall (libcall_htab,
3889 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3891 add_libcall (libcall_htab,
3892 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3893 add_libcall (libcall_htab,
3894 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3895 add_libcall (libcall_htab,
3896 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3897 add_libcall (libcall_htab,
3898 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3900 add_libcall (libcall_htab,
3901 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3902 add_libcall (libcall_htab,
3903 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3904 add_libcall (libcall_htab,
3905 convert_optab_libfunc (sfix_optab, SImode, DFmode));
3906 add_libcall (libcall_htab,
3907 convert_optab_libfunc (ufix_optab, SImode, DFmode));
3908 add_libcall (libcall_htab,
3909 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3910 add_libcall (libcall_htab,
3911 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3912 add_libcall (libcall_htab,
3913 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3914 add_libcall (libcall_htab,
3915 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3917 /* Values from double-precision helper functions are returned in core
3918 registers if the selected core only supports single-precision
3919 arithmetic, even if we are using the hard-float ABI. The same is
3920 true for single-precision helpers, but we will never be using the
3921 hard-float ABI on a CPU which doesn't support single-precision
3922 operations in hardware. */
3923 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3924 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3925 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3926 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3927 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3928 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3929 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3930 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3931 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3932 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3933 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3934 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3936 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3940 return libcall && libcall_htab.find (libcall) != NULL;
3941 }
3943 static rtx
3944 arm_libcall_value_1 (enum machine_mode mode)
3945 {
3946 if (TARGET_AAPCS_BASED)
3947 return aapcs_libcall_value (mode);
3948 else if (TARGET_IWMMXT_ABI
3949 && arm_vector_mode_supported_p (mode))
3950 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
3951 else
3952 return gen_rtx_REG (mode, ARG_REGISTER (1));
3953 }
3955 /* Define how to find the value returned by a library function
3956 assuming the value has mode MODE. */
3958 rtx
3959 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3960 {
3961 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3962 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3963 {
3964 /* The following libcalls return their result in integer registers,
3965 even though they return a floating point value. */
3966 if (arm_libcall_uses_aapcs_base (libcall))
3967 return gen_rtx_REG (mode, ARG_REGISTER (1));
3968 }
3971 return arm_libcall_value_1 (mode);
3972 }
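/* A sketch of the resulting behaviour (illustrative only): when the
   default PCS is the VFP variant, a DFmode libcall listed in the table
   above has its result read from the core registers r0/r1, while any
   other DFmode libcall uses the VFP return register d0.  */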
3974 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
3977 arm_function_value_regno_p (const unsigned int regno)
3979 if (regno == ARG_REGISTER (1)
3980 || (TARGET_32BIT
3981 && TARGET_AAPCS_BASED
3983 && TARGET_HARD_FLOAT
3984 && regno == FIRST_VFP_REGNUM)
3985 || (TARGET_IWMMXT_ABI
3986 && regno == FIRST_IWMMXT_REGNUM))
3987 return true;
3989 return false;
3990 }
3992 /* Determine the amount of memory needed to store the possible return
3993 registers of an untyped call. */
3994 int
3995 arm_apply_result_size (void)
3996 {
3997 int size = 16;
3999 if (TARGET_32BIT)
4000 {
4001 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4002 size += 32;
4003 if (TARGET_IWMMXT_ABI)
4004 size += 8;
4005 }
4007 return size;
4008 }
4010 /* Decide whether TYPE should be returned in memory (true)
4011 or in a register (false).  FNTYPE is the type of the function making
4012 the call.  */
4013 static bool
4014 arm_return_in_memory (const_tree type, const_tree fntype)
4015 {
4016 HOST_WIDE_INT size;
4018 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4020 if (TARGET_AAPCS_BASED)
4021 {
4022 /* Simple, non-aggregate types (i.e. not including vectors and
4023 complex) are always returned in a register (or registers).
4024 We don't care about which register here, so we can short-cut
4025 some of the detail. */
4026 if (!AGGREGATE_TYPE_P (type)
4027 && TREE_CODE (type) != VECTOR_TYPE
4028 && TREE_CODE (type) != COMPLEX_TYPE)
4029 return false;
4031 /* Any return value that is no larger than one word can be
4032 returned in r0.  */
4033 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4034 return false;
4036 /* Check any available co-processors to see if they accept the
4037 type as a register candidate (VFP, for example, can return
4038 some aggregates in consecutive registers). These aren't
4039 available if the call is variadic. */
4040 if (aapcs_select_return_coproc (type, fntype) >= 0)
4041 return false;
4043 /* Vector values should be returned using ARM registers, not
4044 memory (unless they're over 16 bytes, which will break since
4045 we only have four call-clobbered registers to play with). */
4046 if (TREE_CODE (type) == VECTOR_TYPE)
4047 return (size < 0 || size > (4 * UNITS_PER_WORD));
4049 /* The rest go in memory.  */
4050 return true;
4051 }
4053 if (TREE_CODE (type) == VECTOR_TYPE)
4054 return (size < 0 || size > (4 * UNITS_PER_WORD));
4056 if (!AGGREGATE_TYPE_P (type) &&
4057 (TREE_CODE (type) != VECTOR_TYPE))
4058 /* All simple types are returned in registers.  */
4059 return false;
4061 if (arm_abi != ARM_ABI_APCS)
4062 {
4063 /* ATPCS and later return aggregate types in memory only if they are
4064 larger than a word (or are variable size). */
4065 return (size < 0 || size > UNITS_PER_WORD);
4066 }
4068 /* For the arm-wince targets we choose to be compatible with Microsoft's
4069 ARM and Thumb compilers, which always return aggregates in memory. */
4070 #ifndef ARM_WINCE
4071 /* All structures/unions bigger than one word are returned in memory.
4072 Also catch the case where int_size_in_bytes returns -1. In this case
4073 the aggregate is either huge or of variable size, and in either case
4074 we will want to return it via memory and not in a register. */
4075 if (size < 0 || size > UNITS_PER_WORD)
4076 return true;
4078 if (TREE_CODE (type) == RECORD_TYPE)
4079 {
4080 tree field;
4082 /* For a struct the APCS says that we only return in a register
4083 if the type is 'integer like' and every addressable element
4084 has an offset of zero. For practical purposes this means
4085 that the structure can have at most one non bit-field element
4086 and that this element must be the first one in the structure. */
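/* Hypothetical examples of this rule (editor's note, not from the
   original sources):

     struct ok  { short s; unsigned pad : 16; };   returned in r0
     struct bad { short a; short b; };             returned in memory

   the second struct has two addressable (non bit-field) elements, so
   it fails the 'integer like' test even though it fits in a word.  */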
4088 /* Find the first field, ignoring non FIELD_DECL things which will
4089 have been created by C++. */
4090 for (field = TYPE_FIELDS (type);
4091 field && TREE_CODE (field) != FIELD_DECL;
4092 field = DECL_CHAIN (field))
4093 continue;
4095 if (field == NULL)
4096 return false; /* An empty structure.  Allowed by an extension to ANSI C.  */
4098 /* Check that the first field is valid for returning in a register. */
4100 /* ... Floats are not allowed.  */
4101 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4102 return true;
4104 /* ... Aggregates that are not themselves valid for returning in
4105 a register are not allowed. */
4106 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4107 return true;
4109 /* Now check the remaining fields, if any. Only bitfields are allowed,
4110 since they are not addressable. */
4111 for (field = DECL_CHAIN (field);
4112 field;
4113 field = DECL_CHAIN (field))
4115 if (TREE_CODE (field) != FIELD_DECL)
4116 continue;
4118 if (!DECL_BIT_FIELD_TYPE (field))
4119 return true;
4120 }
4122 return false;
4123 }
4125 if (TREE_CODE (type) == UNION_TYPE)
4126 {
4127 tree field;
4129 /* Unions can be returned in registers if every element is
4130 integral, or can be returned in an integer register. */
4131 for (field = TYPE_FIELDS (type);
4132 field;
4133 field = DECL_CHAIN (field))
4135 if (TREE_CODE (field) != FIELD_DECL)
4136 continue;
4138 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4139 return true;
4141 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4142 return true;
4143 }
4145 return false;
4146 }
4147 #endif /* not ARM_WINCE */
4149 /* Return all other types in memory.  */
4150 return true;
4151 }
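/* Illustrative summary (editor's note): under AAPCS a plain int or
   float comes back in a register, while a struct { int a, b; } (eight
   bytes, no co-processor candidate) comes back in memory; under APCS
   any aggregate larger than a word, or with more than one addressable
   field, also goes to memory.  */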
4153 const struct pcs_attribute_arg
4154 {
4155 const char *arg;
4156 enum arm_pcs value;
4157 } pcs_attribute_args[] =
4158 {
4159 {"aapcs", ARM_PCS_AAPCS},
4160 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4162 /* We could recognize these, but changes would be needed elsewhere
4163 * to implement them. */
4164 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4165 {"atpcs", ARM_PCS_ATPCS},
4166 {"apcs", ARM_PCS_APCS},
4168 {NULL, ARM_PCS_UNKNOWN}
4169 };
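/* For example, a user can write (illustrative declaration only):

     double f (double) __attribute__((pcs("aapcs")));

   to force the base (soft-float register) convention for calls to f
   even when the translation unit defaults to "aapcs-vfp".  */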
4171 static enum arm_pcs
4172 arm_pcs_from_attribute (tree attr)
4173 {
4174 const struct pcs_attribute_arg *ptr;
4175 const char *arg;
4177 /* Get the value of the argument. */
4178 if (TREE_VALUE (attr) == NULL_TREE
4179 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4180 return ARM_PCS_UNKNOWN;
4182 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4184 /* Check it against the list of known arguments. */
4185 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4186 if (streq (arg, ptr->arg))
4187 return ptr->value;
4189 /* An unrecognized PCS name.  */
4190 return ARM_PCS_UNKNOWN;
4191 }
4193 /* Get the PCS variant to use for this call. TYPE is the function's type
4194 specification, DECL is the specific declaration.  DECL may be null if
4195 the call could be indirect or if this is a library call. */
4196 static enum arm_pcs
4197 arm_get_pcs_model (const_tree type, const_tree decl)
4198 {
4199 bool user_convention = false;
4200 enum arm_pcs user_pcs = arm_pcs_default;
4201 tree attr;
4205 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4206 if (attr)
4207 {
4208 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4209 user_convention = true;
4210 }
4212 if (TARGET_AAPCS_BASED)
4213 {
4214 /* Detect varargs functions. These always use the base rules
4215 (no argument is ever a candidate for a co-processor
4216 register).  */
4217 bool base_rules = stdarg_p (type);
4219 if (user_convention)
4220 {
4221 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4222 sorry ("non-AAPCS derived PCS variant");
4223 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4224 error ("variadic functions must use the base AAPCS variant");
4225 }
4227 if (base_rules)
4228 return ARM_PCS_AAPCS;
4229 else if (user_convention)
4230 return user_pcs;
4231 else if (decl && flag_unit_at_a_time)
4232 {
4233 /* Local functions never leak outside this compilation unit,
4234 so we are free to use whatever conventions are
4235 appropriate.  */
4236 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4237 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4238 if (i && i->local)
4239 return ARM_PCS_AAPCS_LOCAL;
4240 }
4241 }
4242 else if (user_convention && user_pcs != arm_pcs_default)
4243 sorry ("PCS variant");
4245 /* For everything else we use the target's default. */
4246 return arm_pcs_default;
4247 }
4250 static void
4251 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4252 const_tree fntype ATTRIBUTE_UNUSED,
4253 rtx libcall ATTRIBUTE_UNUSED,
4254 const_tree fndecl ATTRIBUTE_UNUSED)
4255 {
4256 /* Record the unallocated VFP registers. */
4257 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4258 pcum->aapcs_vfp_reg_alloc = 0;
4259 }
4261 /* Walk down the type tree of TYPE counting consecutive base elements.
4262 If *MODEP is VOIDmode, then set it to the first valid floating point
4263 type. If a non-floating point type is found, or if a floating point
4264 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4265 otherwise return the count in the sub-tree. */
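/* A hypothetical example (editor's note): walking

     struct hfa { float a, b, c; };

   yields a count of 3 with *MODEP set to SFmode, i.e. a homogeneous
   aggregate that is a candidate for three consecutive VFP registers.  */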
4266 static int
4267 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4268 {
4269 enum machine_mode mode;
4272 switch (TREE_CODE (type))
4275 mode = TYPE_MODE (type);
4276 if (mode != DFmode && mode != SFmode)
4277 return -1;
4279 if (*modep == VOIDmode)
4288 mode = TYPE_MODE (TREE_TYPE (type));
4289 if (mode != DFmode && mode != SFmode)
4290 return -1;
4292 if (*modep == VOIDmode)
4301 /* Use V2SImode and V4SImode as representatives of all 64-bit
4302 and 128-bit vector types, whether or not those modes are
4303 supported with the present options. */
4304 size = int_size_in_bytes (type);
4317 if (*modep == VOIDmode)
4320 /* Vector modes are considered to be opaque: two vectors are
4321 equivalent for the purposes of being homogeneous aggregates
4322 if they are the same size. */
4331 tree index = TYPE_DOMAIN (type);
4333 /* Can't handle incomplete types. */
4334 if (!COMPLETE_TYPE_P (type))
4335 return -1;
4337 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4340 || !TYPE_MAX_VALUE (index)
4341 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4342 || !TYPE_MIN_VALUE (index)
4343 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4347 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4348 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4350 /* There must be no padding. */
4351 if (!host_integerp (TYPE_SIZE (type), 1)
4352 || (tree_low_cst (TYPE_SIZE (type), 1)
4353 != count * GET_MODE_BITSIZE (*modep)))
4365 /* Can't handle incomplete types. */
4366 if (!COMPLETE_TYPE_P (type))
4367 return -1;
4369 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4371 if (TREE_CODE (field) != FIELD_DECL)
4372 continue;
4374 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4375 if (sub_count < 0)
4376 return -1;
4377 count += sub_count;
4378 }
4380 /* There must be no padding. */
4381 if (!host_integerp (TYPE_SIZE (type), 1)
4382 || (tree_low_cst (TYPE_SIZE (type), 1)
4383 != count * GET_MODE_BITSIZE (*modep)))
4390 case QUAL_UNION_TYPE:
4392 /* These aren't very interesting except in a degenerate case. */
4397 /* Can't handle incomplete types. */
4398 if (!COMPLETE_TYPE_P (type))
4399 return -1;
4401 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4403 if (TREE_CODE (field) != FIELD_DECL)
4404 continue;
4406 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4407 if (sub_count < 0)
4408 return -1;
4409 count = count > sub_count ? count : sub_count;
4410 }
4412 /* There must be no padding. */
4413 if (!host_integerp (TYPE_SIZE (type), 1)
4414 || (tree_low_cst (TYPE_SIZE (type), 1)
4415 != count * GET_MODE_BITSIZE (*modep)))
4428 /* Return true if PCS_VARIANT should use VFP registers. */
4429 static bool
4430 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4431 {
4432 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4433 {
4434 static bool seen_thumb1_vfp = false;
4436 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4437 {
4438 sorry ("Thumb-1 hard-float VFP ABI");
4439 /* sorry() is not immediately fatal, so only display this once. */
4440 seen_thumb1_vfp = true;
4441 }
4443 return true;
4444 }
4446 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4447 return false;
4449 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4450 (TARGET_VFP_DOUBLE || !is_double));
4451 }
4453 /* Return true if an argument whose type is TYPE, or mode is MODE, is
4454 suitable for passing or returning in VFP registers for the PCS
4455 variant selected. If it is, then *BASE_MODE is updated to contain
4456 a machine mode describing each element of the argument's type and
4457 *COUNT to hold the number of such elements. */
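/* For example, a DCmode argument (complex double) is described by
   *BASE_MODE == DFmode and *COUNT == 2; a three-float homogeneous
   aggregate by *BASE_MODE == SFmode and *COUNT == 3.  */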
4458 static bool
4459 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4460 enum machine_mode mode, const_tree type,
4461 enum machine_mode *base_mode, int *count)
4462 {
4463 enum machine_mode new_mode = VOIDmode;
4465 /* If we have the type information, prefer that to working things
4466 out from the mode. */
4467 if (type)
4468 {
4469 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4471 if (ag_count > 0 && ag_count <= 4)
4472 *count = ag_count;
4473 else
4474 return false;
4475 }
4476 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
4477 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4478 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4479 {
4480 *count = 1;
4481 new_mode = mode;
4482 }
4483 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4484 {
4485 *count = 2;
4486 new_mode = (mode == DCmode ? DFmode : SFmode);
4487 }
4488 else
4489 return false;
4492 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4493 return false;
4495 *base_mode = new_mode;
4496 return true;
4497 }
4499 static bool
4500 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4501 enum machine_mode mode, const_tree type)
4502 {
4503 int count ATTRIBUTE_UNUSED;
4504 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4506 if (!use_vfp_abi (pcs_variant, false))
4507 return false;
4508 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4509 &ag_mode, &count);
4510 }
4512 static bool
4513 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4514 const_tree type)
4515 {
4516 if (!use_vfp_abi (pcum->pcs_variant, false))
4517 return false;
4519 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4520 &pcum->aapcs_vfp_rmode,
4521 &pcum->aapcs_vfp_rcount);
4522 }
4524 static bool
4525 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4526 const_tree type ATTRIBUTE_UNUSED)
4527 {
4528 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4529 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4530 int regno;
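/* Illustration of the allocation scheme (editor's note): for a DFmode
   candidate SHIFT is 2, so a two-double aggregate needs MASK == 0xf and
   the loop below only tries even S-register numbers, giving d0-d1
   (s0-s3) as the first acceptable window.  */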
4532 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4533 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4534 {
4535 pcum->aapcs_vfp_reg_alloc = mask << regno;
4536 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4537 {
4538 int i;
4539 int rcount = pcum->aapcs_vfp_rcount;
4540 int rshift = shift;
4541 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4542 rtx par;
4545 /* Avoid using unsupported vector modes. */
4546 if (rmode == V2SImode)
4548 else if (rmode == V4SImode)
4555 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4556 for (i = 0; i < rcount; i++)
4558 rtx tmp = gen_rtx_REG (rmode,
4559 FIRST_VFP_REGNUM + regno + i * rshift);
4560 tmp = gen_rtx_EXPR_LIST
4561 (VOIDmode, tmp,
4562 GEN_INT (i * GET_MODE_SIZE (rmode)));
4563 XVECEXP (par, 0, i) = tmp;
4564 }
4566 pcum->aapcs_reg = par;
4567 }
4568 else
4569 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4570 return true;
4571 }
4572 return false;
4573 }
4575 static rtx
4576 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
4577 enum machine_mode mode,
4578 const_tree type ATTRIBUTE_UNUSED)
4579 {
4580 if (!use_vfp_abi (pcs_variant, false))
4581 return NULL;
4583 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4584 {
4585 int count;
4586 enum machine_mode ag_mode;
4587 int i;
4588 rtx par;
4589 int shift;
4591 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4592 &ag_mode, &count);
4594 if (!TARGET_NEON)
4595 {
4596 if (ag_mode == V2SImode)
4597 ag_mode = DImode;
4598 else if (ag_mode == V4SImode)
4599 {
4600 ag_mode = DImode;
4601 count *= 2;
4602 }
4603 }
4604 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
4605 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4606 for (i = 0; i < count; i++)
4608 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4609 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4610 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4611 XVECEXP (par, 0, i) = tmp;
4612 }
4614 return par;
4615 }
4617 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4618 }
4620 static void
4621 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4622 enum machine_mode mode ATTRIBUTE_UNUSED,
4623 const_tree type ATTRIBUTE_UNUSED)
4624 {
4625 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4626 pcum->aapcs_vfp_reg_alloc = 0;
4627 }
4630 #define AAPCS_CP(X) \
4631 { \
4632 aapcs_ ## X ## _cum_init, \
4633 aapcs_ ## X ## _is_call_candidate, \
4634 aapcs_ ## X ## _allocate, \
4635 aapcs_ ## X ## _is_return_candidate, \
4636 aapcs_ ## X ## _allocate_return_reg, \
4637 aapcs_ ## X ## _advance \
4638 }
4640 /* Table of co-processors that can be used to pass arguments in
4641 registers.  Ideally no argument should be a candidate for more than
4642 one co-processor table entry, but the table is processed in order
4643 and stops after the first match. If that entry then fails to put
4644 the argument into a co-processor register, the argument will go on
4645 the stack.  */
4646 static struct
4647 {
4648 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4649 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4651 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4652 BLKmode) is a candidate for this co-processor's registers; this
4653 function should ignore any position-dependent state in
4654 CUMULATIVE_ARGS and only use call-type dependent information. */
4655 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4657 /* Return true if the argument does get a co-processor register; it
4658 should set aapcs_reg to an RTX of the register allocated as is
4659 required for a return from FUNCTION_ARG. */
4660 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4662 /* Return true if a result of mode MODE (or type TYPE if MODE is
4663 BLKmode) can be returned in this co-processor's registers.  */
4664 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4666 /* Allocate and return an RTX element to hold the return type of a
4667 call, this routine must not fail and will only be called if
4668 is_return_candidate returned true with the same parameters. */
4669 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4671 /* Finish processing this argument and prepare to start processing
4673 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4674 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4675 {
4676 AAPCS_CP(vfp)
4677 };
4679 #undef AAPCS_CP
4681 static int
4682 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4683 const_tree type)
4684 {
4685 int i;
4687 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4688 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4689 return i;
4691 return -1;
4692 }
4694 static int
4695 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4696 {
4697 /* We aren't passed a decl, so we can't check that a call is local.
4698 However, it isn't clear that that would be a win anyway, since it
4699 might limit some tail-calling opportunities. */
4700 enum arm_pcs pcs_variant;
4702 if (fntype)
4703 {
4704 const_tree fndecl = NULL_TREE;
4706 if (TREE_CODE (fntype) == FUNCTION_DECL)
4707 {
4708 fndecl = fntype;
4709 fntype = TREE_TYPE (fntype);
4710 }
4712 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4713 }
4714 else
4715 pcs_variant = arm_pcs_default;
4717 if (pcs_variant != ARM_PCS_AAPCS)
4718 {
4719 int i;
4721 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4722 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4723 TYPE_MODE (type),
4724 type))
4725 return i;
4726 }
4727 return -1;
4728 }
4730 static rtx
4731 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4732 const_tree fntype)
4733 {
4734 /* We aren't passed a decl, so we can't check that a call is local.
4735 However, it isn't clear that that would be a win anyway, since it
4736 might limit some tail-calling opportunities. */
4737 enum arm_pcs pcs_variant;
4738 int unsignedp ATTRIBUTE_UNUSED;
4740 if (fntype)
4741 {
4742 const_tree fndecl = NULL_TREE;
4744 if (TREE_CODE (fntype) == FUNCTION_DECL)
4745 {
4746 fndecl = fntype;
4747 fntype = TREE_TYPE (fntype);
4748 }
4750 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4751 }
4752 else
4753 pcs_variant = arm_pcs_default;
4755 /* Promote integer types. */
4756 if (type && INTEGRAL_TYPE_P (type))
4757 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4759 if (pcs_variant != ARM_PCS_AAPCS)
4760 {
4761 int i;
4763 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4764 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4765 type))
4766 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4767 mode, type);
4768 }
4770 /* Promotes small structs returned in a register to full-word size
4771 for big-endian AAPCS. */
4772 if (type && arm_return_in_msb (type))
4773 {
4774 HOST_WIDE_INT size = int_size_in_bytes (type);
4775 if (size % UNITS_PER_WORD != 0)
4776 {
4777 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4778 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4779 }
4780 }
4782 return gen_rtx_REG (mode, R0_REGNUM);
4783 }
4785 static rtx
4786 aapcs_libcall_value (enum machine_mode mode)
4787 {
4788 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4789 && GET_MODE_SIZE (mode) <= 4)
4790 mode = SImode;
4792 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4793 }
4795 /* Lay out a function argument using the AAPCS rules. The rule
4796 numbers referred to here are those in the AAPCS. */
4797 static void
4798 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4799 const_tree type, bool named)
4800 {
4801 int nregs, nregs2;
4802 int ncrn;
4804 /* We only need to do this once per argument. */
4805 if (pcum->aapcs_arg_processed)
4806 return;
4808 pcum->aapcs_arg_processed = true;
4810 /* Special case: if named is false then we are handling an incoming
4811 anonymous argument which is on the stack.  */
4812 if (!named)
4813 return;
4815 /* Is this a potential co-processor register candidate? */
4816 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4817 {
4818 int slot = aapcs_select_call_coproc (pcum, mode, type);
4819 pcum->aapcs_cprc_slot = slot;
4821 /* We don't have to apply any of the rules from part B of the
4822 preparation phase, these are handled elsewhere in the
4823 compiler.  */
4825 if (slot >= 0)
4826 {
4827 /* A co-processor register candidate goes either in its own
4828 class of registers or on the stack. */
4829 if (!pcum->aapcs_cprc_failed[slot])
4830 {
4831 /* C1.cp - Try to allocate the argument to co-processor
4832 registers.  */
4833 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4834 return;
4836 /* C2.cp - Put the argument on the stack and note that we
4837 can't assign any more candidates in this slot. We also
4838 need to note that we have allocated stack space, so that
4839 we won't later try to split a non-cprc candidate between
4840 core registers and the stack. */
4841 pcum->aapcs_cprc_failed[slot] = true;
4842 pcum->can_split = false;
4843 }
4845 /* We didn't get a register, so this argument goes on the
4847 gcc_assert (pcum->can_split == false);
4848 return;
4849 }
4850 }
4852 /* C3 - For double-word aligned arguments, round the NCRN up to the
4853 next even number. */
4854 ncrn = pcum->aapcs_ncrn;
4855 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4856 ncrn++;
4858 nregs = ARM_NUM_REGS2 (mode, type);
4860 /* Sigh, this test should really assert that nregs > 0, but a GCC
4861 extension allows empty structs and then gives them empty size; it
4862 then allows such a structure to be passed by value. For some of
4863 the code below we have to pretend that such an argument has
4864 non-zero size so that we 'locate' it correctly either in
4865 registers or on the stack. */
4866 gcc_assert (nregs >= 0);
4868 nregs2 = nregs ? nregs : 1;
4870 /* C4 - Argument fits entirely in core registers. */
4871 if (ncrn + nregs2 <= NUM_ARG_REGS)
4872 {
4873 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4874 pcum->aapcs_next_ncrn = ncrn + nregs;
4875 return;
4876 }
4878 /* C5 - Some core registers left and there are no arguments already
4879 on the stack: split this argument between the remaining core
4880 registers and the stack. */
4881 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4882 {
4883 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4884 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4885 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4886 return;
4887 }
4889 /* C6 - NCRN is set to 4. */
4890 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4892 /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
4893 }
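/* A worked example of the rules above (editor's note, hypothetical
   prototype):

     void f (int a, long long b, int c);

   A is allocated to r0 by C4; B needs doubleword alignment, so C3
   rounds the NCRN up to r2 and C4 assigns r2/r3; C no longer fits, so
   C6 sets the NCRN to 4 and C7/C8 place it on the stack.  */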
4896 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4897 for a call to a function whose data type is FNTYPE.
4898 For a library call, FNTYPE is NULL. */
4899 void
4900 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4901 rtx libname,
4902 tree fndecl ATTRIBUTE_UNUSED)
4903 {
4904 /* Long call handling. */
4905 if (fntype)
4906 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4907 else
4908 pcum->pcs_variant = arm_pcs_default;
4910 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4911 {
4912 if (arm_libcall_uses_aapcs_base (libname))
4913 pcum->pcs_variant = ARM_PCS_AAPCS;
4915 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4916 pcum->aapcs_reg = NULL_RTX;
4917 pcum->aapcs_partial = 0;
4918 pcum->aapcs_arg_processed = false;
4919 pcum->aapcs_cprc_slot = -1;
4920 pcum->can_split = true;
4922 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4923 {
4924 int i;
4926 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4927 {
4928 pcum->aapcs_cprc_failed[i] = false;
4929 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4930 }
4931 }
4932 return;
4933 }
4937 /* On the ARM, the offset starts at 0.  */
4938 pcum->nregs = 0;
4939 pcum->iwmmxt_nregs = 0;
4940 pcum->can_split = true;
4942 /* Varargs vectors are treated the same as long long.
4943 named_count avoids having to change the way arm handles 'named' */
4944 pcum->named_count = 0;
4947 if (TARGET_REALLY_IWMMXT && fntype)
4948 {
4949 tree fn_arg;
4951 for (fn_arg = TYPE_ARG_TYPES (fntype);
4952 fn_arg;
4953 fn_arg = TREE_CHAIN (fn_arg))
4954 pcum->named_count += 1;
4956 if (! pcum->named_count)
4957 pcum->named_count = INT_MAX;
4958 }
4959 }
4962 /* Return true if mode/type need doubleword alignment. */
4963 static bool
4964 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4965 {
4966 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4967 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4968 }
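/* For example, DImode and DFmode (64-bit alignment) need doubleword
   alignment when PARM_BOUNDARY is 32, as does any type the user
   declares with __attribute__((aligned (8))); SImode does not.  */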
4971 /* Determine where to put an argument to a function.
4972 Value is zero to push the argument on the stack,
4973 or a hard register in which to store the argument.
4975 MODE is the argument's machine mode.
4976 TYPE is the data type of the argument (as a tree).
4977 This is null for libcalls where that information may
4979 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4980 the preceding args and about the function being called.
4981 NAMED is nonzero if this argument is a named parameter
4982 (otherwise it is an extra parameter matching an ellipsis).
4984 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4985 other arguments are passed on the stack. If (NAMED == 0) (which happens
4986 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4987 defined), say it is passed in the stack (function_prologue will
4988 indeed make it pass in the stack if necessary). */
4990 static rtx
4991 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4992 const_tree type, bool named)
4993 {
4994 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4995 int nregs;
4997 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4998 a call insn (op3 of a call_value insn). */
4999 if (mode == VOIDmode)
5000 return const0_rtx;
5002 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5003 {
5004 aapcs_layout_arg (pcum, mode, type, named);
5005 return pcum->aapcs_reg;
5006 }
5008 /* Varargs vectors are treated the same as long long.
5009 named_count avoids having to change the way arm handles 'named' */
5010 if (TARGET_IWMMXT_ABI
5011 && arm_vector_mode_supported_p (mode)
5012 && pcum->named_count > pcum->nargs + 1)
5013 {
5014 if (pcum->iwmmxt_nregs <= 9)
5015 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5016 else
5017 {
5018 pcum->can_split = false;
5019 return NULL_RTX;
5020 }
5021 }
5023 /* Put doubleword aligned quantities in even register pairs. */
5024 if (pcum->nregs & 1
5025 && ARM_DOUBLEWORD_ALIGN
5026 && arm_needs_doubleword_align (mode, type))
5027 pcum->nregs++;
5029 /* Only allow splitting an arg between regs and memory if all preceding
5030 args were allocated to regs. For args passed by reference we only count
5031 the reference pointer. */
5032 if (pcum->can_split)
5033 nregs = pcum->nregs;
5034 else
5035 nregs = ARM_NUM_REGS2 (mode, type);
5037 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5038 return NULL_RTX;
5040 return gen_rtx_REG (mode, pcum->nregs);
5041 }
5043 static unsigned int
5044 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
5045 {
5046 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5047 ? DOUBLEWORD_ALIGNMENT
5048 : PARM_BOUNDARY);
5049 }
5051 static int
5052 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
5053 tree type, bool named)
5054 {
5055 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5056 int nregs = pcum->nregs;
5058 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5060 aapcs_layout_arg (pcum, mode, type, named);
5061 return pcum->aapcs_partial;
5062 }
5064 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
5065 return 0;
5067 if (NUM_ARG_REGS > nregs
5068 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
5069 && pcum->can_split)
5070 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
5072 return 0;
5073 }
5075 /* Update the data in PCUM to advance over an argument
5076 of mode MODE and data type TYPE.
5077 (TYPE is null for libcalls where that information may not be available.) */
5079 static void
5080 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5081 const_tree type, bool named)
5082 {
5083 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5085 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5086 {
5087 aapcs_layout_arg (pcum, mode, type, named);
5089 if (pcum->aapcs_cprc_slot >= 0)
5090 {
5091 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
5092 type);
5093 pcum->aapcs_cprc_slot = -1;
5094 }
5096 /* Generic stuff. */
5097 pcum->aapcs_arg_processed = false;
5098 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
5099 pcum->aapcs_reg = NULL_RTX;
5100 pcum->aapcs_partial = 0;
5101 }
5102 else
5103 {
5104 pcum->nargs += 1;
5105 if (arm_vector_mode_supported_p (mode)
5106 && pcum->named_count > pcum->nargs
5107 && TARGET_IWMMXT_ABI)
5108 pcum->iwmmxt_nregs += 1;
5110 pcum->nregs += ARM_NUM_REGS2 (mode, type);
5111 }
5112 }
5114 /* Variable sized types are passed by reference. This is a GCC
5115 extension to the ARM ABI. */
5117 static bool
5118 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
5119 enum machine_mode mode ATTRIBUTE_UNUSED,
5120 const_tree type, bool named ATTRIBUTE_UNUSED)
5121 {
5122 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
5123 }
5125 /* Encode the current state of the #pragma [no_]long_calls. */
5126 typedef enum
5127 {
5128 OFF, /* No #pragma [no_]long_calls is in effect.  */
5129 LONG, /* #pragma long_calls is in effect. */
5130 SHORT /* #pragma no_long_calls is in effect.  */
5131 } arm_pragma_enum;
5133 static arm_pragma_enum arm_pragma_long_calls = OFF;
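/* For example (editor's note, illustrative source fragment):

     #pragma long_calls
     void far_func (void);      => gets the long_call attribute
     #pragma no_long_calls
     void near_func (void);     => gets the short_call attribute
     #pragma long_calls_off
     void plain_func (void);    => no attribute added  */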
5135 void
5136 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5137 {
5138 arm_pragma_long_calls = LONG;
5139 }
5141 void
5142 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5143 {
5144 arm_pragma_long_calls = SHORT;
5145 }
5147 void
5148 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5149 {
5150 arm_pragma_long_calls = OFF;
5151 }
5153 /* Handle an attribute requiring a FUNCTION_DECL;
5154 arguments as in struct attribute_spec.handler. */
5156 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
5157 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5159 if (TREE_CODE (*node) != FUNCTION_DECL)
5160 {
5161 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5162 name);
5163 *no_add_attrs = true;
5164 }
5166 return NULL_TREE;
5167 }
5169 /* Handle an "interrupt" or "isr" attribute;
5170 arguments as in struct attribute_spec.handler. */
5172 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5177 if (TREE_CODE (*node) != FUNCTION_DECL)
5178 {
5179 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5180 name);
5181 *no_add_attrs = true;
5182 }
5183 /* FIXME: the argument if any is checked for type attributes;
5184 should it be checked for decl ones? */
5188 if (TREE_CODE (*node) == FUNCTION_TYPE
5189 || TREE_CODE (*node) == METHOD_TYPE)
5191 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5192 {
5193 warning (OPT_Wattributes, "%qE attribute ignored",
5194 name);
5195 *no_add_attrs = true;
5196 }
5197 }
5198 else if (TREE_CODE (*node) == POINTER_TYPE
5199 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5200 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5201 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5203 *node = build_variant_type_copy (*node);
5204 TREE_TYPE (*node) = build_type_attribute_variant
5205 (TREE_TYPE (*node),
5206 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5207 *no_add_attrs = true;
5211 /* Possibly pass this attribute on from the type to a decl. */
5212 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5213 | (int) ATTR_FLAG_FUNCTION_NEXT
5214 | (int) ATTR_FLAG_ARRAY_NEXT))
5216 *no_add_attrs = true;
5217 return tree_cons (name, args, NULL_TREE);
5218 }
5219 else
5220 {
5221 warning (OPT_Wattributes, "%qE attribute ignored",
5222 name);
5223 }
5224 }
5226 return NULL_TREE;
5227 }
5230 /* Handle a "pcs" attribute; arguments as in struct
5231 attribute_spec.handler. */
5233 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5234 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5236 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5237 {
5238 warning (OPT_Wattributes, "%qE attribute ignored", name);
5239 *no_add_attrs = true;
5240 }
5242 return NULL_TREE;
5243 }
5244 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5245 /* Handle the "notshared" attribute. This attribute is another way of
5246 requesting hidden visibility. ARM's compiler supports
5247 "__declspec(notshared)"; we support the same thing via an
5251 arm_handle_notshared_attribute (tree *node,
5252 tree name ATTRIBUTE_UNUSED,
5253 tree args ATTRIBUTE_UNUSED,
5254 int flags ATTRIBUTE_UNUSED,
5255 bool *no_add_attrs)
5256 {
5257 tree decl = TYPE_NAME (*node);
5259 if (decl)
5260 {
5261 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5262 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5263 *no_add_attrs = false;
5264 }
5265 return NULL_TREE;
5266 }
5267 #endif
5269 /* Return 0 if the attributes for two types are incompatible, 1 if they
5270 are compatible, and 2 if they are nearly compatible (which causes a
5271 warning to be generated). */
5272 static int
5273 arm_comp_type_attributes (const_tree type1, const_tree type2)
5274 {
5275 int l1, l2, s1, s2;
5277 /* Check for mismatch of non-default calling convention. */
5278 if (TREE_CODE (type1) != FUNCTION_TYPE)
5279 return 1;
5281 /* Check for mismatched call attributes. */
5282 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5283 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5284 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5285 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5287 /* Only bother to check if an attribute is defined. */
5288 if (l1 | l2 | s1 | s2)
5289 {
5290 /* If one type has an attribute, the other must have the same attribute. */
5291 if ((l1 != l2) || (s1 != s2))
5292 return 0;
5294 /* Disallow mixed attributes. */
5295 if ((l1 & s2) || (l2 & s1))
5296 return 0;
5297 }
5299 /* Check for mismatched ISR attribute. */
5300 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5301 if (! l1)
5302 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5303 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5304 if (! l2)
5305 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5306 if (l1 != l2)
5307 return 0;
5309 return 1;
5310 }
5312 /* Assigns default attributes to newly defined type. This is used to
5313 set short_call/long_call attributes for function types of
5314 functions defined inside corresponding #pragma scopes. */
5315 static void
5316 arm_set_default_type_attributes (tree type)
5317 {
5318 /* Add __attribute__ ((long_call)) to all functions, when
5319 inside #pragma long_calls or __attribute__ ((short_call)),
5320 when inside #pragma no_long_calls. */
5321 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5322 {
5323 tree type_attr_list, attr_name;
5324 type_attr_list = TYPE_ATTRIBUTES (type);
5326 if (arm_pragma_long_calls == LONG)
5327 attr_name = get_identifier ("long_call");
5328 else if (arm_pragma_long_calls == SHORT)
5329 attr_name = get_identifier ("short_call");
5330 else
5331 return;
5333 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5334 TYPE_ATTRIBUTES (type) = type_attr_list;
5335 }
5336 }
5338 /* Return true if DECL is known to be linked into section SECTION. */
5340 static bool
5341 arm_function_in_section_p (tree decl, section *section)
5342 {
5343 /* We can only be certain about functions defined in the same
5344 compilation unit. */
5345 if (!TREE_STATIC (decl))
5346 return false;
5348 /* Make sure that SYMBOL always binds to the definition in this
5349 compilation unit. */
5350 if (!targetm.binds_local_p (decl))
5351 return false;
5353 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5354 if (!DECL_SECTION_NAME (decl))
5355 {
5356 /* Make sure that we will not create a unique section for DECL. */
5357 if (flag_function_sections || DECL_ONE_ONLY (decl))
5358 return false;
5359 }
5361 return function_section (decl) == section;
5362 }
5364 /* Return nonzero if a 32-bit "long_call" should be generated for
5365 a call from the current function to DECL. We generate a long_call
5368 a. has an __attribute__((long_call))
5369 or b. is within the scope of a #pragma long_calls
5370 or c. the -mlong-calls command line switch has been specified
5372 However we do not generate a long call if the function:
5374 d. has an __attribute__ ((short_call))
5375 or e. is inside the scope of a #pragma no_long_calls
5376 or f. is defined in the same section as the current function. */
5378 static bool
5379 arm_is_long_call_p (tree decl)
5380 {
5381 tree attrs;
5383 if (!decl)
5384 return TARGET_LONG_CALLS;
5386 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5387 if (lookup_attribute ("short_call", attrs))
5388 return false;
5390 /* For "f", be conservative, and only cater for cases in which the
5391 whole of the current function is placed in the same section. */
5392 if (!flag_reorder_blocks_and_partition
5393 && TREE_CODE (decl) == FUNCTION_DECL
5394 && arm_function_in_section_p (decl, current_function_section ()))
5395 return false;
5397 if (lookup_attribute ("long_call", attrs))
5398 return true;
5400 return TARGET_LONG_CALLS;
5401 }
5403 /* Return nonzero if it is ok to make a tail-call to DECL. */
5404 static bool
5405 arm_function_ok_for_sibcall (tree decl, tree exp)
5406 {
5407 unsigned long func_type;
5409 if (cfun->machine->sibcall_blocked)
5410 return false;
5412 /* Never tailcall something if we are generating code for Thumb-1.  */
5413 if (TARGET_THUMB1)
5414 return false;
5416 /* The PIC register is live on entry to VxWorks PLT entries, so we
5417 must make the call before restoring the PIC register. */
5418 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5419 return false;
5421 /* Cannot tail-call to long calls, since these are out of range of
5422 a branch instruction. */
5423 if (decl && arm_is_long_call_p (decl))
5424 return false;
5426 /* If we are interworking and the function is not declared static
5427 then we can't tail-call it unless we know that it exists in this
5428 compilation unit (since it might be a Thumb routine). */
5429 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
5430 && !TREE_ASM_WRITTEN (decl))
5431 return false;
5433 func_type = arm_current_func_type ();
5434 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5435 if (IS_INTERRUPT (func_type))
5436 return false;
5438 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5439 {
5440 /* Check that the return value locations are the same. For
5441 example that we aren't returning a value from the sibling in
5442 a VFP register but then need to transfer it to a core
5443 register.  */
5444 rtx a, b;
5446 a = arm_function_value (TREE_TYPE (exp), decl, false);
5447 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5449 if (!rtx_equal_p (a, b))
5450 return false;
5451 }
5453 /* Never tailcall if function may be called with a misaligned SP. */
5454 if (IS_STACKALIGN (func_type))
5455 return false;
5457 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5458 references should become a NOP. Don't convert such calls into
5460 if (TARGET_AAPCS_BASED
5461 && arm_abi == ARM_ABI_AAPCS
5462 && decl
5463 && DECL_WEAK (decl))
5464 return false;
5466 /* Everything else is ok.  */
5467 return true;
5468 }
5471 /* Addressing mode support functions. */
5473 /* Return nonzero if X is a legitimate immediate operand when compiling
5474 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5475 int
5476 legitimate_pic_operand_p (rtx x)
5477 {
5478 if (GET_CODE (x) == SYMBOL_REF
5479 || (GET_CODE (x) == CONST
5480 && GET_CODE (XEXP (x, 0)) == PLUS
5481 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5482 return 0;
5484 return 1;
5485 }
5487 /* Record that the current function needs a PIC register. Initialize
5488 cfun->machine->pic_reg if we have not already done so. */
5491 require_pic_register (void)
5493 /* A lot of the logic here is made obscure by the fact that this
5494 routine gets called as part of the rtx cost estimation process.
5495 We don't want those calls to affect any assumptions about the real
5496 function; and further, we can't call entry_of_function() until we
5497 start the real expansion process. */
5498 if (!crtl->uses_pic_offset_table)
5500 gcc_assert (can_create_pseudo_p ());
5501 if (arm_pic_register != INVALID_REGNUM)
5503 if (!cfun->machine->pic_reg)
5504 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5506 /* Play games to avoid marking the function as needing pic
5507 if we are being called as part of the cost-estimation
5509 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5510 crtl->uses_pic_offset_table = 1;
5516 if (!cfun->machine->pic_reg)
5517 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5519 /* Play games to avoid marking the function as needing pic
5520 if we are being called as part of the cost-estimation
5522 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5524 crtl->uses_pic_offset_table = 1;
5527 arm_load_pic_register (0UL);
5532 for (insn = seq; insn; insn = NEXT_INSN (insn))
5533 if (INSN_P (insn))
5534 INSN_LOCATION (insn) = prologue_location;
5536 /* We can be called during expansion of PHI nodes, where
5537 we can't yet emit instructions directly in the final
5538 insn stream. Queue the insns on the entry edge, they will
5539 be committed after everything else is expanded. */
5540 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5547 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5549 if (GET_CODE (orig) == SYMBOL_REF
5550 || GET_CODE (orig) == LABEL_REF)
5556 gcc_assert (can_create_pseudo_p ());
5557 reg = gen_reg_rtx (Pmode);
5560 /* VxWorks does not impose a fixed gap between segments; the run-time
5561 gap can be different from the object-file gap. We therefore can't
5562 use GOTOFF unless we are absolutely sure that the symbol is in the
5563 same segment as the GOT. Unfortunately, the flexibility of linker
5564 scripts means that we can't be sure of that in general, so assume
5565 that GOTOFF is never valid on VxWorks. */
5566 if ((GET_CODE (orig) == LABEL_REF
5567 || (GET_CODE (orig) == SYMBOL_REF &&
5568 SYMBOL_REF_LOCAL_P (orig)))
5570 && !TARGET_VXWORKS_RTP)
5571 insn = arm_pic_static_addr (orig, reg);
5577 /* If this function doesn't have a pic register, create one now. */
5578 require_pic_register ();
5580 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5582 /* Make the MEM as close to a constant as possible. */
5583 mem = SET_SRC (pat);
5584 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5585 MEM_READONLY_P (mem) = 1;
5586 MEM_NOTRAP_P (mem) = 1;
5588 insn = emit_insn (pat);
5591 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5593 set_unique_reg_note (insn, REG_EQUAL, orig);
5597 else if (GET_CODE (orig) == CONST)
5601 if (GET_CODE (XEXP (orig, 0)) == PLUS
5602 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5605 /* Handle the case where we have: const (UNSPEC_TLS). */
5606 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5607 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5610 /* Handle the case where we have:
5611 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5613 if (GET_CODE (XEXP (orig, 0)) == PLUS
5614 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5615 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5617 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
5623 gcc_assert (can_create_pseudo_p ());
5624 reg = gen_reg_rtx (Pmode);
5627 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5629 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5630 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5631 base == reg ? 0 : reg);
5633 if (CONST_INT_P (offset))
5635 /* The base register doesn't really matter, we only want to
5636 test the index for the appropriate mode. */
5637 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5639 gcc_assert (can_create_pseudo_p ());
5640 offset = force_reg (Pmode, offset);
5643 if (CONST_INT_P (offset))
5644 return plus_constant (Pmode, base, INTVAL (offset));
5647 if (GET_MODE_SIZE (mode) > 4
5648 && (GET_MODE_CLASS (mode) == MODE_INT
5649 || TARGET_SOFT_FLOAT))
5651 emit_insn (gen_addsi3 (reg, base, offset));
5655 return gen_rtx_PLUS (Pmode, base, offset);
5662 /* Find a spare register to use during the prolog of a function. */
5665 thumb_find_work_register (unsigned long pushed_regs_mask)
5669 /* Check the argument registers first as these are call-used. The
5670 register allocation order means that sometimes r3 might be used
5671 but earlier argument registers might not, so check them all. */
5672 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5673 if (!df_regs_ever_live_p (reg))
5674 return reg;
5676 /* Before going on to check the call-saved registers we can try a couple
5677 more ways of deducing that r3 is available. The first is when we are
5678 pushing anonymous arguments onto the stack and we have less than 4
5679 registers worth of fixed arguments(*). In this case r3 will be part of
5680 the variable argument list and so we can be sure that it will be
5681 pushed right at the start of the function. Hence it will be available
5682 for the rest of the prologue.
5683 (*): i.e. crtl->args.pretend_args_size is greater than 0.  */
5684 if (cfun->machine->uses_anonymous_args
5685 && crtl->args.pretend_args_size > 0)
5686 return LAST_ARG_REGNUM;
5688 /* The other case is when we have fixed arguments but less than 4 registers
5689 worth. In this case r3 might be used in the body of the function, but
5690 it is not being used to convey an argument into the function. In theory
5691 we could just check crtl->args.size to see how many bytes are
5692 being passed in argument registers, but it seems that it is unreliable.
5693 Sometimes it will have the value 0 when in fact arguments are being
5694 passed. (See testcase execute/20021111-1.c for an example). So we also
5695 check the args_info.nregs field as well. The problem with this field is
5696 that it makes no allowances for arguments that are passed to the
5697 function but which are not used. Hence we could miss an opportunity
5698 when a function has an unused argument in r3. But it is better to be
5699 safe than to be sorry. */
5700 if (! cfun->machine->uses_anonymous_args
5701 && crtl->args.size >= 0
5702 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5703 && (TARGET_AAPCS_BASED
5704 ? crtl->args.info.aapcs_ncrn < 4
5705 : crtl->args.info.nregs < 4))
5706 return LAST_ARG_REGNUM;
5708 /* Otherwise look for a call-saved register that is going to be pushed. */
5709 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5710 if (pushed_regs_mask & (1 << reg))
5711 return reg;
5715 /* Thumb-2 can use high regs. */
5716 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5717 if (pushed_regs_mask & (1 << reg))
5718 return reg;
5720 /* Something went wrong - thumb_compute_save_reg_mask()
5721 should have arranged for a suitable register to be pushed. */
5725 static GTY(()) int pic_labelno;
5727 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5731 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5733 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5735 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5736 return;
5738 gcc_assert (flag_pic);
5740 pic_reg = cfun->machine->pic_reg;
5741 if (TARGET_VXWORKS_RTP)
5743 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5744 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5745 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5747 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5749 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5750 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5754 /* We use an UNSPEC rather than a LABEL_REF because this label
5755 never appears in the code stream. */
5757 labelno = GEN_INT (pic_labelno++);
5758 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5759 l1 = gen_rtx_CONST (VOIDmode, l1);
5761 /* On the ARM the PC register contains 'dot + 8' at the time of the
5762 addition, on the Thumb it is 'dot + 4'. */
5763 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5764 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5766 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5770 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5772 else /* TARGET_THUMB1 */
5774 if (arm_pic_register != INVALID_REGNUM
5775 && REGNO (pic_reg) > LAST_LO_REGNUM)
5777 /* We will have pushed the pic register, so we should always be
5778 able to find a work register. */
5779 pic_tmp = gen_rtx_REG (SImode,
5780 thumb_find_work_register (saved_regs));
5781 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5782 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5783 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5786 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5790 /* Need to emit this whether or not we obey regdecls,
5791 since setjmp/longjmp can cause life info to screw up. */
5795 /* Generate code to load the address of a static var when flag_pic is set. */
5797 arm_pic_static_addr (rtx orig, rtx reg)
5799 rtx l1, labelno, offset_rtx, insn;
5801 gcc_assert (flag_pic);
5803 /* We use an UNSPEC rather than a LABEL_REF because this label
5804 never appears in the code stream. */
5805 labelno = GEN_INT (pic_labelno++);
5806 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5807 l1 = gen_rtx_CONST (VOIDmode, l1);
5809 /* On the ARM the PC register contains 'dot + 8' at the time of the
5810 addition, on the Thumb it is 'dot + 4'. */
5811 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5812 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5813 UNSPEC_SYMBOL_OFFSET);
5814 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5816 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
5820 /* Return nonzero if X is valid as an ARM state addressing register. */
5821 static int
5822 arm_address_register_rtx_p (rtx x, int strict_p)
5823 {
5824 int regno;
5826 if (!REG_P (x))
5827 return 0;
5829 regno = REGNO (x);
5831 if (strict_p)
5832 return ARM_REGNO_OK_FOR_BASE_P (regno);
5834 return (regno <= LAST_ARM_REGNUM
5835 || regno >= FIRST_PSEUDO_REGISTER
5836 || regno == FRAME_POINTER_REGNUM
5837 || regno == ARG_POINTER_REGNUM);
5838 }
5840 /* Return TRUE if this rtx is the difference of a symbol and a label,
5841 and will reduce to a PC-relative relocation in the object file.
5842 Expressions like this can be left alone when generating PIC, rather
5843 than forced through the GOT. */
5844 static int
5845 pcrel_constant_p (rtx x)
5846 {
5847 if (GET_CODE (x) == MINUS)
5848 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5850 return FALSE;
5851 }
5853 /* Return true if X will surely end up in an index register after next
5854 splitting pass.  */
5855 static bool
5856 will_be_in_index_register (const_rtx x)
5857 {
5858 /* arm.md: calculate_pic_address will split this into a register. */
5859 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
5860 }
5862 /* Return nonzero if X is a valid ARM state address operand. */
5864 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5868 enum rtx_code code = GET_CODE (x);
5870 if (arm_address_register_rtx_p (x, strict_p))
5873 use_ldrd = (TARGET_LDRD
5875 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5877 if (code == POST_INC || code == PRE_DEC
5878 || ((code == PRE_INC || code == POST_DEC)
5879 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5880 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5882 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5883 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5884 && GET_CODE (XEXP (x, 1)) == PLUS
5885 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5887 rtx addend = XEXP (XEXP (x, 1), 1);
5889 /* Don't allow ldrd post increment by register because it's hard
5890 to fixup invalid register choices. */
5892 && GET_CODE (x) == POST_MODIFY
5896 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5897 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5900 /* After reload constants split into minipools will have addresses
5901 from a LABEL_REF. */
5902 else if (reload_completed
5903 && (code == LABEL_REF
5905 && GET_CODE (XEXP (x, 0)) == PLUS
5906 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5907 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
5910 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5913 else if (code == PLUS)
5915 rtx xop0 = XEXP (x, 0);
5916 rtx xop1 = XEXP (x, 1);
5918 return ((arm_address_register_rtx_p (xop0, strict_p)
5919 && ((CONST_INT_P (xop1)
5920 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5921 || (!strict_p && will_be_in_index_register (xop1))))
5922 || (arm_address_register_rtx_p (xop1, strict_p)
5923 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5927 /* Reload currently can't handle MINUS, so disable this for now */
5928 else if (GET_CODE (x) == MINUS)
5930 rtx xop0 = XEXP (x, 0);
5931 rtx xop1 = XEXP (x, 1);
5933 return (arm_address_register_rtx_p (xop0, strict_p)
5934 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5938 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5939 && code == SYMBOL_REF
5940 && CONSTANT_POOL_ADDRESS_P (x)
5942 && symbol_mentioned_p (get_pool_constant (x))
5943 && ! pcrel_constant_p (get_pool_constant (x))))
5949 /* Return nonzero if X is a valid Thumb-2 address operand. */
5951 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5954 enum rtx_code code = GET_CODE (x);
5956 if (arm_address_register_rtx_p (x, strict_p))
5959 use_ldrd = (TARGET_LDRD
5961 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5963 if (code == POST_INC || code == PRE_DEC
5964 || ((code == PRE_INC || code == POST_DEC)
5965 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5966 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5968 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5969 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5970 && GET_CODE (XEXP (x, 1)) == PLUS
5971 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5973 /* Thumb-2 only has autoincrement by constant. */
5974 rtx addend = XEXP (XEXP (x, 1), 1);
5975 HOST_WIDE_INT offset;
5977 if (!CONST_INT_P (addend))
5980 offset = INTVAL(addend);
5981 if (GET_MODE_SIZE (mode) <= 4)
5982 return (offset > -256 && offset < 256);
5984 return (use_ldrd && offset > -1024 && offset < 1024
5985 && (offset & 3) == 0);
5988 /* After reload constants split into minipools will have addresses
5989 from a LABEL_REF. */
5990 else if (reload_completed
5991 && (code == LABEL_REF
5993 && GET_CODE (XEXP (x, 0)) == PLUS
5994 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5995 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
5998 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6001 else if (code == PLUS)
6003 rtx xop0 = XEXP (x, 0);
6004 rtx xop1 = XEXP (x, 1);
6006 return ((arm_address_register_rtx_p (xop0, strict_p)
6007 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6008 || (!strict_p && will_be_in_index_register (xop1))))
6009 || (arm_address_register_rtx_p (xop1, strict_p)
6010 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6013 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6014 && code == SYMBOL_REF
6015 && CONSTANT_POOL_ADDRESS_P (x)
6017 && symbol_mentioned_p (get_pool_constant (x))
6018 && ! pcrel_constant_p (get_pool_constant (x))))
6024 /* Return nonzero if INDEX is valid for an address index operand in
6025 ARM state.  */
6026 static int
6027 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
6028 int strict_p)
6029 {
6030 HOST_WIDE_INT range;
6031 enum rtx_code code = GET_CODE (index);
6033 /* Standard coprocessor addressing modes. */
6034 if (TARGET_HARD_FLOAT
6035 && TARGET_VFP
6036 && (mode == SFmode || mode == DFmode))
6037 return (code == CONST_INT && INTVAL (index) < 1024
6038 && INTVAL (index) > -1024
6039 && (INTVAL (index) & 3) == 0);
6041 /* For quad modes, we restrict the constant offset to be slightly less
6042 than what the instruction format permits. We do this because for
6043 quad mode moves, we will actually decompose them into two separate
6044 double-mode reads or writes. INDEX must therefore be a valid
6045 (double-mode) offset and so should INDEX+8. */
6046 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6047 return (code == CONST_INT
6048 && INTVAL (index) < 1016
6049 && INTVAL (index) > -1024
6050 && (INTVAL (index) & 3) == 0);
6052 /* We have no such constraint on double mode offsets, so we permit the
6053 full range of the instruction format. */
6054 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6055 return (code == CONST_INT
6056 && INTVAL (index) < 1024
6057 && INTVAL (index) > -1024
6058 && (INTVAL (index) & 3) == 0);
6060 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6061 return (code == CONST_INT
6062 && INTVAL (index) < 1024
6063 && INTVAL (index) > -1024
6064 && (INTVAL (index) & 3) == 0);
6066 if (arm_address_register_rtx_p (index, strict_p)
6067 && (GET_MODE_SIZE (mode) <= 4))
6068 return 1;
6070 if (mode == DImode || mode == DFmode)
6071 {
6072 if (code == CONST_INT)
6074 HOST_WIDE_INT val = INTVAL (index);
6076 if (TARGET_LDRD)
6077 return val > -256 && val < 256;
6078 else
6079 return val > -4096 && val < 4092;
6080 }
6082 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
6083 }
6085 if (GET_MODE_SIZE (mode) <= 4
6086 && ! (arm_arch4
6087 && (mode == HImode
6088 || mode == HFmode
6089 || (mode == QImode && outer == SIGN_EXTEND))))
6090 {
6091 if (code == MULT)
6092 {
6093 rtx xiop0 = XEXP (index, 0);
6094 rtx xiop1 = XEXP (index, 1);
6096 return ((arm_address_register_rtx_p (xiop0, strict_p)
6097 && power_of_two_operand (xiop1, SImode))
6098 || (arm_address_register_rtx_p (xiop1, strict_p)
6099 && power_of_two_operand (xiop0, SImode)));
6100 }
6101 else if (code == LSHIFTRT || code == ASHIFTRT
6102 || code == ASHIFT || code == ROTATERT)
6104 rtx op = XEXP (index, 1);
6106 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6107 && CONST_INT_P (op)
6108 && INTVAL (op) > 0
6109 && INTVAL (op) <= 31);
6110 }
6111 }
6113 /* For ARM v4 we may be doing a sign-extend operation during the
6114 load.  */
6115 if (arm_arch4)
6116 {
6117 if (mode == HImode
6118 || mode == HFmode
6119 || (outer == SIGN_EXTEND && mode == QImode))
6120 range = 256;
6121 else
6122 range = 4096;
6123 }
6124 else
6125 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
6127 return (code == CONST_INT
6128 && INTVAL (index) < range
6129 && INTVAL (index) > -range);
6130 }
6132 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6133 index operand. i.e. 1, 2, 4 or 8. */
6134 static bool
6135 thumb2_index_mul_operand (rtx op)
6136 {
6137 HOST_WIDE_INT val;
6139 if (!CONST_INT_P (op))
6140 return false;
6142 val = INTVAL (op);
6143 return (val == 1 || val == 2 || val == 4 || val == 8);
6144 }
6146 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6147 static int
6148 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
6149 {
6150 enum rtx_code code = GET_CODE (index);
6152 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6153 /* Standard coprocessor addressing modes. */
6154 if (TARGET_HARD_FLOAT
6155 && TARGET_VFP
6156 && (mode == SFmode || mode == DFmode))
6157 return (code == CONST_INT && INTVAL (index) < 1024
6158 /* Thumb-2 allows only a > -256 index range for its core register
6159 load/stores. Since we allow SF/DF in core registers, we have
6160 to use the intersection between -256~4096 (core) and -1024~1024
6161 (coprocessor).  */
6162 && INTVAL (index) > -256
6163 && (INTVAL (index) & 3) == 0);
6165 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6167 /* For DImode assume values will usually live in core regs
6168 and only allow LDRD addressing modes. */
6169 if (!TARGET_LDRD || mode != DImode)
6170 return (code == CONST_INT
6171 && INTVAL (index) < 1024
6172 && INTVAL (index) > -1024
6173 && (INTVAL (index) & 3) == 0);
6176 /* For quad modes, we restrict the constant offset to be slightly less
6177 than what the instruction format permits. We do this because for
6178 quad mode moves, we will actually decompose them into two separate
6179 double-mode reads or writes. INDEX must therefore be a valid
6180 (double-mode) offset and so should INDEX+8. */
6181 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6182 return (code == CONST_INT
6183 && INTVAL (index) < 1016
6184 && INTVAL (index) > -1024
6185 && (INTVAL (index) & 3) == 0);
6187 /* We have no such constraint on double mode offsets, so we permit the
6188 full range of the instruction format. */
6189 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6190 return (code == CONST_INT
6191 && INTVAL (index) < 1024
6192 && INTVAL (index) > -1024
6193 && (INTVAL (index) & 3) == 0);
6195 if (arm_address_register_rtx_p (index, strict_p)
6196 && (GET_MODE_SIZE (mode) <= 4))
6199 if (mode == DImode || mode == DFmode)
6201 if (code == CONST_INT)
6203 HOST_WIDE_INT val = INTVAL (index);
6204 /* ??? Can we assume ldrd for thumb2? */
6205 /* Thumb-2 ldrd only has reg+const addressing modes. */
6206 /* ldrd supports offsets of +-1020.
6207 However the ldr fallback does not. */
6208 return val > -256 && val < 256 && (val & 3) == 0;
6216 rtx xiop0 = XEXP (index, 0);
6217 rtx xiop1 = XEXP (index, 1);
6219 return ((arm_address_register_rtx_p (xiop0, strict_p)
6220 && thumb2_index_mul_operand (xiop1))
6221 || (arm_address_register_rtx_p (xiop1, strict_p)
6222 && thumb2_index_mul_operand (xiop0)));
6224 else if (code == ASHIFT)
6226 rtx op = XEXP (index, 1);
6228 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6229 && CONST_INT_P (op)
6230 && INTVAL (op) > 0
6231 && INTVAL (op) <= 3);
6234 return (code == CONST_INT
6235 && INTVAL (index) < 4096
6236 && INTVAL (index) > -256);
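/* Editorial note (added in editing): the asymmetry reflects Thumb-2
   having a 12-bit unsigned load/store immediate but only an 8-bit
   negative one, so for example:

       ldr r0, [r1, #4095]   @ accepted
       ldr r0, [r1, #-255]   @ accepted
       ldr r0, [r1, #-256]   @ rejected; must be synthesized  */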
6239 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6241 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6243 int regno;
6245 if (!REG_P (x))
6246 return 0;
6248 regno = REGNO (x);
6250 if (strict_p)
6251 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6253 return (regno <= LAST_LO_REGNUM
6254 || regno > LAST_VIRTUAL_REGISTER
6255 || regno == FRAME_POINTER_REGNUM
6256 || (GET_MODE_SIZE (mode) >= 4
6257 && (regno == STACK_POINTER_REGNUM
6258 || regno >= FIRST_PSEUDO_REGISTER
6259 || x == hard_frame_pointer_rtx
6260 || x == arg_pointer_rtx)));
6263 /* Return nonzero if x is a legitimate index register. This is the case
6264 for any base register that can access a QImode object. */
6266 thumb1_index_register_rtx_p (rtx x, int strict_p)
6268 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6271 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6273 The AP may be eliminated to either the SP or the FP, so we use the
6274 least common denominator, e.g. SImode, and offsets from 0 to 64.
6276 ??? Verify whether the above is the right approach.
6278 ??? Also, the FP may be eliminated to the SP, so perhaps that
6279 needs special handling also.
6281 ??? Look at how the mips16 port solves this problem. It probably uses
6282 better ways to solve some of these problems.
6284 Although it is not incorrect, we don't accept QImode and HImode
6285 addresses based on the frame pointer or arg pointer until the
6286 reload pass starts. This is so that eliminating such addresses
6287 into stack based ones won't produce impossible code. */
6289 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6291 /* ??? Not clear if this is right. Experiment. */
6292 if (GET_MODE_SIZE (mode) < 4
6293 && !(reload_in_progress || reload_completed)
6294 && (reg_mentioned_p (frame_pointer_rtx, x)
6295 || reg_mentioned_p (arg_pointer_rtx, x)
6296 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6297 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6298 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6299 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6300 return 0;
6302 /* Accept any base register. SP only in SImode or larger. */
6303 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6304 return 1;
6306 /* This is PC relative data before arm_reorg runs. */
6307 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6308 && GET_CODE (x) == SYMBOL_REF
6309 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6310 return 1;
6312 /* This is PC relative data after arm_reorg runs. */
6313 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6314 && reload_completed
6315 && (GET_CODE (x) == LABEL_REF
6316 || (GET_CODE (x) == CONST
6317 && GET_CODE (XEXP (x, 0)) == PLUS
6318 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6319 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6320 return 1;
6322 /* Post-inc indexing only supported for SImode and larger. */
6323 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6324 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6325 return 1;
6327 else if (GET_CODE (x) == PLUS)
6329 /* REG+REG address can be any two index registers. */
6330 /* We disallow FRAME+REG addressing since we know that FRAME
6331 will be replaced with STACK, and SP relative addressing only
6332 permits SP+OFFSET. */
6333 if (GET_MODE_SIZE (mode) <= 4
6334 && XEXP (x, 0) != frame_pointer_rtx
6335 && XEXP (x, 1) != frame_pointer_rtx
6336 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6337 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6338 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6339 return 1;
6341 /* REG+const has 5-7 bit offset for non-SP registers. */
6342 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6343 || XEXP (x, 0) == arg_pointer_rtx)
6344 && CONST_INT_P (XEXP (x, 1))
6345 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6346 return 1;
6348 /* REG+const has 10-bit offset for SP, but only SImode and
6349 larger is supported. */
6350 /* ??? Should probably check for DI/DFmode overflow here
6351 just like GO_IF_LEGITIMATE_OFFSET does. */
6352 else if (REG_P (XEXP (x, 0))
6353 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6354 && GET_MODE_SIZE (mode) >= 4
6355 && CONST_INT_P (XEXP (x, 1))
6356 && INTVAL (XEXP (x, 1)) >= 0
6357 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6358 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6359 return 1;
6361 else if (REG_P (XEXP (x, 0))
6362 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6363 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6364 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6365 && REGNO (XEXP (x, 0))
6366 <= LAST_VIRTUAL_POINTER_REGISTER))
6367 && GET_MODE_SIZE (mode) >= 4
6368 && CONST_INT_P (XEXP (x, 1))
6369 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6370 return 1;
6373 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6374 && GET_MODE_SIZE (mode) == 4
6375 && GET_CODE (x) == SYMBOL_REF
6376 && CONSTANT_POOL_ADDRESS_P (x)
6377 && ! (flag_pic
6378 && symbol_mentioned_p (get_pool_constant (x))
6379 && ! pcrel_constant_p (get_pool_constant (x))))
6380 return 1;
6382 return 0;
6385 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6386 instruction of mode MODE. */
6388 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6390 switch (GET_MODE_SIZE (mode))
6392 case 1:
6393 return val >= 0 && val < 32;
6395 case 2:
6396 return val >= 0 && val < 64 && (val & 1) == 0;
6398 default:
6399 return (val >= 0
6400 && (val + GET_MODE_SIZE (mode)) <= 128
6401 && (val & 3) == 0);
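/* Editorial example (added in editing): these are the 5-bit scaled
   immediates of the 16-bit loads and stores, so the accepted byte
   offsets are 0..31 for ldrb/strb, the even values 0..62 for
   ldrh/strh, and the multiples of four 0..124 for ldr/str.  */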
6406 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6408 if (TARGET_ARM)
6409 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6410 else if (TARGET_THUMB2)
6411 return thumb2_legitimate_address_p (mode, x, strict_p);
6412 else /* if (TARGET_THUMB1) */
6413 return thumb1_legitimate_address_p (mode, x, strict_p);
6416 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
6418 Given an rtx X being reloaded into a reg required to be
6419 in class CLASS, return the class of reg to actually use.
6420 In general this is just CLASS, but for the Thumb core registers and
6421 immediate constants we prefer a LO_REGS class or a subset. */
6424 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
6426 if (TARGET_32BIT)
6427 return rclass;
6428 else
6430 if (rclass == GENERAL_REGS
6431 || rclass == HI_REGS
6432 || rclass == NO_REGS
6433 || rclass == STACK_REG)
6434 return LO_REGS;
6435 else
6436 return rclass;
6440 /* Build the SYMBOL_REF for __tls_get_addr. */
6442 static GTY(()) rtx tls_get_addr_libfunc;
6445 get_tls_get_addr (void)
6447 if (!tls_get_addr_libfunc)
6448 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6449 return tls_get_addr_libfunc;
6453 arm_load_tp (rtx target)
6455 if (!target)
6456 target = gen_reg_rtx (SImode);
6458 if (TARGET_HARD_TP)
6460 /* Can return in any reg.  */
6461 emit_insn (gen_load_tp_hard (target));
6463 else
6465 /* Always returned in r0. Immediately copy the result into a pseudo,
6466 otherwise other uses of r0 (e.g. setting up function arguments) may
6467 clobber the value. */
6469 rtx tmp;
6471 emit_insn (gen_load_tp_soft ());
6473 tmp = gen_rtx_REG (SImode, 0);
6474 emit_move_insn (target, tmp);
6477 return target;
6480 load_tls_operand (rtx x, rtx reg)
6482 rtx tmp;
6484 if (reg == NULL_RTX)
6485 reg = gen_reg_rtx (SImode);
6487 tmp = gen_rtx_CONST (SImode, x);
6489 emit_move_insn (reg, tmp);
6491 return reg;
6495 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6497 rtx insns, label, labelno, sum;
6499 gcc_assert (reloc != TLS_DESCSEQ);
6500 start_sequence ();
6502 labelno = GEN_INT (pic_labelno++);
6503 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6504 label = gen_rtx_CONST (VOIDmode, label);
6506 sum = gen_rtx_UNSPEC (Pmode,
6507 gen_rtvec (4, x, GEN_INT (reloc), label,
6508 GEN_INT (TARGET_ARM ? 8 : 4)),
6509 UNSPEC_TLS);
6510 reg = load_tls_operand (sum, reg);
6512 if (TARGET_ARM)
6513 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6514 else
6515 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6517 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6518 LCT_PURE, /* LCT_CONST? */
6519 Pmode, 1, reg, Pmode);
6521 insns = get_insns ();
6522 end_sequence ();
6524 return insns;
6528 arm_tls_descseq_addr (rtx x, rtx reg)
6530 rtx labelno = GEN_INT (pic_labelno++);
6531 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6532 rtx sum = gen_rtx_UNSPEC (Pmode,
6533 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6534 gen_rtx_CONST (VOIDmode, label),
6535 GEN_INT (!TARGET_ARM)),
6536 UNSPEC_TLS);
6537 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6539 emit_insn (gen_tlscall (x, labelno));
6541 reg = gen_reg_rtx (SImode);
6543 gcc_assert (REGNO (reg) != 0);
6545 emit_move_insn (reg, reg0);
6547 return reg;
6551 legitimize_tls_address (rtx x, rtx reg)
6553 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6554 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6556 switch (model)
6558 case TLS_MODEL_GLOBAL_DYNAMIC:
6559 if (TARGET_GNU2_TLS)
6561 reg = arm_tls_descseq_addr (x, reg);
6563 tp = arm_load_tp (NULL_RTX);
6565 dest = gen_rtx_PLUS (Pmode, tp, reg);
6567 else
6569 /* Original scheme.  */
6570 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6571 dest = gen_reg_rtx (Pmode);
6572 emit_libcall_block (insns, dest, ret, x);
6574 return dest;
6576 case TLS_MODEL_LOCAL_DYNAMIC:
6577 if (TARGET_GNU2_TLS)
6579 reg = arm_tls_descseq_addr (x, reg);
6581 tp = arm_load_tp (NULL_RTX);
6583 dest = gen_rtx_PLUS (Pmode, tp, reg);
6585 else
6587 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6589 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6590 share the LDM result with other LD model accesses. */
6591 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6592 UNSPEC_TLS);
6593 dest = gen_reg_rtx (Pmode);
6594 emit_libcall_block (insns, dest, ret, eqv);
6596 /* Load the addend. */
6597 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6598 GEN_INT (TLS_LDO32)),
6599 UNSPEC_TLS);
6600 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6601 dest = gen_rtx_PLUS (Pmode, dest, addend);
6603 return dest;
6605 case TLS_MODEL_INITIAL_EXEC:
6606 labelno = GEN_INT (pic_labelno++);
6607 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6608 label = gen_rtx_CONST (VOIDmode, label);
6609 sum = gen_rtx_UNSPEC (Pmode,
6610 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6611 GEN_INT (TARGET_ARM ? 8 : 4)),
6612 UNSPEC_TLS);
6613 reg = load_tls_operand (sum, reg);
6615 if (TARGET_ARM)
6616 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6617 else if (TARGET_THUMB2)
6618 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6619 else
6621 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6622 emit_move_insn (reg, gen_const_mem (SImode, reg));
6625 tp = arm_load_tp (NULL_RTX);
6627 return gen_rtx_PLUS (Pmode, tp, reg);
6629 case TLS_MODEL_LOCAL_EXEC:
6630 tp = arm_load_tp (NULL_RTX);
6632 reg = gen_rtx_UNSPEC (Pmode,
6633 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6634 UNSPEC_TLS);
6635 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6637 return gen_rtx_PLUS (Pmode, tp, reg);
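/* Editorial sketch (added in editing, not from the original file): for
   TLS_MODEL_LOCAL_EXEC the RTL built above amounts to

       tp   = <thread pointer>        ; arm_load_tp
       addr = tp + tpoff (x)          ; UNSPEC_TLS with TLS_LE32

   i.e. the variable's offset from the thread pointer is a link-time
   constant, so no call to __tls_get_addr is needed.  */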
6644 /* Try machine-dependent ways of modifying an illegitimate address
6645 to be legitimate. If we find one, return the new, valid address. */
6647 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6651 /* TODO: legitimize_address for Thumb2. */
6652 if (TARGET_THUMB2)
6653 return x;
6654 return thumb_legitimize_address (x, orig_x, mode);
6657 if (arm_tls_symbol_p (x))
6658 return legitimize_tls_address (x, NULL_RTX);
6660 if (GET_CODE (x) == PLUS)
6662 rtx xop0 = XEXP (x, 0);
6663 rtx xop1 = XEXP (x, 1);
6665 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6666 xop0 = force_reg (SImode, xop0);
6668 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6669 xop1 = force_reg (SImode, xop1);
6671 if (ARM_BASE_REGISTER_RTX_P (xop0)
6672 && CONST_INT_P (xop1))
6674 HOST_WIDE_INT n, low_n;
6675 rtx base_reg, val;
6676 n = INTVAL (xop1);
6678 /* VFP addressing modes actually allow greater offsets, but for
6679 now we just stick with the lowest common denominator. */
6680 if (mode == DImode
6681 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6693 low_n = ((mode) == TImode ? 0
6694 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6695 n -= low_n;
6698 base_reg = gen_reg_rtx (SImode);
6699 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
6700 emit_move_insn (base_reg, val);
6701 x = plus_constant (Pmode, base_reg, low_n);
6703 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6704 x = gen_rtx_PLUS (SImode, xop0, xop1);
6707 /* XXX We don't allow MINUS any more -- see comment in
6708 arm_legitimate_address_outer_p (). */
6709 else if (GET_CODE (x) == MINUS)
6711 rtx xop0 = XEXP (x, 0);
6712 rtx xop1 = XEXP (x, 1);
6714 if (CONSTANT_P (xop0))
6715 xop0 = force_reg (SImode, xop0);
6717 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6718 xop1 = force_reg (SImode, xop1);
6720 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6721 x = gen_rtx_MINUS (SImode, xop0, xop1);
6724 /* Make sure to take full advantage of the pre-indexed addressing mode
6725 with absolute addresses which often allows for the base register to
6726 be factorized for multiple adjacent memory references, and it might
6727 even allow for the minipool to be avoided entirely. */
6728 else if (CONST_INT_P (x) && optimize > 0)
6730 int bits;
6731 HOST_WIDE_INT mask, base, index;
6732 rtx base_reg;
6734 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6735 use an 8-bit index. So let's use a 12-bit index for SImode only and
6736 hope that arm_gen_constant will enable ldrb to use more bits. */
6737 bits = (mode == SImode) ? 12 : 8;
6738 mask = (1 << bits) - 1;
6739 base = INTVAL (x) & ~mask;
6740 index = INTVAL (x) & mask;
6741 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6743 /* It'll most probably be more efficient to generate the base
6744 with more bits set and use a negative index instead. */
6745 base |= mask;
6746 index -= mask;
6748 base_reg = force_reg (SImode, GEN_INT (base));
6749 x = plus_constant (Pmode, base_reg, index);
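/* Editorial worked example (added in editing): for a SImode access to
   absolute address 0xFFF4 we get bits = 12, base = 0xF000 and
   index = 0xF4; base has few enough bits set to keep, so it is loaded
   into a register once and the access becomes [base_reg, #0xF4],
   letting neighbouring absolute addresses reuse the same base.  */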
6754 /* We need to find and carefully transform any SYMBOL and LABEL
6755 references, so go back to the original address expression. */
6756 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6758 if (new_x != orig_x)
6759 x = new_x;
6762 return x;
6766 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6767 to be legitimate. If we find one, return the new, valid address. */
6769 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6771 if (arm_tls_symbol_p (x))
6772 return legitimize_tls_address (x, NULL_RTX);
6774 if (GET_CODE (x) == PLUS
6775 && CONST_INT_P (XEXP (x, 1))
6776 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6777 || INTVAL (XEXP (x, 1)) < 0))
6779 rtx xop0 = XEXP (x, 0);
6780 rtx xop1 = XEXP (x, 1);
6781 HOST_WIDE_INT offset = INTVAL (xop1);
6783 /* Try to fold the offset into a biasing of the base register and
6784 then offsetting that. Don't do this when optimizing for space
6785 since it can cause too many CSEs. */
6786 if (optimize_size && offset >= 0
6787 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6789 HOST_WIDE_INT delta;
6791 if (offset >= 256)
6792 delta = offset - (256 - GET_MODE_SIZE (mode));
6793 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6794 delta = 31 * GET_MODE_SIZE (mode);
6796 delta = offset & (~31 * GET_MODE_SIZE (mode));
6798 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
6800 x = plus_constant (Pmode, xop0, delta);
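/* Editorial worked example (added in editing): for a SImode access at
   offset 308 (>= 256) this gives delta = 308 - (256 - 4) = 56, so the
   address is rewritten as (reg + 252) + 56; the bias of 252 fits an
   8-bit add immediate and the residual 56 fits the 5-bit scaled ldr
   offset.  */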
6802 else if (offset < 0 && offset > -256)
6803 /* Small negative offsets are best done with a subtract before the
6804 dereference; forcing these into a register normally takes two
6805 instructions.  */
6806 x = force_operand (x, NULL_RTX);
6807 else
6809 /* For the remaining cases, force the constant into a register. */
6810 xop1 = force_reg (SImode, xop1);
6811 x = gen_rtx_PLUS (SImode, xop0, xop1);
6814 else if (GET_CODE (x) == PLUS
6815 && s_register_operand (XEXP (x, 1), SImode)
6816 && !s_register_operand (XEXP (x, 0), SImode))
6818 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6820 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6825 /* We need to find and carefully transform any SYMBOL and LABEL
6826 references, so go back to the original address expression. */
6827 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6829 if (new_x != orig_x)
6830 x = new_x;
6833 return x;
6837 arm_legitimize_reload_address (rtx *p,
6838 enum machine_mode mode,
6839 int opnum, int type,
6840 int ind_levels ATTRIBUTE_UNUSED)
6842 /* We must recognize output that we have already generated ourselves. */
6843 if (GET_CODE (*p) == PLUS
6844 && GET_CODE (XEXP (*p, 0)) == PLUS
6845 && REG_P (XEXP (XEXP (*p, 0), 0))
6846 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
6847 && CONST_INT_P (XEXP (*p, 1)))
6849 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6850 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6851 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6852 return true;
6855 if (GET_CODE (*p) == PLUS
6856 && REG_P (XEXP (*p, 0))
6857 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6858 /* If the base register is equivalent to a constant, let the generic
6859 code handle it. Otherwise we will run into problems if a future
6860 reload pass decides to rematerialize the constant. */
6861 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
6862 && CONST_INT_P (XEXP (*p, 1)))
6864 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6865 HOST_WIDE_INT low, high;
6867 /* Detect coprocessor load/stores. */
6868 bool coproc_p = ((TARGET_HARD_FLOAT
6869 && TARGET_VFP
6870 && (mode == SFmode || mode == DFmode))
6871 || (TARGET_REALLY_IWMMXT
6872 && VALID_IWMMXT_REG_MODE (mode))
6874 && (VALID_NEON_DREG_MODE (mode)
6875 || VALID_NEON_QREG_MODE (mode))));
6877 /* For some conditions, bail out when the lower two bits are nonzero. */
6878 if ((val & 0x3) != 0
6879 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6881 /* For DI, and DF under soft-float: */
6882 || ((mode == DImode || mode == DFmode)
6883 /* Without ldrd, we use stm/ldm, which does not
6884 fare well with unaligned offsets. */
6886 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6887 || TARGET_THUMB2))))
6890 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6891 where the (reg+high) part gets turned into a reload add insn,
6892 we try to decompose the index into high/low values that can often
6893 also lead to better reload CSE.
6895 ldr r0, [r2, #4100] // Offset too large
6896 ldr r1, [r2, #4104] // Offset too large
6898 is best reloaded as:
6899 add t1, r2, #4096
6900 ldr r0, [t1, #4]
6901 add t2, r2, #4096
6902 ldr r1, [t2, #8]
6904 which post-reload CSE can simplify in most cases to eliminate the
6905 second add instruction:
6906 add t1, r2, #4096
6907 ldr r0, [t1, #4]
6908 ldr r1, [t1, #8]
6910 The idea here is that we want to split out the bits of the constant
6911 as a mask, rather than as subtracting the maximum offset that the
6912 respective type of load/store used can handle.
6914 When encountering negative offsets, we can still make use of them even if
6915 the overall offset is positive; sometimes this may lead to an immediate
6916 that can be constructed with fewer instructions.
6918 ldr r0, [r2, #0x3FFFFC]
6920 This is best reloaded as:
6921 add t1, r2, #0x400000
6922 ldr r0, [t1, #-4]
6924 The trick for spotting this for a load insn with N bits of offset
6925 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6926 negative offset that is going to make bit N and all the bits below
6927 it become zero in the remainder part.
6929 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6930 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6931 used in most cases of ARM load/store instructions. */
6933 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6934 (((VAL) & ((1 << (N)) - 1)) \
6935 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6936 : 0)
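/* Editorial demonstration (added in editing, not part of the original
   file): the macro sign-extends bits N:0 of VAL, producing the short
   "low" offset whose sign makes the remaining "high" part round.  A
   standalone sketch, using only the macro above and standard C:  */
#if 0
#include <stdio.h>

int
main (void)
{
  /* 0x3FFFFC: bit 12 is set, so low becomes -4 and the remainder
     0x400000 needs a single add (see the comment above).  */
  printf ("%ld\n", (long) SIGN_MAG_LOW_ADDR_BITS (0x3FFFFCL, 12)); /* -4 */

  /* 4100 = 0x1004: bit 12 is set as well, giving low = -4092 and a
     remainder of 8192, both encodable.  */
  printf ("%ld\n", (long) SIGN_MAG_LOW_ADDR_BITS (4100L, 12));     /* -4092 */
  return 0;
}
#endif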
6938 if (coproc_p)
6940 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6942 /* NEON quad-word load/stores are made of two double-word accesses,
6943 so the valid index range is reduced by 8. Treat it as a 9-bit range if
6944 necessary.  */
6945 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6946 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6948 else if (GET_MODE_SIZE (mode) == 8)
6950 if (TARGET_LDRD)
6951 low = (TARGET_THUMB2
6952 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6953 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6954 else
6955 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6956 to access doublewords. The supported load/store offsets are
6957 -8, -4, and 4, which we try to produce here. */
6958 low = ((val & 0xf) ^ 0x8) - 0x8;
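/* Editorial worked example (added in editing): the expression
   sign-extends the low nibble, e.g. val = 0x104 gives low = 4
   (high = 0x100) and val = 0x1FC gives low = -4 (high = 0x200),
   matching the ldm/stm-reachable offsets noted above.  */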
6960 else if (GET_MODE_SIZE (mode) < 8)
6962 /* NEON element load/stores do not have an offset. */
6963 if (TARGET_NEON_FP16 && mode == HFmode)
6964 return false;
6966 if (TARGET_THUMB2)
6968 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6969 Try the wider 12-bit range first, and re-try if the result
6970 is out of range.  */
6971 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6972 if (low < -255)
6973 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6975 else
6977 if (mode == HImode || mode == HFmode)
6979 if (arm_arch4)
6980 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6981 else
6983 /* The storehi/movhi_bytes fallbacks can use only
6984 [-4094,+4094] of the full ldrb/strb index range. */
6985 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6986 if (low == 4095 || low == -4095)
6987 return false;
6990 else
6991 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6997 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6998 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6999 - (unsigned HOST_WIDE_INT) 0x80000000);
7000 /* Check for overflow or zero */
7001 if (low == 0 || high == 0 || (high + low != val))
7002 return false;
7004 /* Reload the high part into a base reg; leave the low part
7005 in the mem.  */
7006 *p = gen_rtx_PLUS (GET_MODE (*p),
7007 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
7010 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7011 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7012 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7013 return true;
7016 return false;
7020 thumb_legitimize_reload_address (rtx *x_p,
7021 enum machine_mode mode,
7022 int opnum, int type,
7023 int ind_levels ATTRIBUTE_UNUSED)
7025 rtx x = *x_p;
7027 if (GET_CODE (x) == PLUS
7028 && GET_MODE_SIZE (mode) < 4
7029 && REG_P (XEXP (x, 0))
7030 && XEXP (x, 0) == stack_pointer_rtx
7031 && CONST_INT_P (XEXP (x, 1))
7032 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7034 rtx orig_x = x;
7036 x = copy_rtx (x);
7037 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7038 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7039 return true;
7042 /* If both registers are hi-regs, then it's better to reload the
7043 entire expression rather than each register individually. That
7044 only requires one reload register rather than two. */
7045 if (GET_CODE (x) == PLUS
7046 && REG_P (XEXP (x, 0))
7047 && REG_P (XEXP (x, 1))
7048 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
7049 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
7051 rtx orig_x = x;
7053 x = copy_rtx (x);
7054 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7055 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7056 return true;
7059 return false;
7062 /* Test for various thread-local symbols. */
7064 /* Return TRUE if X is a thread-local symbol. */
7067 arm_tls_symbol_p (rtx x)
7069 if (! TARGET_HAVE_TLS)
7070 return false;
7072 if (GET_CODE (x) != SYMBOL_REF)
7073 return false;
7075 return SYMBOL_REF_TLS_MODEL (x) != 0;
7078 /* Helper for arm_tls_referenced_p. */
7081 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
7083 if (GET_CODE (*x) == SYMBOL_REF)
7084 return SYMBOL_REF_TLS_MODEL (*x) != 0;
7086 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7087 TLS offsets, not real symbol references. */
7088 if (GET_CODE (*x) == UNSPEC
7089 && XINT (*x, 1) == UNSPEC_TLS)
7090 return -1;
7092 return 0;
7095 /* Return TRUE if X contains any TLS symbol references. */
7098 arm_tls_referenced_p (rtx x)
7100 if (! TARGET_HAVE_TLS)
7101 return false;
7103 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
7106 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7108 On the ARM, allow any integer (invalid ones are removed later by insn
7109 patterns), nice doubles and symbol_refs which refer to the function's
7110 constant pool.
7112 When generating pic allow anything. */
7115 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
7117 /* At present, we have no support for Neon structure constants, so forbid
7118 them here. It might be possible to handle simple cases like 0 and -1
7119 in the future.  */
7120 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
7123 return flag_pic || !label_mentioned_p (x);
7127 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7129 return (CONST_INT_P (x)
7130 || CONST_DOUBLE_P (x)
7131 || CONSTANT_ADDRESS_P (x)
7132 || flag_pic);
7136 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
7138 return (!arm_cannot_force_const_mem (mode, x)
7139 && (TARGET_32BIT
7140 ? arm_legitimate_constant_p_1 (mode, x)
7141 : thumb_legitimate_constant_p (mode, x)));
7144 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7147 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7149 rtx base, offset;
7151 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
7153 split_const (x, &base, &offset);
7154 if (GET_CODE (base) == SYMBOL_REF
7155 && !offset_within_block_p (base, INTVAL (offset)))
7156 return true;
7158 return arm_tls_referenced_p (x);
7161 #define REG_OR_SUBREG_REG(X) \
7162 (REG_P (X) \
7163 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
7165 #define REG_OR_SUBREG_RTX(X) \
7166 (REG_P (X) ? (X) : SUBREG_REG (X))
7169 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7171 enum machine_mode mode = GET_MODE (x);
7180 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
7187 return COSTS_N_INSNS (1);
7189 case MULT:
7190 if (CONST_INT_P (XEXP (x, 1)))
7192 int cycles = 0;
7193 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7195 while (i)
7196 {
7197 i >>= 2;
7198 cycles++;
7199 }
7200 return COSTS_N_INSNS (2) + cycles;
7202 return COSTS_N_INSNS (1) + 16;
7205 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
7206 the mode.  */
7207 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
7208 return (COSTS_N_INSNS (words)
7209 + 4 * ((MEM_P (SET_SRC (x)))
7210 + MEM_P (SET_DEST (x))));
7212 case CONST_INT:
7213 if (outer == SET)
7215 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7216 return 0;
7217 if (thumb_shiftable_const (INTVAL (x)))
7218 return COSTS_N_INSNS (2);
7219 return COSTS_N_INSNS (3);
7221 else if ((outer == PLUS || outer == COMPARE)
7222 && INTVAL (x) < 256 && INTVAL (x) > -256)
7223 return 0;
7224 else if ((outer == IOR || outer == XOR || outer == AND)
7225 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7226 return COSTS_N_INSNS (1);
7227 else if (outer == AND)
7230 /* This duplicates the tests in the andsi3 expander. */
7231 for (i = 9; i <= 31; i++)
7232 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7233 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7234 return COSTS_N_INSNS (2);
7236 else if (outer == ASHIFT || outer == ASHIFTRT
7237 || outer == LSHIFTRT)
7238 return 0;
7239 return COSTS_N_INSNS (2);
7245 return COSTS_N_INSNS (3);
7263 /* XXX another guess. */
7264 /* Memory costs quite a lot for the first word, but subsequent words
7265 load at the equivalent of a single insn each. */
7266 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7267 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7268 ? 4 : 0));
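/* Editorial worked example (added in editing): a DImode load costs
   10 + 4 * ((8 - 1) / 4) = 14, plus 4 more when it is a reference
   into the constant pool.  */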
7272 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7278 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7279 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7285 return total + COSTS_N_INSNS (1);
7287 /* Assume a two-shift sequence. Increase the cost slightly so
7288 we prefer actual shifts over an extend operation. */
7289 return total + 1 + COSTS_N_INSNS (2);
7297 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7299 enum machine_mode mode = GET_MODE (x);
7300 enum rtx_code subcode;
7302 enum rtx_code code = GET_CODE (x);
7308 /* Memory costs quite a lot for the first word, but subsequent words
7309 load at the equivalent of a single insn each. */
7310 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7317 if (TARGET_HARD_FLOAT && mode == SFmode)
7318 *total = COSTS_N_INSNS (2);
7319 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7320 *total = COSTS_N_INSNS (4);
7322 *total = COSTS_N_INSNS (20);
7326 if (REG_P (XEXP (x, 1)))
7327 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
7328 else if (!CONST_INT_P (XEXP (x, 1)))
7329 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7335 *total += COSTS_N_INSNS (4);
7340 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7341 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7344 *total += COSTS_N_INSNS (3);
7348 *total += COSTS_N_INSNS (1);
7349 /* Increase the cost of complex shifts because they aren't any faster,
7350 and reduce dual issue opportunities. */
7351 if (arm_tune_cortex_a9
7352 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
7360 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7361 if (CONST_INT_P (XEXP (x, 0))
7362 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7364 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7368 if (CONST_INT_P (XEXP (x, 1))
7369 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7371 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7378 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7380 if (TARGET_HARD_FLOAT
7382 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7384 *total = COSTS_N_INSNS (1);
7385 if (CONST_DOUBLE_P (XEXP (x, 0))
7386 && arm_const_double_rtx (XEXP (x, 0)))
7388 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7392 if (CONST_DOUBLE_P (XEXP (x, 1))
7393 && arm_const_double_rtx (XEXP (x, 1)))
7395 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7401 *total = COSTS_N_INSNS (20);
7405 *total = COSTS_N_INSNS (1);
7406 if (CONST_INT_P (XEXP (x, 0))
7407 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7409 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7413 subcode = GET_CODE (XEXP (x, 1));
7414 if (subcode == ASHIFT || subcode == ASHIFTRT
7415 || subcode == LSHIFTRT
7416 || subcode == ROTATE || subcode == ROTATERT)
7418 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7419 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7423 /* A shift as a part of RSB costs no more than RSB itself. */
7424 if (GET_CODE (XEXP (x, 0)) == MULT
7425 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7427 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7428 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7433 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7435 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7436 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7440 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7441 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7443 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7444 if (REG_P (XEXP (XEXP (x, 1), 0))
7445 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7446 *total += COSTS_N_INSNS (1);
7454 if (code == PLUS && arm_arch6 && mode == SImode
7455 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7456 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7458 *total = COSTS_N_INSNS (1);
7459 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7461 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7465 /* MLA: All arguments must be registers. We filter out
7466 multiplication by a power of two, so that we fall down into
7467 the code below.  */
7468 if (GET_CODE (XEXP (x, 0)) == MULT
7469 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7471 /* The cost comes from the cost of the multiply. */
7475 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7477 if (TARGET_HARD_FLOAT
7479 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7481 *total = COSTS_N_INSNS (1);
7482 if (CONST_DOUBLE_P (XEXP (x, 1))
7483 && arm_const_double_rtx (XEXP (x, 1)))
7485 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7492 *total = COSTS_N_INSNS (20);
7496 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7497 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7499 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7500 if (REG_P (XEXP (XEXP (x, 0), 0))
7501 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7502 *total += COSTS_N_INSNS (1);
7508 case AND: case XOR: case IOR:
7510 /* Normally the frame registers will be split into reg+const during
7511 reload, so it is a bad idea to combine them with other instructions,
7512 since then they might not be moved outside of loops. As a compromise
7513 we allow integration with ops that have a constant as their second
7514 operand.  */
7515 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7516 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7517 && !CONST_INT_P (XEXP (x, 1)))
7518 *total = COSTS_N_INSNS (1);
7522 *total += COSTS_N_INSNS (2);
7523 if (CONST_INT_P (XEXP (x, 1))
7524 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7526 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7533 *total += COSTS_N_INSNS (1);
7534 if (CONST_INT_P (XEXP (x, 1))
7535 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7537 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7540 subcode = GET_CODE (XEXP (x, 0));
7541 if (subcode == ASHIFT || subcode == ASHIFTRT
7542 || subcode == LSHIFTRT
7543 || subcode == ROTATE || subcode == ROTATERT)
7545 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7546 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7551 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7553 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7554 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7558 if (subcode == UMIN || subcode == UMAX
7559 || subcode == SMIN || subcode == SMAX)
7561 *total = COSTS_N_INSNS (3);
7568 /* This should have been handled by the CPU specific routines. */
7572 if (arm_arch3m && mode == SImode
7573 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7574 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7575 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7576 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7577 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7578 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7580 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7583 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7587 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7589 if (TARGET_HARD_FLOAT
7591 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7593 *total = COSTS_N_INSNS (1);
7596 *total = COSTS_N_INSNS (2);
7602 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
7603 if (mode == SImode && code == NOT)
7605 subcode = GET_CODE (XEXP (x, 0));
7606 if (subcode == ASHIFT || subcode == ASHIFTRT
7607 || subcode == LSHIFTRT
7608 || subcode == ROTATE || subcode == ROTATERT
7610 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7612 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7613 /* Register shifts cost an extra cycle. */
7614 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
7615 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7624 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7626 *total = COSTS_N_INSNS (4);
7630 operand = XEXP (x, 0);
7632 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7633 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7634 && REG_P (XEXP (operand, 0))
7635 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7636 *total += COSTS_N_INSNS (1);
7637 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7638 + rtx_cost (XEXP (x, 2), code, 2, speed));
7642 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7644 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7650 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
7651 && mode == SImode && XEXP (x, 1) == const0_rtx)
7653 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7659 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
7660 && mode == SImode && XEXP (x, 1) == const0_rtx)
7662 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7682 /* SCC insns. If the comparison has already been performed, they
7683 cost 2 instructions. Otherwise they need an additional comparison
7684 before them.  */
7685 *total = COSTS_N_INSNS (2);
7686 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
7693 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
7699 *total += COSTS_N_INSNS (1);
7700 if (CONST_INT_P (XEXP (x, 1))
7701 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7703 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7707 subcode = GET_CODE (XEXP (x, 0));
7708 if (subcode == ASHIFT || subcode == ASHIFTRT
7709 || subcode == LSHIFTRT
7710 || subcode == ROTATE || subcode == ROTATERT)
7712 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7713 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7718 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7720 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7721 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7731 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7732 if (!CONST_INT_P (XEXP (x, 1))
7733 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7734 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7738 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7740 if (TARGET_HARD_FLOAT
7742 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7744 *total = COSTS_N_INSNS (1);
7747 *total = COSTS_N_INSNS (20);
7750 *total = COSTS_N_INSNS (1);
7752 *total += COSTS_N_INSNS (3);
7758 if (GET_MODE_CLASS (mode) == MODE_INT)
7760 rtx op = XEXP (x, 0);
7761 enum machine_mode opmode = GET_MODE (op);
7764 *total += COSTS_N_INSNS (1);
7766 if (opmode != SImode)
7770 /* If !arm_arch4, we use one of the extendhisi2_mem
7771 or movhi_bytes patterns for HImode. For a QImode
7772 sign extension, we first zero-extend from memory
7773 and then perform a shift sequence. */
7774 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7775 *total += COSTS_N_INSNS (2);
7778 *total += COSTS_N_INSNS (1);
7780 /* We don't have the necessary insn, so we need to perform some
7781 other operation.  */
7782 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7783 /* An and with constant 255. */
7784 *total += COSTS_N_INSNS (1);
7786 /* A shift sequence. Increase costs slightly to avoid
7787 combining two shifts into an extend operation. */
7788 *total += COSTS_N_INSNS (2) + 1;
7794 switch (GET_MODE (XEXP (x, 0)))
7801 *total = COSTS_N_INSNS (1);
7811 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7815 if (const_ok_for_arm (INTVAL (x))
7816 || const_ok_for_arm (~INTVAL (x)))
7817 *total = COSTS_N_INSNS (1);
7819 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7820 INTVAL (x), NULL_RTX,
7827 *total = COSTS_N_INSNS (3);
7831 *total = COSTS_N_INSNS (1);
7835 *total = COSTS_N_INSNS (1);
7836 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7840 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7841 && (mode == SFmode || !TARGET_VFP_SINGLE))
7842 *total = COSTS_N_INSNS (1);
7844 *total = COSTS_N_INSNS (4);
7848 /* The vec_extract patterns accept memory operands that require an
7849 address reload. Account for the cost of that reload to give the
7850 auto-inc-dec pass an incentive to try to replace them. */
7851 if (TARGET_NEON && MEM_P (SET_DEST (x))
7852 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
7854 *total = rtx_cost (SET_DEST (x), code, 0, speed);
7855 if (!neon_vector_mem_operand (SET_DEST (x), 2))
7856 *total += COSTS_N_INSNS (1);
7859 /* Likewise for the vec_set patterns. */
7860 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
7861 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
7862 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
7864 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
7865 *total = rtx_cost (mem, code, 0, speed);
7866 if (!neon_vector_mem_operand (mem, 2))
7867 *total += COSTS_N_INSNS (1);
7873 /* We cost this as high as our memory costs to allow this to
7874 be hoisted from loops. */
7875 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
7877 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7883 && TARGET_HARD_FLOAT
7885 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
7886 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
7887 *total = COSTS_N_INSNS (1);
7889 *total = COSTS_N_INSNS (4);
7893 *total = COSTS_N_INSNS (4);
7898 /* Estimates the size cost of thumb1 instructions.
7899 For now most of the code is copied from thumb1_rtx_costs. We need more
7900 fine-grained tuning when we have more related test cases. */
7902 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7904 enum machine_mode mode = GET_MODE (x);
7913 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
7920 return COSTS_N_INSNS (1);
7922 case MULT:
7923 if (CONST_INT_P (XEXP (x, 1)))
7925 /* The Thumb-1 mul instruction cannot operate on a constant; we must
7926 load it into a register first.  */
7927 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7928 return COSTS_N_INSNS (1) + const_size;
7930 return COSTS_N_INSNS (1);
7933 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
7934 the mode.  */
7935 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
7936 return (COSTS_N_INSNS (words)
7937 + 4 * ((MEM_P (SET_SRC (x)))
7938 + MEM_P (SET_DEST (x))));
7940 case CONST_INT:
7941 if (outer == SET)
7943 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7944 return COSTS_N_INSNS (1);
7945 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7946 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7947 return COSTS_N_INSNS (2);
7948 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7949 if (thumb_shiftable_const (INTVAL (x)))
7950 return COSTS_N_INSNS (2);
7951 return COSTS_N_INSNS (3);
7953 else if ((outer == PLUS || outer == COMPARE)
7954 && INTVAL (x) < 256 && INTVAL (x) > -256)
7955 return 0;
7956 else if ((outer == IOR || outer == XOR || outer == AND)
7957 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7958 return COSTS_N_INSNS (1);
7959 else if (outer == AND)
7962 /* This duplicates the tests in the andsi3 expander. */
7963 for (i = 9; i <= 31; i++)
7964 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7965 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7966 return COSTS_N_INSNS (2);
7968 else if (outer == ASHIFT || outer == ASHIFTRT
7969 || outer == LSHIFTRT)
7970 return 0;
7971 return COSTS_N_INSNS (2);
7977 return COSTS_N_INSNS (3);
7995 /* XXX another guess. */
7996 /* Memory costs quite a lot for the first word, but subsequent words
7997 load at the equivalent of a single insn each. */
7998 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7999 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8000 ? 4 : 0));
8004 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8009 /* XXX still guessing. */
8010 switch (GET_MODE (XEXP (x, 0)))
8013 return (1 + (mode == DImode ? 4 : 0)
8014 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8017 return (4 + (mode == DImode ? 4 : 0)
8018 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8021 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8032 /* RTX costs when optimizing for size. */
8034 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8037 enum machine_mode mode = GET_MODE (x);
8040 *total = thumb1_size_rtx_costs (x, code, outer_code);
8044 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8048 /* A memory access costs 1 insn if the mode is small, or the address is
8049 a single register; otherwise it costs one insn per word.  */
8050 if (REG_P (XEXP (x, 0)))
8051 *total = COSTS_N_INSNS (1);
8053 && GET_CODE (XEXP (x, 0)) == PLUS
8054 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8055 /* This will be split into two instructions.
8056 See arm.md:calculate_pic_address. */
8057 *total = COSTS_N_INSNS (2);
8059 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8066 /* Needs a libcall, so it costs about this. */
8067 *total = COSTS_N_INSNS (2);
8071 if (mode == SImode && REG_P (XEXP (x, 1)))
8073 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
8081 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8083 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8086 else if (mode == SImode)
8088 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8089 /* Slightly disparage register shifts, but not by much. */
8090 if (!CONST_INT_P (XEXP (x, 1)))
8091 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8095 /* Needs a libcall. */
8096 *total = COSTS_N_INSNS (2);
8100 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8101 && (mode == SFmode || !TARGET_VFP_SINGLE))
8103 *total = COSTS_N_INSNS (1);
8109 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
8110 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
8112 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
8113 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
8114 || subcode1 == ROTATE || subcode1 == ROTATERT
8115 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
8116 || subcode1 == ASHIFTRT)
8118 /* It's just the cost of the two operands. */
8123 *total = COSTS_N_INSNS (1);
8127 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8131 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8132 && (mode == SFmode || !TARGET_VFP_SINGLE))
8134 *total = COSTS_N_INSNS (1);
8138 /* A shift as a part of ADD costs nothing. */
8139 if (GET_CODE (XEXP (x, 0)) == MULT
8140 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8142 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
8143 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
8144 *total += rtx_cost (XEXP (x, 1), code, 1, false);
8149 case AND: case XOR: case IOR:
8152 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
8154 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
8155 || subcode == LSHIFTRT || subcode == ASHIFTRT
8156 || (code == AND && subcode == NOT))
8158 /* It's just the cost of the two operands. */
8164 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8168 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8172 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8173 && (mode == SFmode || !TARGET_VFP_SINGLE))
8175 *total = COSTS_N_INSNS (1);
8181 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8190 if (cc_register (XEXP (x, 0), VOIDmode))
8193 *total = COSTS_N_INSNS (1);
8197 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8198 && (mode == SFmode || !TARGET_VFP_SINGLE))
8199 *total = COSTS_N_INSNS (1);
8201 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
8206 return arm_rtx_costs_1 (x, outer_code, total, 0);
8209 if (const_ok_for_arm (INTVAL (x)))
8210 /* A multiplication by a constant requires another instruction
8211 to load the constant to a register. */
8212 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
8214 else if (const_ok_for_arm (~INTVAL (x)))
8215 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
8216 else if (const_ok_for_arm (-INTVAL (x)))
8218 if (outer_code == COMPARE || outer_code == PLUS
8219 || outer_code == MINUS)
8222 *total = COSTS_N_INSNS (1);
8225 *total = COSTS_N_INSNS (2);
8231 *total = COSTS_N_INSNS (2);
8235 *total = COSTS_N_INSNS (4);
8240 && TARGET_HARD_FLOAT
8241 && outer_code == SET
8242 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8243 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8244 *total = COSTS_N_INSNS (1);
8246 *total = COSTS_N_INSNS (4);
8251 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8252 cost of these slightly. */
8253 *total = COSTS_N_INSNS (1) + 1;
8260 if (mode != VOIDmode)
8261 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8263 *total = COSTS_N_INSNS (4); /* Who knows?  */
8268 /* RTX costs. Pick the cost function based on whether we are
optimizing for size or for speed.  */
8270 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
8271 int *total, bool speed)
8274 return arm_size_rtx_costs (x, (enum rtx_code) code,
8275 (enum rtx_code) outer_code, total);
8277 return current_tune->rtx_costs (x, (enum rtx_code) code,
8278 (enum rtx_code) outer_code,
8282 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
8283 supported on any "slowmul" cores, so it can be ignored. */
8286 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8287 int *total, bool speed)
8289 enum machine_mode mode = GET_MODE (x);
8293 *total = thumb1_rtx_costs (x, code, outer_code);
8300 if (GET_MODE_CLASS (mode) == MODE_FLOAT
8303 *total = COSTS_N_INSNS (20);
8307 if (CONST_INT_P (XEXP (x, 1)))
8309 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8310 & (unsigned HOST_WIDE_INT) 0xffffffff);
8311 int cost, const_ok = const_ok_for_arm (i);
8312 int j, booth_unit_size;
8314 /* Tune as appropriate. */
8315 cost = const_ok ? 4 : 8;
8316 booth_unit_size = 2;
8317 for (j = 0; i && j < 32; j += booth_unit_size)
8319 i >>= booth_unit_size;
8320 cost++;
8323 *total = COSTS_N_INSNS (cost);
8324 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8325 return true;
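/* Editorial worked example (added in editing): multiplying by 0x55
   (which is const_ok_for_arm) starts at cost 4 and consumes two bits
   per Booth step: 0x55 -> 0x15 -> 0x5 -> 0x1 -> 0 is four steps, so
   the estimate is COSTS_N_INSNS (8) plus the cost of operand 0.  */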
8328 *total = COSTS_N_INSNS (20);
8332 return arm_rtx_costs_1 (x, outer_code, total, speed);
8337 /* RTX cost for cores with a fast multiply unit (M variants). */
8340 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8341 int *total, bool speed)
8343 enum machine_mode mode = GET_MODE (x);
8347 *total = thumb1_rtx_costs (x, code, outer_code);
8351 /* ??? Should Thumb-2 use different costs?  */
8355 /* There is no point basing this on the tuning, since it is always the
8356 fast variant if it exists at all. */
8358 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8359 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8360 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8362 *total = COSTS_N_INSNS (2);
8369 *total = COSTS_N_INSNS (5);
8373 if (CONST_INT_P (XEXP (x, 1)))
8375 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8376 & (unsigned HOST_WIDE_INT) 0xffffffff);
8377 int cost, const_ok = const_ok_for_arm (i);
8378 int j, booth_unit_size;
8380 /* Tune as appropriate. */
8381 cost = const_ok ? 4 : 8;
8382 booth_unit_size = 8;
8383 for (j = 0; i && j < 32; j += booth_unit_size)
8385 i >>= booth_unit_size;
8386 cost++;
8389 *total = COSTS_N_INSNS (cost);
8395 *total = COSTS_N_INSNS (4);
8399 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8401 if (TARGET_HARD_FLOAT
8403 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8405 *total = COSTS_N_INSNS (1);
8410 /* Requires a lib call */
8411 *total = COSTS_N_INSNS (20);
8415 return arm_rtx_costs_1 (x, outer_code, total, speed);
8420 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
8421 so it can be ignored. */
8424 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8425 int *total, bool speed)
8427 enum machine_mode mode = GET_MODE (x);
8431 *total = thumb1_rtx_costs (x, code, outer_code);
8438 if (GET_CODE (XEXP (x, 0)) != MULT)
8439 return arm_rtx_costs_1 (x, outer_code, total, speed);
8441 /* A COMPARE of a MULT is slow on XScale; the muls instruction
8442 will stall until the multiplication is complete. */
8443 *total = COSTS_N_INSNS (3);
8447 /* There is no point basing this on the tuning, since it is always the
8448 fast variant if it exists at all. */
8450 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8451 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8452 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8454 *total = COSTS_N_INSNS (2);
8461 *total = COSTS_N_INSNS (5);
8465 if (CONST_INT_P (XEXP (x, 1)))
8467 /* If operand 1 is a constant we can more accurately
8468 calculate the cost of the multiply. The multiplier can
8469 retire 15 bits on the first cycle and a further 12 on the
8470 second. We do, of course, have to load the constant into
8471 a register first. */
8472 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8473 /* There's a general overhead of one cycle. */
8474 int cost = 1;
8475 unsigned HOST_WIDE_INT masked_const;
8477 if (i & 0x80000000)
8478 i = ~i;
8480 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
8482 masked_const = i & 0xffff8000;
8483 if (masked_const != 0)
8485 cost++;
8486 masked_const = i & 0xf8000000;
8487 if (masked_const != 0)
8488 cost++;
8490 *total = COSTS_N_INSNS (cost);
8491 return true;
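/* Editorial worked example (added in editing): for a multiplier of
   0x12345 the bits above bit 14 are nonzero (cost becomes 2) but the
   top five bits are clear, so the estimate stays at COSTS_N_INSNS (2);
   a constant such as 0x12345678 with high bits set costs one more.  */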
8496 *total = COSTS_N_INSNS (3);
8500 /* Requires a lib call */
8501 *total = COSTS_N_INSNS (20);
8505 return arm_rtx_costs_1 (x, outer_code, total, speed);
8510 /* RTX costs for 9e (and later) cores. */
8513 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8514 int *total, bool speed)
8516 enum machine_mode mode = GET_MODE (x);
8523 *total = COSTS_N_INSNS (3);
8527 *total = thumb1_rtx_costs (x, code, outer_code);
8535 /* There is no point basing this on the tuning, since it is always the
8536 fast variant if it exists at all. */
8538 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8539 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8540 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8542 *total = COSTS_N_INSNS (2);
8549 *total = COSTS_N_INSNS (5);
8555 *total = COSTS_N_INSNS (2);
8559 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8561 if (TARGET_HARD_FLOAT
8563 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8565 *total = COSTS_N_INSNS (1);
8570 *total = COSTS_N_INSNS (20);
8574 return arm_rtx_costs_1 (x, outer_code, total, speed);
8577 /* All address computations that can be done are essentially free, but
8578 rtx_cost returns the same value for practically all of them. So we
8579 weight the different types of address here in order of preference
8580 (most preferred first): PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
8582 arm_arm_address_cost (rtx x)
8584 enum rtx_code c = GET_CODE (x);
8586 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8587 return 0;
8588 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8589 return 10;
8591 if (c == PLUS)
8593 if (CONST_INT_P (XEXP (x, 1)))
8594 return 2;
8596 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
8597 return 3;
8599 return 4;
8602 return 6;
8606 arm_thumb_address_cost (rtx x)
8608 enum rtx_code c = GET_CODE (x);
8610 if (c == REG)
8611 return 1;
8612 if (c == PLUS
8613 && REG_P (XEXP (x, 0))
8614 && CONST_INT_P (XEXP (x, 1)))
8615 return 1;
8617 return 2;
8621 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
8622 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
8624 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8627 /* Adjust cost hook for XScale. */
8629 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8631 /* Some true dependencies can have a higher cost depending
8632 on precisely how certain input operands are used. */
8633 if (REG_NOTE_KIND(link) == 0
8634 && recog_memoized (insn) >= 0
8635 && recog_memoized (dep) >= 0)
8637 int shift_opnum = get_attr_shift (insn);
8638 enum attr_type attr_type = get_attr_type (dep);
8640 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8641 operand for INSN. If we have a shifted input operand and the
8642 instruction we depend on is another ALU instruction, then we may
8643 have to account for an additional stall. */
8644 if (shift_opnum != 0
8645 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
8647 rtx shifted_operand;
8650 /* Get the shifted operand. */
8651 extract_insn (insn);
8652 shifted_operand = recog_data.operand[shift_opnum];
8654 /* Iterate over all the operands in DEP. If we write an operand
8655 that overlaps with SHIFTED_OPERAND, then we have to increase the
8656 cost of this dependency. */
8658 preprocess_constraints ();
8659 for (opno = 0; opno < recog_data.n_operands; opno++)
8661 /* We can ignore strict inputs. */
8662 if (recog_data.operand_type[opno] == OP_IN)
8665 if (reg_overlap_mentioned_p (recog_data.operand[opno],
8677 /* Adjust cost hook for Cortex A9. */
8679 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8681 switch (REG_NOTE_KIND (link))
8688 case REG_DEP_OUTPUT:
8689 if (recog_memoized (insn) >= 0
8690 && recog_memoized (dep) >= 0)
8692 if (GET_CODE (PATTERN (insn)) == SET)
8695 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8697 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8699 enum attr_type attr_type_insn = get_attr_type (insn);
8700 enum attr_type attr_type_dep = get_attr_type (dep);
8702 /* By default all dependencies of the form
8703 s0 = s0 <op> s1
8704 s0 = s0 <op> s2
8705 have an extra latency of 1 cycle because
8706 of the input and output dependency in this
8707 case. However this gets modeled as a true
8708 dependency and hence all these checks. */
8709 if (REG_P (SET_DEST (PATTERN (insn)))
8710 && REG_P (SET_DEST (PATTERN (dep)))
8711 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8712 SET_DEST (PATTERN (dep))))
8714 /* FMACS is a special case where the dependent
8715 instruction can be issued 3 cycles before
8716 the normal latency in case of an output
8717 dependency. */
8718 if ((attr_type_insn == TYPE_FMACS
8719 || attr_type_insn == TYPE_FMACD)
8720 && (attr_type_dep == TYPE_FMACS
8721 || attr_type_dep == TYPE_FMACD))
8723 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8724 *cost = insn_default_latency (dep) - 3;
8726 *cost = insn_default_latency (dep);
8731 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8732 *cost = insn_default_latency (dep) + 1;
8734 *cost = insn_default_latency (dep);
8750 /* Adjust cost hook for FA726TE. */
8752 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8754 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
8755 followed by a predicated one) has a penalty of 3. */
8756 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8757 && recog_memoized (insn) >= 0
8758 && recog_memoized (dep) >= 0
8759 && get_attr_conds (dep) == CONDS_SET)
8761 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8762 if (get_attr_conds (insn) == CONDS_USE
8763 && get_attr_type (insn) != TYPE_BRANCH)
8769 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8770 || get_attr_conds (insn) == CONDS_USE)
8780 /* Implement TARGET_REGISTER_MOVE_COST.
8782 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
8783 such a move is typically more expensive than a single memory access. We set
8784 the cost to less than two memory accesses so that floating
8785 point to integer conversion does not go through memory. */
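/* For example, if one memory access is modelled with cost M, the
   VFP<->core transfer must be given a cost strictly below 2*M, or a
   float-to-int conversion would be done as a store plus a load through
   the stack instead of a single register move. */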
8788 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
8789 reg_class_t from, reg_class_t to)
8793 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
8794 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
8796 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
8797 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
8799 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
8806 if (from == HI_REGS || to == HI_REGS)
8813 /* Implement TARGET_MEMORY_MOVE_COST. */
8816 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
8817 bool in ATTRIBUTE_UNUSED)
8823 if (GET_MODE_SIZE (mode) < 4)
8826 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
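/* Worked example for the expression above: an SImode value (4 bytes)
   costs 2 * 4 * 1 = 8 when RCLASS is LO_REGS and 2 * 4 * 2 = 16 for any
   other class. */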
8830 /* Vectorizer cost model implementation. */
8832 /* Implement targetm.vectorize.builtin_vectorization_cost. */
8834 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
8836 int misalign ATTRIBUTE_UNUSED)
8840 switch (type_of_cost)
8843 return current_tune->vec_costs->scalar_stmt_cost;
8846 return current_tune->vec_costs->scalar_load_cost;
8849 return current_tune->vec_costs->scalar_store_cost;
8852 return current_tune->vec_costs->vec_stmt_cost;
8855 return current_tune->vec_costs->vec_align_load_cost;
8858 return current_tune->vec_costs->vec_store_cost;
8861 return current_tune->vec_costs->vec_to_scalar_cost;
8864 return current_tune->vec_costs->scalar_to_vec_cost;
8866 case unaligned_load:
8867 return current_tune->vec_costs->vec_unalign_load_cost;
8869 case unaligned_store:
8870 return current_tune->vec_costs->vec_unalign_store_cost;
8872 case cond_branch_taken:
8873 return current_tune->vec_costs->cond_taken_branch_cost;
8875 case cond_branch_not_taken:
8876 return current_tune->vec_costs->cond_not_taken_branch_cost;
8879 case vec_promote_demote:
8880 return current_tune->vec_costs->vec_stmt_cost;
8883 elements = TYPE_VECTOR_SUBPARTS (vectype);
8884 return elements / 2 + 1;
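/* Worked example for the case above: a vector type with 4 subparts is
   costed at 4 / 2 + 1 = 3. */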
8891 /* Implement targetm.vectorize.add_stmt_cost. */
8894 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
8895 struct _stmt_vec_info *stmt_info, int misalign,
8896 enum vect_cost_model_location where)
8898 unsigned *cost = (unsigned *) data;
8899 unsigned retval = 0;
8901 if (flag_vect_cost_model)
8903 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
8904 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
8906 /* Statements in an inner loop relative to the loop being
8907 vectorized are weighted more heavily. The value here is
8908 arbitrary and could potentially be improved with analysis. */
8909 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
8910 count *= 50; /* FIXME. */
8912 retval = (unsigned) (count * stmt_cost);
8913 cost[where] += retval;
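/* Example of the accumulation above (hypothetical numbers): a statement
   in the body of an inner loop with stmt_cost 2 and count 1 has its count
   scaled to 50, adding 50 * 2 = 100 to cost[vect_body]. */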
8919 /* Return true if and only if this insn can dual-issue only as older. */
8921 cortexa7_older_only (rtx insn)
8923 if (recog_memoized (insn) < 0)
8926 if (get_attr_insn (insn) == INSN_MOV)
8929 switch (get_attr_type (insn))
8932 case TYPE_LOAD_BYTE:
8961 /* Return true if and only if this insn can dual-issue as younger. */
8963 cortexa7_younger (FILE *file, int verbose, rtx insn)
8965 if (recog_memoized (insn) < 0)
8968 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
8972 if (get_attr_insn (insn) == INSN_MOV)
8975 switch (get_attr_type (insn))
8977 case TYPE_SIMPLE_ALU_IMM:
8978 case TYPE_SIMPLE_ALU_SHIFT:
8988 /* Look for an instruction that can dual issue only as an older
8989 instruction, and move it in front of any instructions that can
8990 dual-issue as younger, while preserving the relative order of all
8991 other instructions in the ready list. This is a heuristic to help
8992 dual-issue in later cycles, by postponing issue of more flexible
8993 instructions. This heuristic may affect dual issue opportunities
8994 in the current cycle. */
8996 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
9000 int first_older_only = -1, first_younger = -1;
9004 ";; sched_reorder for cycle %d with %d insns in ready list\n",
9008 /* Traverse the ready list from the head (the instruction to issue
9009 first), looking for the first instruction that can issue as
9010 younger and the first instruction that can dual-issue only as
9011 older. */
9012 for (i = *n_readyp - 1; i >= 0; i--)
9014 rtx insn = ready[i];
9015 if (cortexa7_older_only (insn))
9017 first_older_only = i;
9019 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
9022 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
9026 /* Nothing to reorder because either no younger insn was found, or an insn
9027 that can dual-issue only as older appears before any insn that
9028 can dual-issue as younger. */
9029 if (first_younger == -1)
9032 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
9036 /* Nothing to reorder because no older-only insn in the ready list. */
9037 if (first_older_only == -1)
9040 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
9044 /* Move first_older_only insn before first_younger. */
9046 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
9047 INSN_UID(ready [first_older_only]),
9048 INSN_UID(ready [first_younger]));
9049 rtx first_older_only_insn = ready [first_older_only];
9050 for (i = first_older_only; i < first_younger; i++)
9052 ready[i] = ready[i+1];
9055 ready[i] = first_older_only_insn;
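/* Illustrative trace (hypothetical ready list, head at the highest index):
   with ready = {I0, OLD, I2, YNG}, first_older_only == 1 and
   first_younger == 3, the loop shifts I2 and YNG down one slot and stores
   OLD at index 3, giving {I0, I2, YNG, OLD}, so OLD now issues before
   YNG. */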
9059 /* Implement TARGET_SCHED_REORDER. */
9061 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
9067 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
9070 /* Do nothing for other cores. */
9074 return arm_issue_rate ();
9077 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
9078 It corrects the value of COST based on the relationship between
9079 INSN and DEP through the dependence LINK. It returns the new
9080 value. There is a per-core adjust_cost hook to adjust scheduler costs
9081 and the per-core hook can choose to completely override the generic
9082 adjust_cost function. Only put bits of code into arm_adjust_cost that
9083 are common across all cores. */
9085 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
9089 /* When generating Thumb-1 code, we want to place flag-setting operations
9090 close to a conditional branch which depends on them, so that we can
9091 omit the comparison. */
9093 && REG_NOTE_KIND (link) == 0
9094 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
9095 && recog_memoized (dep) >= 0
9096 && get_attr_conds (dep) == CONDS_SET)
9099 if (current_tune->sched_adjust_cost != NULL)
9101 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
9105 /* XXX Is this strictly true? */
9106 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9107 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
9110 /* Call insns don't incur a stall, even if they follow a load. */
9111 if (REG_NOTE_KIND (link) == 0
9115 if ((i_pat = single_set (insn)) != NULL
9116 && MEM_P (SET_SRC (i_pat))
9117 && (d_pat = single_set (dep)) != NULL
9118 && MEM_P (SET_DEST (d_pat)))
9120 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
9121 /* This is a load after a store; there is no conflict if the load reads
9122 from a cached area. Assume that loads from the stack and from the
9123 constant pool are cached, and that others will miss. This is a
9124 reasonable approximation. */
9126 if ((GET_CODE (src_mem) == SYMBOL_REF
9127 && CONSTANT_POOL_ADDRESS_P (src_mem))
9128 || reg_mentioned_p (stack_pointer_rtx, src_mem)
9129 || reg_mentioned_p (frame_pointer_rtx, src_mem)
9130 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
9138 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
9141 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
9143 return (optimize > 0) ? 2 : 0;
9147 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
9149 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
9152 static bool fp_consts_inited = false;
9154 static REAL_VALUE_TYPE value_fp0;
9157 init_fp_table (void)
9161 r = REAL_VALUE_ATOF ("0", DFmode);
9163 fp_consts_inited = true;
9166 /* Return TRUE if rtx X is a valid immediate FP constant. */
9168 arm_const_double_rtx (rtx x)
9172 if (!fp_consts_inited)
9175 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9176 if (REAL_VALUE_MINUS_ZERO (r))
9179 if (REAL_VALUES_EQUAL (r, value_fp0))
9185 /* VFPv3 has a fairly wide range of representable immediates, formed from
9186 "quarter-precision" floating-point values. These can be evaluated using this
9187 formula (with ^ for exponentiation):
9189 -1^s * n * 2^-r
9191 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
9192 16 <= n <= 31 and 0 <= r <= 7.
9194 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
9196 - A (most-significant) is the sign bit.
9197 - BCD are the exponent (encoded as r XOR 3).
9198 - EFGH are the mantissa (encoded as n - 16).
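/* Worked example of the encoding above: 1.0 = 16 * 2^-4, i.e. s = 0,
   n = 16 and r = 4, so A = 0, BCD = 4 XOR 3 = 0b111 and
   EFGH = 16 - 16 = 0b0000, giving the 8-bit index 0b01110000 (0x70). */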
9201 /* Return an integer index for a VFPv3 immediate operand X suitable for the
9202 fconst[sd] instruction, or -1 if X isn't suitable. */
9204 vfp3_const_double_index (rtx x)
9206 REAL_VALUE_TYPE r, m;
9208 unsigned HOST_WIDE_INT mantissa, mant_hi;
9209 unsigned HOST_WIDE_INT mask;
9210 HOST_WIDE_INT m1, m2;
9211 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
9213 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
9216 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9218 /* We can't represent these things, so detect them first. */
9219 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
9222 /* Extract sign, exponent and mantissa. */
9223 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
9224 r = real_value_abs (&r);
9225 exponent = REAL_EXP (&r);
9226 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
9227 highest (sign) bit, with a fixed binary point at bit point_pos.
9228 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
9229 bits for the mantissa, this may fail (low bits would be lost). */
9230 real_ldexp (&m, &r, point_pos - exponent);
9231 REAL_VALUE_TO_INT (&m1, &m2, m);
9235 /* If there are bits set in the low part of the mantissa, we can't
9236 represent this value. */
9240 /* Now make it so that mantissa contains the most-significant bits, and move
9241 the point_pos to indicate that the least-significant bits have been
9242 discarded. */
9243 point_pos -= HOST_BITS_PER_WIDE_INT;
9246 /* We can permit four significant bits of mantissa only, plus a high bit
9247 which is always 1. */
9248 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
9249 if ((mantissa & mask) != 0)
9252 /* Now we know the mantissa is in range, chop off the unneeded bits. */
9253 mantissa >>= point_pos - 5;
9255 /* The mantissa may be zero. Disallow that case. (It's possible to load the
9256 floating-point immediate zero with Neon using an integer-zero load, but
9257 that case is handled elsewhere.) */
9261 gcc_assert (mantissa >= 16 && mantissa <= 31);
9263 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
9264 normalized significands are in the range [1, 2). (Our mantissa is shifted
9265 left 4 places at this point relative to normalized IEEE754 values). GCC
9266 internally uses [0.5, 1) (see real.c), so the exponent returned from
9267 REAL_EXP must be altered. */
9268 exponent = 5 - exponent;
9270 if (exponent < 0 || exponent > 7)
9273 /* Sign, mantissa and exponent are now in the correct form to plug into the
9274 formula described in the comment above. */
9275 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
9278 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
9280 vfp3_const_double_rtx (rtx x)
9285 return vfp3_const_double_index (x) != -1;
9288 /* Recognize immediates which can be used in various Neon instructions. Legal
9289 immediates are described by the following table (for VMVN variants, the
9290 bitwise inverse of the constant shown is recognized. In either case, VMOV
9291 is output and the correct instruction to use for a given constant is chosen
9292 by the assembler). The constant shown is replicated across all elements of
9293 the destination vector.
9295 insn elems variant constant (binary)
9296 ---- ----- ------- -----------------
9297 vmov i32 0 00000000 00000000 00000000 abcdefgh
9298 vmov i32 1 00000000 00000000 abcdefgh 00000000
9299 vmov i32 2 00000000 abcdefgh 00000000 00000000
9300 vmov i32 3 abcdefgh 00000000 00000000 00000000
9301 vmov i16 4 00000000 abcdefgh
9302 vmov i16 5 abcdefgh 00000000
9303 vmvn i32 6 00000000 00000000 00000000 abcdefgh
9304 vmvn i32 7 00000000 00000000 abcdefgh 00000000
9305 vmvn i32 8 00000000 abcdefgh 00000000 00000000
9306 vmvn i32 9 abcdefgh 00000000 00000000 00000000
9307 vmvn i16 10 00000000 abcdefgh
9308 vmvn i16 11 abcdefgh 00000000
9309 vmov i32 12 00000000 00000000 abcdefgh 11111111
9310 vmvn i32 13 00000000 00000000 abcdefgh 11111111
9311 vmov i32 14 00000000 abcdefgh 11111111 11111111
9312 vmvn i32 15 00000000 abcdefgh 11111111 11111111
9313 vmov i8 16 abcdefgh
9314 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
9315 eeeeeeee ffffffff gggggggg hhhhhhhh
9316 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
9317 vmov f32 19 00000000 00000000 00000000 00000000
9319 For case 18, B = !b. Representable values are exactly those accepted by
9320 vfp3_const_double_index, but are output as floating-point numbers rather
9321 than integers.
9323 For case 19, we will change it to vmov.i32 when assembling.
9325 Variants 0-5 (inclusive) may also be used as immediates for the second
9326 operand of VORR/VBIC instructions.
9328 The INVERSE argument causes the bitwise inverse of the given operand to be
9329 recognized instead (used for recognizing legal immediates for the VAND/VORN
9330 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
9331 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
9332 output, rather than the real insns vbic/vorr).
9334 INVERSE makes no difference to the recognition of float vectors.
9336 The return value is the variant of immediate as shown in the above table, or
9337 -1 if the given value doesn't match any of the listed patterns.
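/* Example (a sketch, names hypothetical): a V4SImode constant with every
   element 0x2a splats into the byte pattern 2a 00 00 00 repeated, which
   matches variant 0. */
#if 0
  /* Not part of the build; usage sketch only. */
  rtx modconst;
  int width;
  int variant = neon_valid_immediate (op, V4SImode, 0, &modconst, &width);
  /* Here variant == 0, width == 32 and INTVAL (modconst) == 0x2a. */
#endif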
9340 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
9341 rtx *modconst, int *elementwidth)
9343 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
9345 for (i = 0; i < idx; i += (STRIDE)) \
9350 immtype = (CLASS); \
9351 elsize = (ELSIZE); \
9355 unsigned int i, elsize = 0, idx = 0, n_elts;
9356 unsigned int innersize;
9357 unsigned char bytes[16];
9358 int immtype = -1, matches;
9359 unsigned int invmask = inverse ? 0xff : 0;
9360 bool vector = GET_CODE (op) == CONST_VECTOR;
9364 n_elts = CONST_VECTOR_NUNITS (op);
9365 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9370 if (mode == VOIDmode)
9372 innersize = GET_MODE_SIZE (mode);
9375 /* Vectors of float constants. */
9376 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
9378 rtx el0 = CONST_VECTOR_ELT (op, 0);
9381 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
9384 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
9386 for (i = 1; i < n_elts; i++)
9388 rtx elt = CONST_VECTOR_ELT (op, i);
9391 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
9393 if (!REAL_VALUES_EQUAL (r0, re))
9398 *modconst = CONST_VECTOR_ELT (op, 0);
9403 if (el0 == CONST0_RTX (GET_MODE (el0)))
9409 /* Splat vector constant out into a byte vector. */
9410 for (i = 0; i < n_elts; i++)
9412 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
9413 unsigned HOST_WIDE_INT elpart;
9414 unsigned int part, parts;
9416 if (CONST_INT_P (el))
9418 elpart = INTVAL (el);
9421 else if (CONST_DOUBLE_P (el))
9423 elpart = CONST_DOUBLE_LOW (el);
9429 for (part = 0; part < parts; part++)
9432 for (byte = 0; byte < innersize; byte++)
9434 bytes[idx++] = (elpart & 0xff) ^ invmask;
9435 elpart >>= BITS_PER_UNIT;
9437 if (CONST_DOUBLE_P (el))
9438 elpart = CONST_DOUBLE_HIGH (el);
9443 gcc_assert (idx == GET_MODE_SIZE (mode));
9447 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
9448 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9450 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9451 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9453 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
9454 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9456 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
9457 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
9459 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
9461 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
9463 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
9464 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9466 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9467 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9469 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
9470 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9472 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
9473 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
9475 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
9477 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
9479 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9480 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9482 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9483 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9485 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
9486 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9488 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
9489 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9491 CHECK (1, 8, 16, bytes[i] == bytes[0]);
9493 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
9494 && bytes[i] == bytes[(i + 8) % idx]);
9502 *elementwidth = elsize;
9506 unsigned HOST_WIDE_INT imm = 0;
9508 /* Un-invert bytes of recognized vector, if necessary. */
9510 for (i = 0; i < idx; i++)
9511 bytes[i] ^= invmask;
9515 /* FIXME: Broken on 32-bit H_W_I hosts. */
9516 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
9518 for (i = 0; i < 8; i++)
9519 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
9520 << (i * BITS_PER_UNIT);
9522 *modconst = GEN_INT (imm);
9526 unsigned HOST_WIDE_INT imm = 0;
9528 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
9529 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
9531 *modconst = GEN_INT (imm);
9539 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
9540 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
9541 float elements), and a modified constant (whatever should be output for a
9542 VMOV) in *MODCONST. */
9545 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
9546 rtx *modconst, int *elementwidth)
9550 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
9556 *modconst = tmpconst;
9559 *elementwidth = tmpwidth;
9564 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
9565 the immediate is valid, write a constant suitable for using as an operand
9566 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
9567 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
9570 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
9571 rtx *modconst, int *elementwidth)
9575 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
9577 if (retval < 0 || retval > 5)
9581 *modconst = tmpconst;
9584 *elementwidth = tmpwidth;
9589 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
9590 the immediate is valid, write a constant suitable for using as an operand
9591 to VSHR/VSHL to *MODCONST and the corresponding element width to
9592 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether the shift is a left shift,
9593 since left and right shifts have different immediate limitations. */
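/* For example, with 32-bit elements a left-shift immediate may be 0 to 31
   inclusive, while a right-shift immediate may be 1 to 32 inclusive. */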
9596 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
9597 rtx *modconst, int *elementwidth,
9600 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9601 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
9602 unsigned HOST_WIDE_INT last_elt = 0;
9603 unsigned HOST_WIDE_INT maxshift;
9605 /* Split vector constant out into a byte vector. */
9606 for (i = 0; i < n_elts; i++)
9608 rtx el = CONST_VECTOR_ELT (op, i);
9609 unsigned HOST_WIDE_INT elpart;
9611 if (CONST_INT_P (el))
9612 elpart = INTVAL (el);
9613 else if (CONST_DOUBLE_P (el))
9618 if (i != 0 && elpart != last_elt)
9624 /* Shift less than element size. */
9625 maxshift = innersize * 8;
9629 /* Left shift immediate value can be from 0 to <size>-1. */
9630 if (last_elt >= maxshift)
9635 /* Right shift immediate value can be from 1 to <size>. */
9636 if (last_elt == 0 || last_elt > maxshift)
9641 *elementwidth = innersize * 8;
9644 *modconst = CONST_VECTOR_ELT (op, 0);
9649 /* Return a string suitable for output of Neon immediate logic operation
9653 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9654 int inverse, int quad)
9656 int width, is_valid;
9657 static char templ[40];
9659 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9661 gcc_assert (is_valid != 0);
9664 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9666 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
9671 /* Return a string suitable for output of Neon immediate shift operation
9672 (VSHR or VSHL) MNEM. */
9675 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9676 enum machine_mode mode, int quad,
9679 int width, is_valid;
9680 static char templ[40];
9682 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9683 gcc_assert (is_valid != 0);
9686 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9688 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9693 /* Output a sequence of pairwise operations to implement a reduction.
9694 NOTE: We do "too much work" here, because pairwise operations work on two
9695 registers-worth of operands in one go. Unfortunately we can't exploit those
9696 extra calculations to do the full operation in fewer steps, as far as we can tell.
9697 Although all vector elements of the result but the first are ignored, we
9698 actually calculate the same result in each of the elements. An alternative
9699 such as initially loading a vector with zero to use as each of the second
9700 operands would use up an additional register and take an extra instruction,
9701 for no particular gain. */
9704 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9705 rtx (*reduc) (rtx, rtx, rtx))
9707 enum machine_mode inner = GET_MODE_INNER (mode);
9708 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9711 for (i = parts / 2; i >= 1; i /= 2)
9713 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
9714 emit_insn (reduc (dest, tmpsum, tmpsum));
9719 /* If VALS is a vector constant that can be loaded into a register
9720 using VDUP, generate instructions to do so and return an RTX to
9721 assign to the register. Otherwise return NULL_RTX. */
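/* For instance (a sketch): the V2SImode constant {7, 7} can be loaded as
     mov r0, #7
     vdup.32 d0, r0
   whereas {7, 8} has differing elements and NULL_RTX is returned. */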
9724 neon_vdup_constant (rtx vals)
9726 enum machine_mode mode = GET_MODE (vals);
9727 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9728 int n_elts = GET_MODE_NUNITS (mode);
9729 bool all_same = true;
9733 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
9736 for (i = 0; i < n_elts; ++i)
9738 x = XVECEXP (vals, 0, i);
9739 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9744 /* The elements are not all the same. We could handle repeating
9745 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9746 {0, C, 0, C, 0, C, 0, C} which can be loaded using
9747 vdup.i16). */
9750 /* We can load this constant by using VDUP and a constant in a
9751 single ARM register. This will be cheaper than a vector
9752 load. */
9754 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9755 return gen_rtx_VEC_DUPLICATE (mode, x);
9758 /* Generate code to load VALS, which is a PARALLEL containing only
9759 constants (for vec_init) or CONST_VECTOR, efficiently into a
9760 register. Returns an RTX to copy into the register, or NULL_RTX
9761 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
9764 neon_make_constant (rtx vals)
9766 enum machine_mode mode = GET_MODE (vals);
9768 rtx const_vec = NULL_RTX;
9769 int n_elts = GET_MODE_NUNITS (mode);
9773 if (GET_CODE (vals) == CONST_VECTOR)
9775 else if (GET_CODE (vals) == PARALLEL)
9777 /* A CONST_VECTOR must contain only CONST_INTs and
9778 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9779 Only store valid constants in a CONST_VECTOR. */
9780 for (i = 0; i < n_elts; ++i)
9782 rtx x = XVECEXP (vals, 0, i);
9783 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
9786 if (n_const == n_elts)
9787 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9792 if (const_vec != NULL
9793 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9794 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9796 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9797 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9798 pipeline cycle; creating the constant takes one or two ARM
9801 else if (const_vec != NULL_RTX)
9802 /* Load from constant pool. On Cortex-A8 this takes two cycles
9803 (for either double or quad vectors). We cannot take advantage
9804 of single-cycle VLD1 because we need a PC-relative addressing
9805 mode. */
9808 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9809 We cannot construct an initializer. */
9813 /* Initialize vector TARGET to VALS. */
9816 neon_expand_vector_init (rtx target, rtx vals)
9818 enum machine_mode mode = GET_MODE (target);
9819 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9820 int n_elts = GET_MODE_NUNITS (mode);
9821 int n_var = 0, one_var = -1;
9822 bool all_same = true;
9826 for (i = 0; i < n_elts; ++i)
9828 x = XVECEXP (vals, 0, i);
9829 if (!CONSTANT_P (x))
9830 ++n_var, one_var = i;
9832 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9838 rtx constant = neon_make_constant (vals);
9839 if (constant != NULL_RTX)
9841 emit_move_insn (target, constant);
9846 /* Splat a single non-constant element if we can. */
9847 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9849 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9850 emit_insn (gen_rtx_SET (VOIDmode, target,
9851 gen_rtx_VEC_DUPLICATE (mode, x)));
9855 /* One field is non-constant. Load constant then overwrite varying
9856 field. This is more efficient than using the stack. */
9859 rtx copy = copy_rtx (vals);
9860 rtx index = GEN_INT (one_var);
9862 /* Load constant part of vector, substitute neighboring value for
9863 varying element. */
9864 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9865 neon_expand_vector_init (target, copy);
9867 /* Insert variable. */
9868 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9872 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9875 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9878 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9881 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9884 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9887 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9890 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9893 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9896 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9904 /* Construct the vector in memory one field at a time
9905 and load the whole vector. */
9906 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
9907 for (i = 0; i < n_elts; i++)
9908 emit_move_insn (adjust_address_nv (mem, inner_mode,
9909 i * GET_MODE_SIZE (inner_mode)),
9910 XVECEXP (vals, 0, i));
9911 emit_move_insn (target, mem);
9914 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9915 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9916 reported source locations are bogus. */
9919 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9924 gcc_assert (CONST_INT_P (operand));
9926 lane = INTVAL (operand);
9928 if (lane < low || lane >= high)
9932 /* Bounds-check lanes. */
9935 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9937 bounds_check (operand, low, high, "lane out of range");
9940 /* Bounds-check constants. */
9943 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9945 bounds_check (operand, low, high, "constant out of range");
9949 neon_element_bits (enum machine_mode mode)
9952 return GET_MODE_BITSIZE (mode);
9954 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9958 /* Predicates for `match_operand' and `match_operator'. */
9960 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9961 WB is true if full writeback address modes are allowed and is false
9962 if limited writeback address modes (POST_INC and PRE_DEC) are
9963 allowed. */
9966 arm_coproc_mem_operand (rtx op, bool wb)
9970 /* Reject eliminable registers. */
9971 if (! (reload_in_progress || reload_completed)
9972 && ( reg_mentioned_p (frame_pointer_rtx, op)
9973 || reg_mentioned_p (arg_pointer_rtx, op)
9974 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9975 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9976 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9977 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9980 /* Constants are converted into offsets from labels. */
9986 if (reload_completed
9987 && (GET_CODE (ind) == LABEL_REF
9988 || (GET_CODE (ind) == CONST
9989 && GET_CODE (XEXP (ind, 0)) == PLUS
9990 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9991 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
9994 /* Match: (mem (reg)). */
9996 return arm_address_register_rtx_p (ind, 0);
9998 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
9999 acceptable in any case (subject to verification by
10000 arm_address_register_rtx_p). We need WB to be true to accept
10001 PRE_INC and POST_DEC. */
10002 if (GET_CODE (ind) == POST_INC
10003 || GET_CODE (ind) == PRE_DEC
10005 && (GET_CODE (ind) == PRE_INC
10006 || GET_CODE (ind) == POST_DEC)))
10007 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
10010 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
10011 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
10012 && GET_CODE (XEXP (ind, 1)) == PLUS
10013 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
10014 ind = XEXP (ind, 1);
10019 if (GET_CODE (ind) == PLUS
10020 && REG_P (XEXP (ind, 0))
10021 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
10022 && CONST_INT_P (XEXP (ind, 1))
10023 && INTVAL (XEXP (ind, 1)) > -1024
10024 && INTVAL (XEXP (ind, 1)) < 1024
10025 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
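/* So, for example, offsets -1020 through 1020 in steps of 4 are accepted
   here; an unaligned offset such as 2, or an out-of-range one such as
   1024, is rejected. */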
10031 /* Return TRUE if OP is a memory operand which we can load or store a vector
10032 to/from. TYPE is one of the following values:
10033 0 - Vector load/store (vldr)
10034 1 - Core registers (ldm)
10035 2 - Element/structure loads (vld1)
10038 neon_vector_mem_operand (rtx op, int type)
10042 /* Reject eliminable registers. */
10043 if (! (reload_in_progress || reload_completed)
10044 && ( reg_mentioned_p (frame_pointer_rtx, op)
10045 || reg_mentioned_p (arg_pointer_rtx, op)
10046 || reg_mentioned_p (virtual_incoming_args_rtx, op)
10047 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
10048 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
10049 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
10052 /* Constants are converted into offsets from labels. */
10056 ind = XEXP (op, 0);
10058 if (reload_completed
10059 && (GET_CODE (ind) == LABEL_REF
10060 || (GET_CODE (ind) == CONST
10061 && GET_CODE (XEXP (ind, 0)) == PLUS
10062 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
10063 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
10066 /* Match: (mem (reg)). */
10068 return arm_address_register_rtx_p (ind, 0);
10070 /* Allow post-increment with Neon registers. */
10071 if ((type != 1 && GET_CODE (ind) == POST_INC)
10072 || (type == 0 && GET_CODE (ind) == PRE_DEC))
10073 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
10075 /* FIXME: vld1 allows register post-modify. */
10081 && GET_CODE (ind) == PLUS
10082 && REG_P (XEXP (ind, 0))
10083 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
10084 && CONST_INT_P (XEXP (ind, 1))
10085 && INTVAL (XEXP (ind, 1)) > -1024
10086 /* For quad modes, we restrict the constant offset to be slightly less
10087 than what the instruction format permits. We have no such constraint
10088 on double mode offsets. (This must match arm_legitimate_index_p.) */
10089 && (INTVAL (XEXP (ind, 1))
10090 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
10091 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
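/* E.g. for a quad-register mode the largest accepted offset is 1012 (the
   highest multiple of 4 below 1016), while double-register modes allow
   offsets up to 1020. */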
10097 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
10098 type. */
10100 neon_struct_mem_operand (rtx op)
10104 /* Reject eliminable registers. */
10105 if (! (reload_in_progress || reload_completed)
10106 && ( reg_mentioned_p (frame_pointer_rtx, op)
10107 || reg_mentioned_p (arg_pointer_rtx, op)
10108 || reg_mentioned_p (virtual_incoming_args_rtx, op)
10109 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
10110 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
10111 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
10114 /* Constants are converted into offsets from labels. */
10118 ind = XEXP (op, 0);
10120 if (reload_completed
10121 && (GET_CODE (ind) == LABEL_REF
10122 || (GET_CODE (ind) == CONST
10123 && GET_CODE (XEXP (ind, 0)) == PLUS
10124 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
10125 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
10128 /* Match: (mem (reg)). */
10130 return arm_address_register_rtx_p (ind, 0);
10132 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
10133 if (GET_CODE (ind) == POST_INC
10134 || GET_CODE (ind) == PRE_DEC)
10135 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
10140 /* Return true if X is a register that will be eliminated later on. */
10142 arm_eliminable_register (rtx x)
10144 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
10145 || REGNO (x) == ARG_POINTER_REGNUM
10146 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
10147 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
10150 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
10151 coprocessor registers. Otherwise return NO_REGS. */
10154 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
10156 if (mode == HFmode)
10158 if (!TARGET_NEON_FP16)
10159 return GENERAL_REGS;
10160 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
10162 return GENERAL_REGS;
10165 /* The neon move patterns handle all legitimate vector and struct
10168 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
10169 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
10170 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
10171 || VALID_NEON_STRUCT_MODE (mode)))
10174 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
10177 return GENERAL_REGS;
10180 /* Values which must be returned in the most-significant end of the return
10184 arm_return_in_msb (const_tree valtype)
10186 return (TARGET_AAPCS_BASED
10187 && BYTES_BIG_ENDIAN
10188 && (AGGREGATE_TYPE_P (valtype)
10189 || TREE_CODE (valtype) == COMPLEX_TYPE
10190 || FIXED_POINT_TYPE_P (valtype)));
10193 /* Return TRUE if X references a SYMBOL_REF. */
10195 symbol_mentioned_p (rtx x)
10200 if (GET_CODE (x) == SYMBOL_REF)
10203 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
10204 are constant offsets, not symbols. */
10205 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
10208 fmt = GET_RTX_FORMAT (GET_CODE (x));
10210 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10216 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10217 if (symbol_mentioned_p (XVECEXP (x, i, j)))
10220 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
10227 /* Return TRUE if X references a LABEL_REF. */
10229 label_mentioned_p (rtx x)
10234 if (GET_CODE (x) == LABEL_REF)
10237 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
10238 instruction, but they are constant offsets, not symbols. */
10239 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
10242 fmt = GET_RTX_FORMAT (GET_CODE (x));
10243 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10249 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10250 if (label_mentioned_p (XVECEXP (x, i, j)))
10253 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
10261 tls_mentioned_p (rtx x)
10263 switch (GET_CODE (x))
10266 return tls_mentioned_p (XEXP (x, 0));
10269 if (XINT (x, 1) == UNSPEC_TLS)
10277 /* Must not copy any rtx that uses a pc-relative address. */
10280 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
10282 if (GET_CODE (*x) == UNSPEC
10283 && (XINT (*x, 1) == UNSPEC_PIC_BASE
10284 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
10290 arm_cannot_copy_insn_p (rtx insn)
10292 /* The tls call insn cannot be copied, as it is paired with a data
10293 word. */
10294 if (recog_memoized (insn) == CODE_FOR_tlscall)
10297 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
10301 minmax_code (rtx x)
10303 enum rtx_code code = GET_CODE (x);
10316 gcc_unreachable ();
10320 /* Match pair of min/max operators that can be implemented via usat/ssat. */
10323 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
10324 int *mask, bool *signed_sat)
10326 /* The high bound must be a power of two minus one. */
10327 int log = exact_log2 (INTVAL (hi_bound) + 1);
10331 /* The low bound is either zero (for usat) or one less than the
10332 negation of the high bound (for ssat). */
10333 if (INTVAL (lo_bound) == 0)
10338 *signed_sat = false;
10343 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
10348 *signed_sat = true;
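/* Worked examples: bounds [0, 255] have 255 == 2^8 - 1 and a zero low
   bound, so they match as unsigned saturation (usat); bounds [-128, 127]
   satisfy -128 == -127 - 1, so they match as signed saturation (ssat). */
#if 0
  /* Not part of the build; usage sketch only. */
  int mask;
  bool is_signed;
  bool ok = arm_sat_operator_match (GEN_INT (-128), GEN_INT (127),
				    &mask, &is_signed);
#endif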
10356 /* Return 1 if memory locations are adjacent. */
10358 adjacent_mem_locations (rtx a, rtx b)
10360 /* We don't guarantee to preserve the order of these memory refs. */
10361 if (volatile_refs_p (a) || volatile_refs_p (b))
10364 if ((REG_P (XEXP (a, 0))
10365 || (GET_CODE (XEXP (a, 0)) == PLUS
10366 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
10367 && (REG_P (XEXP (b, 0))
10368 || (GET_CODE (XEXP (b, 0)) == PLUS
10369 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
10371 HOST_WIDE_INT val0 = 0, val1 = 0;
10375 if (GET_CODE (XEXP (a, 0)) == PLUS)
10377 reg0 = XEXP (XEXP (a, 0), 0);
10378 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
10381 reg0 = XEXP (a, 0);
10383 if (GET_CODE (XEXP (b, 0)) == PLUS)
10385 reg1 = XEXP (XEXP (b, 0), 0);
10386 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
10389 reg1 = XEXP (b, 0);
10391 /* Don't accept any offset that will require multiple
10392 instructions to handle, since this would cause the
10393 arith_adjacentmem pattern to output an overlong sequence. */
10394 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
10397 /* Don't allow an eliminable register: register elimination can make
10398 the offset too large. */
10399 if (arm_eliminable_register (reg0))
10402 val_diff = val1 - val0;
10406 /* If the target has load delay slots, then there's no benefit
10407 to using an ldm instruction unless the offset is zero and
10408 we are optimizing for size. */
10409 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
10410 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
10411 && (val_diff == 4 || val_diff == -4));
10414 return ((REGNO (reg0) == REGNO (reg1))
10415 && (val_diff == 4 || val_diff == -4));
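/* E.g. mems at [r4, #8] and [r4, #12] are adjacent (val_diff == 4), as are
   [r4, #12] and [r4, #8] (val_diff == -4); [r4, #8] and [r4, #16] are
   not. */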
10421 /* Return true if OP is a valid load or store multiple operation. LOAD is true
10422 for load operations, false for store operations. CONSECUTIVE is true
10423 if the register numbers in the operation must be consecutive in the register
10424 bank. RETURN_PC is true if the value is to be loaded into the PC.
10425 The pattern we are trying to match for load is:
10426 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
10427 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
10430 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
10433 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
10434 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
10435 3. If consecutive is TRUE, then for kth register being loaded,
10436 REGNO (R_dk) = REGNO (R_d0) + k.
10437 The pattern for store is similar. */
10439 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
10440 bool consecutive, bool return_pc)
10442 HOST_WIDE_INT count = XVECLEN (op, 0);
10443 rtx reg, mem, addr;
10445 unsigned first_regno;
10446 HOST_WIDE_INT i = 1, base = 0, offset = 0;
10448 bool addr_reg_in_reglist = false;
10449 bool update = false;
10454 /* If not in SImode, then registers must be consecutive
10455 (e.g., VLDM instructions for DFmode). */
10456 gcc_assert ((mode == SImode) || consecutive);
10457 /* Setting return_pc for stores is illegal. */
10458 gcc_assert (!return_pc || load);
10460 /* Set up the increments and the regs per val based on the mode. */
10461 reg_increment = GET_MODE_SIZE (mode);
10462 regs_per_val = reg_increment / 4;
10463 offset_adj = return_pc ? 1 : 0;
10466 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
10467 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
10470 /* Check if this is a write-back. */
10471 elt = XVECEXP (op, 0, offset_adj);
10472 if (GET_CODE (SET_SRC (elt)) == PLUS)
10478 /* The offset adjustment must be the number of registers being
10479 popped times the size of a single register. */
10480 if (!REG_P (SET_DEST (elt))
10481 || !REG_P (XEXP (SET_SRC (elt), 0))
10482 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
10483 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
10484 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
10485 ((count - 1 - offset_adj) * reg_increment))
10489 i = i + offset_adj;
10490 base = base + offset_adj;
10491 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
10492 success depends on the type: VLDM can do just one reg,
10493 LDM must do at least two. */
10494 if ((count <= i) && (mode == SImode))
10497 elt = XVECEXP (op, 0, i - 1);
10498 if (GET_CODE (elt) != SET)
10503 reg = SET_DEST (elt);
10504 mem = SET_SRC (elt);
10508 reg = SET_SRC (elt);
10509 mem = SET_DEST (elt);
10512 if (!REG_P (reg) || !MEM_P (mem))
10515 regno = REGNO (reg);
10516 first_regno = regno;
10517 addr = XEXP (mem, 0);
10518 if (GET_CODE (addr) == PLUS)
10520 if (!CONST_INT_P (XEXP (addr, 1)))
10523 offset = INTVAL (XEXP (addr, 1));
10524 addr = XEXP (addr, 0);
10530 /* Don't allow SP to be loaded unless it is also the base register. It
10531 guarantees that SP is reset correctly when an LDM instruction
10532 is interrupted. Otherwise, we might end up with a corrupt stack. */
10533 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10536 for (; i < count; i++)
10538 elt = XVECEXP (op, 0, i);
10539 if (GET_CODE (elt) != SET)
10544 reg = SET_DEST (elt);
10545 mem = SET_SRC (elt);
10549 reg = SET_SRC (elt);
10550 mem = SET_DEST (elt);
10554 || GET_MODE (reg) != mode
10555 || REGNO (reg) <= regno
10558 (unsigned int) (first_regno + regs_per_val * (i - base))))
10559 /* Don't allow SP to be loaded unless it is also the base register. It
10560 guarantees that SP is reset correctly when an LDM instruction
10561 is interrupted. Otherwise, we might end up with a corrupt stack. */
10562 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10564 || GET_MODE (mem) != mode
10565 || ((GET_CODE (XEXP (mem, 0)) != PLUS
10566 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
10567 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
10568 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
10569 offset + (i - base) * reg_increment))
10570 && (!REG_P (XEXP (mem, 0))
10571 || offset + (i - base) * reg_increment != 0)))
10574 regno = REGNO (reg);
10575 if (regno == REGNO (addr))
10576 addr_reg_in_reglist = true;
10581 if (update && addr_reg_in_reglist)
10584 /* For Thumb-1, the address register is always modified - either by write-back
10585 or by an explicit load. If the pattern does not describe an update,
10586 then the address register must be in the list of loaded registers. */
10588 return update || addr_reg_in_reglist;
10594 /* Return true iff it would be profitable to turn a sequence of NOPS loads
10595 or stores (depending on IS_STORE) into a load-multiple or store-multiple
10596 instruction. ADD_OFFSET is nonzero if the base address register needs
10597 to be modified with an add instruction before we can use it. */
10600 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
10601 int nops, HOST_WIDE_INT add_offset)
10603 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
10604 if the offset isn't small enough. The reason 2 ldrs are faster
10605 is because these ARMs are able to do more than one cache access
10606 in a single cycle. The ARM9 and StrongARM have Harvard caches,
10607 whilst the ARM8 has a double bandwidth cache. This means that
10608 these cores can do both an instruction fetch and a data fetch in
10609 a single cycle, so the trick of calculating the address into a
10610 scratch register (one of the result regs) and then doing a load
10611 multiple actually becomes slower (and no smaller in code size).
10612 That is the transformation
10614 ldr rd1, [rbase + offset]
10615 ldr rd2, [rbase + offset + 4]
10619 add rd1, rbase, offset
10620 ldmia rd1, {rd1, rd2}
10622 produces worse code -- '3 cycles + any stalls on rd2' instead of
10623 '2 cycles + any stalls on rd2'. On ARMs with only one cache
10624 access per cycle, the first sequence could never complete in less
10625 than 6 cycles, whereas the ldm sequence would only take 5 and
10626 would make better use of sequential accesses if not hitting the
10629 We cheat here and test 'arm_ld_sched' which we currently know to
10630 only be true for the ARM8, ARM9 and StrongARM. If this ever
10631 changes, then the test below needs to be reworked. */
10632 if (nops == 2 && arm_ld_sched && add_offset != 0)
10635 /* XScale has load-store double instructions, but they have stricter
10636 alignment requirements than load-store multiple, so we cannot
10637 use them.
10639 For XScale ldm requires 2 + NREGS cycles to complete and blocks
10640 the pipeline until completion.
10648 An ldr instruction takes 1-3 cycles, but does not block the
10649 pipeline.
10657 Best case ldr will always win. However, the more ldr instructions
10658 we issue, the less likely we are to be able to schedule them well.
10659 Using ldr instructions also increases code size.
10661 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10662 for counts of 3 or 4 regs. */
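/* Using the counts quoted above: an ldm of two registers takes
   2 + 2 = 4 cycles while two ldrs can finish in as few as 2; for three or
   four registers the ldm's 5 or 6 cycles compare well against the ldrs'
   worst cases of 9 or 12 (at 1-3 cycles each). */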
10663 if (nops <= 2 && arm_tune_xscale && !optimize_size)
10668 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10669 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10670 an array ORDER which describes the sequence to use when accessing the
10671 offsets that produces an ascending order. In this sequence, each
10672 offset must be larger by exactly 4 than the previous one. ORDER[0]
10673 must have been filled in with the lowest offset by the caller.
10674 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10675 we use to verify that ORDER produces an ascending order of registers.
10676 Return true if it was possible to construct such an order, false if
10677 not. */
10680 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10681 int *unsorted_regs)
10684 for (i = 1; i < nops; i++)
10688 order[i] = order[i - 1];
10689 for (j = 0; j < nops; j++)
10690 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10692 /* We must find exactly one offset that is higher than the
10693 previous one by 4. */
10694 if (order[i] != order[i - 1])
10698 if (order[i] == order[i - 1])
10700 /* The register numbers must be ascending. */
10701 if (unsorted_regs != NULL
10702 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
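/* Worked example: offsets {8, 4, 12} with ORDER[0] preset to 1 (the index
   of the lowest offset, 4): iteration 1 finds 4 + 4 == 8 at index 0 and
   iteration 2 finds 8 + 4 == 12 at index 2, giving ORDER = {1, 0, 2}. */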
10708 /* Used to determine in a peephole whether a sequence of load
10709 instructions can be changed into a load-multiple instruction.
10710 NOPS is the number of separate load instructions we are examining. The
10711 first NOPS entries in OPERANDS are the destination registers, the
10712 next NOPS entries are memory operands. If this function is
10713 successful, *BASE is set to the common base register of the memory
10714 accesses; *LOAD_OFFSET is set to the first memory location's offset
10715 from that base register.
10716 REGS is an array filled in with the destination register numbers.
10717 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
10718 insn numbers to an ascending order of loads. If CHECK_REGS is true,
10719 the sequence of registers in REGS matches the loads from ascending memory
10720 locations, and the function verifies that the register numbers are
10721 themselves ascending. If CHECK_REGS is false, the register numbers
10722 are stored in the order they are found in the operands. */
10724 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10725 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10727 int unsorted_regs[MAX_LDM_STM_OPS];
10728 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10729 int order[MAX_LDM_STM_OPS];
10730 rtx base_reg_rtx = NULL;
10734 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10735 easily extended if required. */
10736 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10738 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10740 /* Loop over the operands and check that the memory references are
10741 suitable (i.e. immediate offsets from the same base register). At
10742 the same time, extract the target register and the memory
10743 offsets. */
10744 for (i = 0; i < nops; i++)
10749 /* Convert a subreg of a mem into the mem itself. */
10750 if (GET_CODE (operands[nops + i]) == SUBREG)
10751 operands[nops + i] = alter_subreg (operands + (nops + i), true);
10753 gcc_assert (MEM_P (operands[nops + i]));
10755 /* Don't reorder volatile memory references; it doesn't seem worth
10756 looking for the case where the order is ok anyway. */
10757 if (MEM_VOLATILE_P (operands[nops + i]))
10760 offset = const0_rtx;
10762 if ((REG_P (reg = XEXP (operands[nops + i], 0))
10763 || (GET_CODE (reg) == SUBREG
10764 && REG_P (reg = SUBREG_REG (reg))))
10765 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10766 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
10767 || (GET_CODE (reg) == SUBREG
10768 && REG_P (reg = SUBREG_REG (reg))))
10769 && (CONST_INT_P (offset
10770 = XEXP (XEXP (operands[nops + i], 0), 1)))))
10774 base_reg = REGNO (reg);
10775 base_reg_rtx = reg;
10776 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10779 else if (base_reg != (int) REGNO (reg))
10780 /* Not addressed from the same base register. */
10783 unsorted_regs[i] = (REG_P (operands[i])
10784 ? REGNO (operands[i])
10785 : REGNO (SUBREG_REG (operands[i])));
10787 /* If it isn't an integer register, or if it overwrites the
10788 base register but isn't the last insn in the list, then
10789 we can't do this. */
10790 if (unsorted_regs[i] < 0
10791 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10792 || unsorted_regs[i] > 14
10793 || (i != nops - 1 && unsorted_regs[i] == base_reg))
10796 /* Don't allow SP to be loaded unless it is also the base
10797 register. It guarantees that SP is reset correctly when
10798 an LDM instruction is interrupted. Otherwise, we might
10799 end up with a corrupt stack. */
10800 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
10803 unsorted_offsets[i] = INTVAL (offset);
10804 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10808 /* Not a suitable memory address. */
10812 /* All the useful information has now been extracted from the
10813 operands into unsorted_regs and unsorted_offsets; additionally,
10814 order[0] has been set to the lowest offset in the list. Sort
10815 the offsets into order, verifying that they are adjacent, and
10816 check that the register numbers are ascending. */
10817 if (!compute_offset_order (nops, unsorted_offsets, order,
10818 check_regs ? unsorted_regs : NULL))
10822 memcpy (saved_order, order, sizeof order);
10828 for (i = 0; i < nops; i++)
10829 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10831 *load_offset = unsorted_offsets[order[0]];
10835 && !peep2_reg_dead_p (nops, base_reg_rtx))
10838 if (unsorted_offsets[order[0]] == 0)
10839 ldm_case = 1; /* ldmia */
10840 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10841 ldm_case = 2; /* ldmib */
10842 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10843 ldm_case = 3; /* ldmda */
10844 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10845 ldm_case = 4; /* ldmdb */
10846 else if (const_ok_for_arm (unsorted_offsets[order[0]])
10847 || const_ok_for_arm (-unsorted_offsets[order[0]]))
10852 if (!multiple_operation_profitable_p (false, nops,
10854 ? unsorted_offsets[order[0]] : 0))
10860 /* Used to determine in a peephole whether a sequence of store instructions can
10861 be changed into a store-multiple instruction.
10862 NOPS is the number of separate store instructions we are examining.
10863 NOPS_TOTAL is the total number of instructions recognized by the peephole
10864 pattern.
10865 The first NOPS entries in OPERANDS are the source registers, the next
10866 NOPS entries are memory operands. If this function is successful, *BASE is
10867 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10868 to the first memory location's offset from that base register. REGS is an
10869 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10870 likewise filled with the corresponding rtx's.
10871 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
10872 numbers to an ascending order of stores.
10873 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10874 from ascending memory locations, and the function verifies that the register
10875 numbers are themselves ascending. If CHECK_REGS is false, the register
10876 numbers are stored in the order they are found in the operands. */
10878 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10879 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10880 HOST_WIDE_INT *load_offset, bool check_regs)
10882 int unsorted_regs[MAX_LDM_STM_OPS];
10883 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10884 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10885 int order[MAX_LDM_STM_OPS];
10887 rtx base_reg_rtx = NULL;
10890 /* Write-back of the base register is currently only supported for Thumb-1. */
10891 int base_writeback = TARGET_THUMB1;
10893 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10894 easily extended if required. */
10895 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10897 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10899 /* Loop over the operands and check that the memory references are
10900 suitable (i.e. immediate offsets from the same base register). At
10901 the same time, extract the target register and the memory
10902 offsets. */
10903 for (i = 0; i < nops; i++)
10908 /* Convert a subreg of a mem into the mem itself. */
10909 if (GET_CODE (operands[nops + i]) == SUBREG)
10910 operands[nops + i] = alter_subreg (operands + (nops + i), true);
10912 gcc_assert (MEM_P (operands[nops + i]));
10914 /* Don't reorder volatile memory references; it doesn't seem worth
10915 looking for the case where the order is ok anyway. */
10916 if (MEM_VOLATILE_P (operands[nops + i]))
10919 offset = const0_rtx;
10921 if ((REG_P (reg = XEXP (operands[nops + i], 0))
10922 || (GET_CODE (reg) == SUBREG
10923 && REG_P (reg = SUBREG_REG (reg))))
10924 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10925 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
10926 || (GET_CODE (reg) == SUBREG
10927 && REG_P (reg = SUBREG_REG (reg))))
10928 && (CONST_INT_P (offset
10929 = XEXP (XEXP (operands[nops + i], 0), 1)))))
10931 unsorted_reg_rtxs[i] = (REG_P (operands[i])
10932 ? operands[i] : SUBREG_REG (operands[i]));
10933 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10937 base_reg = REGNO (reg);
10938 base_reg_rtx = reg;
10939 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10942 else if (base_reg != (int) REGNO (reg))
10943 /* Not addressed from the same base register. */
10946 /* If it isn't an integer register, then we can't do this. */
10947 if (unsorted_regs[i] < 0
10948 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10949 /* The effects are unpredictable if the base register is
10950 both updated and stored. */
10951 || (base_writeback && unsorted_regs[i] == base_reg)
10952 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10953 || unsorted_regs[i] > 14)
10956 unsorted_offsets[i] = INTVAL (offset);
10957 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10961 /* Not a suitable memory address. */
10965 /* All the useful information has now been extracted from the
10966 operands into unsorted_regs and unsorted_offsets; additionally,
10967 order[0] has been set to the lowest offset in the list. Sort
10968 the offsets into order, verifying that they are adjacent, and
10969 check that the register numbers are ascending. */
10970 if (!compute_offset_order (nops, unsorted_offsets, order,
10971 check_regs ? unsorted_regs : NULL))
10975 memcpy (saved_order, order, sizeof order);
10981 for (i = 0; i < nops; i++)
10983 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10985 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
10988 *load_offset = unsorted_offsets[order[0]];
10992 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
10995 if (unsorted_offsets[order[0]] == 0)
10996 stm_case = 1; /* stmia */
10997 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10998 stm_case = 2; /* stmib */
10999 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
11000 stm_case = 3; /* stmda */
11001 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
11002 stm_case = 4; /* stmdb */
if (!multiple_operation_profitable_p (false, nops, 0))
return 0;

return stm_case;
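/* Illustration (editorial sketch, not from the original source): given

     str r1, [r3]
     str r2, [r3, #4]

   this analysis returns stm_case 1, and the peephole below rewrites the
   pair as

     stmia r3, {r1, r2}

   provided the register numbers ascend with the memory locations.  */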
11012 /* Routines for use in generating RTL. */
11014 /* Generate a load-multiple instruction. COUNT is the number of loads in
11015 the instruction; REGS and MEMS are arrays containing the operands.
11016 BASEREG is the base register to be used in addressing the memory operands.
WBACK_OFFSET is nonzero if the instruction should update the base
register. */
11021 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
11022 HOST_WIDE_INT wback_offset)
11027 if (!multiple_operation_profitable_p (false, count, 0))
11033 for (i = 0; i < count; i++)
11034 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
11036 if (wback_offset != 0)
11037 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
11039 seq = get_insns ();
11045 result = gen_rtx_PARALLEL (VOIDmode,
11046 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
11047 if (wback_offset != 0)
11049 XVECEXP (result, 0, 0)
11050 = gen_rtx_SET (VOIDmode, basereg,
11051 plus_constant (Pmode, basereg, wback_offset));
11056 for (j = 0; i < count; i++, j++)
11057 XVECEXP (result, 0, i)
11058 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
11063 /* Generate a store-multiple instruction. COUNT is the number of stores in
11064 the instruction; REGS and MEMS are arrays containing the operands.
11065 BASEREG is the base register to be used in addressing the memory operands.
WBACK_OFFSET is nonzero if the instruction should update the base
register. */
11070 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
11071 HOST_WIDE_INT wback_offset)
11076 if (GET_CODE (basereg) == PLUS)
11077 basereg = XEXP (basereg, 0);
11079 if (!multiple_operation_profitable_p (false, count, 0))
11085 for (i = 0; i < count; i++)
11086 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
11088 if (wback_offset != 0)
11089 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
11091 seq = get_insns ();
11097 result = gen_rtx_PARALLEL (VOIDmode,
11098 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
11099 if (wback_offset != 0)
11101 XVECEXP (result, 0, 0)
11102 = gen_rtx_SET (VOIDmode, basereg,
11103 plus_constant (Pmode, basereg, wback_offset));
11108 for (j = 0; i < count; i++, j++)
11109 XVECEXP (result, 0, i)
11110 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
11115 /* Generate either a load-multiple or a store-multiple instruction. This
11116 function can be used in situations where we can start with a single MEM
11117 rtx and adjust its address upwards.
11118 COUNT is the number of operations in the instruction, not counting a
possible update of the base register. REGS is an array containing the
register numbers to be used in the instruction.
11121 BASEREG is the base register to be used in addressing the memory operands,
11122 which are constructed from BASEMEM.
11123 WRITE_BACK specifies whether the generated instruction should include an
11124 update of the base register.
11125 OFFSETP is used to pass an offset to and from this function; this offset
11126 is not used when constructing the address (instead BASEMEM should have an
11127 appropriate offset in its address), it is used only for setting
MEM_OFFSET. It is updated only if WRITE_BACK is true. */
11131 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
11132 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
11134 rtx mems[MAX_LDM_STM_OPS];
11135 HOST_WIDE_INT offset = *offsetp;
11138 gcc_assert (count <= MAX_LDM_STM_OPS);
11140 if (GET_CODE (basereg) == PLUS)
11141 basereg = XEXP (basereg, 0);
11143 for (i = 0; i < count; i++)
11145 rtx addr = plus_constant (Pmode, basereg, i * 4);
11146 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
11154 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
11155 write_back ? 4 * count : 0);
11157 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
11158 write_back ? 4 * count : 0);
11162 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
11163 rtx basemem, HOST_WIDE_INT *offsetp)
return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
offsetp);
11170 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
11171 rtx basemem, HOST_WIDE_INT *offsetp)
return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
offsetp);
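/* Usage sketch (editorial, not from the original source): to load four
   words into r0-r3 from the address held in SRC while tracking mem
   offsets, a caller can write

     int regnos[4] = { 0, 1, 2, 3 };
     HOST_WIDE_INT offset = 0;
     emit_insn (arm_gen_load_multiple (regnos, 4, src, TRUE,
                                       srcbase, &offset));

   With WRITE_BACK true this also advances SRC by 16 and updates OFFSET,
   as done in arm_block_move_unaligned_straight below.  */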
11177 /* Called from a peephole2 expander to turn a sequence of loads into an
11178 LDM instruction. OPERANDS are the operands found by the peephole matcher;
11179 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
is true if we can reorder the registers because they are used commutatively
subsequently.
11182 Returns true iff we could generate a new instruction. */
11185 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
11187 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
11188 rtx mems[MAX_LDM_STM_OPS];
11189 int i, j, base_reg;
11191 HOST_WIDE_INT offset;
11192 int write_back = FALSE;
11196 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
11197 &base_reg, &offset, !sort_regs);
11203 for (i = 0; i < nops - 1; i++)
11204 for (j = i + 1; j < nops; j++)
11205 if (regs[i] > regs[j])
11211 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
11215 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
11216 gcc_assert (ldm_case == 1 || ldm_case == 5);
11222 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
11223 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
11225 if (!TARGET_THUMB1)
11227 base_reg = regs[0];
11228 base_reg_rtx = newbase;
11232 for (i = 0; i < nops; i++)
11234 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
11235 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
11238 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
11239 write_back ? offset + i * 4 : 0));
11243 /* Called from a peephole2 expander to turn a sequence of stores into an
11244 STM instruction. OPERANDS are the operands found by the peephole matcher;
11245 NOPS indicates how many separate stores we are trying to combine.
11246 Returns true iff we could generate a new instruction. */
11249 gen_stm_seq (rtx *operands, int nops)
11252 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
11253 rtx mems[MAX_LDM_STM_OPS];
11256 HOST_WIDE_INT offset;
11257 int write_back = FALSE;
11260 bool base_reg_dies;
11262 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
11263 mem_order, &base_reg, &offset, true);
11268 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
11270 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
11273 gcc_assert (base_reg_dies);
11279 gcc_assert (base_reg_dies);
11280 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
11284 addr = plus_constant (Pmode, base_reg_rtx, offset);
11286 for (i = 0; i < nops; i++)
11288 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
11289 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
11292 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
11293 write_back ? offset + i * 4 : 0));
11297 /* Called from a peephole2 expander to turn a sequence of stores that are
11298 preceded by constant loads into an STM instruction. OPERANDS are the
11299 operands found by the peephole matcher; NOPS indicates how many
11300 separate stores we are trying to combine; there are 2 * NOPS
11301 instructions in the peephole.
11302 Returns true iff we could generate a new instruction. */
11305 gen_const_stm_seq (rtx *operands, int nops)
11307 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
11308 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
11309 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
11310 rtx mems[MAX_LDM_STM_OPS];
11313 HOST_WIDE_INT offset;
11314 int write_back = FALSE;
11317 bool base_reg_dies;
11319 HARD_REG_SET allocated;
11321 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
11322 mem_order, &base_reg, &offset, false);
11327 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
/* If the same register is used more than once, try to find a free
register. */
11331 CLEAR_HARD_REG_SET (allocated);
11332 for (i = 0; i < nops; i++)
11334 for (j = i + 1; j < nops; j++)
11335 if (regs[i] == regs[j])
11337 rtx t = peep2_find_free_register (0, nops * 2,
11338 TARGET_THUMB1 ? "l" : "r",
11339 SImode, &allocated);
11343 regs[i] = REGNO (t);
/* Compute an ordering that maps the register numbers to an ascending
sequence. */
reg_order[0] = 0;
11350 for (i = 0; i < nops; i++)
11351 if (regs[i] < regs[reg_order[0]])
11354 for (i = 1; i < nops; i++)
11356 int this_order = reg_order[i - 1];
11357 for (j = 0; j < nops; j++)
11358 if (regs[j] > regs[reg_order[i - 1]]
11359 && (this_order == reg_order[i - 1]
11360 || regs[j] < regs[this_order]))
11362 reg_order[i] = this_order;
11365 /* Ensure that registers that must be live after the instruction end
11366 up with the correct value. */
11367 for (i = 0; i < nops; i++)
11369 int this_order = reg_order[i];
11370 if ((this_order != mem_order[i]
11371 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
11372 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
11376 /* Load the constants. */
11377 for (i = 0; i < nops; i++)
11379 rtx op = operands[2 * nops + mem_order[i]];
11380 sorted_regs[i] = regs[reg_order[i]];
11381 emit_move_insn (reg_rtxs[reg_order[i]], op);
11384 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
11386 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
11389 gcc_assert (base_reg_dies);
11395 gcc_assert (base_reg_dies);
11396 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
11400 addr = plus_constant (Pmode, base_reg_rtx, offset);
11402 for (i = 0; i < nops; i++)
11404 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
11405 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
11408 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
11409 write_back ? offset + i * 4 : 0));
11413 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
11414 unaligned copies on processors which support unaligned semantics for those
11415 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
11416 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
11417 An interleave factor of 1 (the minimum) will perform no interleaving.
11418 Load/store multiple are used for aligned addresses where possible. */
11421 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
11422 HOST_WIDE_INT length,
11423 unsigned int interleave_factor)
11425 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
11426 int *regnos = XALLOCAVEC (int, interleave_factor);
11427 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
11428 HOST_WIDE_INT i, j;
11429 HOST_WIDE_INT remaining = length, words;
11430 rtx halfword_tmp = NULL, byte_tmp = NULL;
11432 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
11433 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
11434 HOST_WIDE_INT srcoffset, dstoffset;
11435 HOST_WIDE_INT src_autoinc, dst_autoinc;
11438 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
11440 /* Use hard registers if we have aligned source or destination so we can use
11441 load/store multiple with contiguous registers. */
11442 if (dst_aligned || src_aligned)
11443 for (i = 0; i < interleave_factor; i++)
11444 regs[i] = gen_rtx_REG (SImode, i);
11446 for (i = 0; i < interleave_factor; i++)
11447 regs[i] = gen_reg_rtx (SImode);
11449 dst = copy_addr_to_reg (XEXP (dstbase, 0));
11450 src = copy_addr_to_reg (XEXP (srcbase, 0));
11452 srcoffset = dstoffset = 0;
11454 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
11455 For copying the last bytes we want to subtract this offset again. */
11456 src_autoinc = dst_autoinc = 0;
11458 for (i = 0; i < interleave_factor; i++)
11461 /* Copy BLOCK_SIZE_BYTES chunks. */
11463 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
11466 if (src_aligned && interleave_factor > 1)
11468 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
11469 TRUE, srcbase, &srcoffset));
11470 src_autoinc += UNITS_PER_WORD * interleave_factor;
11474 for (j = 0; j < interleave_factor; j++)
11476 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
11478 mem = adjust_automodify_address (srcbase, SImode, addr,
11479 srcoffset + j * UNITS_PER_WORD);
11480 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11482 srcoffset += block_size_bytes;
11486 if (dst_aligned && interleave_factor > 1)
11488 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
11489 TRUE, dstbase, &dstoffset));
11490 dst_autoinc += UNITS_PER_WORD * interleave_factor;
11494 for (j = 0; j < interleave_factor; j++)
11496 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
11498 mem = adjust_automodify_address (dstbase, SImode, addr,
11499 dstoffset + j * UNITS_PER_WORD);
11500 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11502 dstoffset += block_size_bytes;
11505 remaining -= block_size_bytes;
11508 /* Copy any whole words left (note these aren't interleaved with any
11509 subsequent halfword/byte load/stores in the interests of simplicity). */
11511 words = remaining / UNITS_PER_WORD;
11513 gcc_assert (words < interleave_factor);
11515 if (src_aligned && words > 1)
11517 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
11519 src_autoinc += UNITS_PER_WORD * words;
11523 for (j = 0; j < words; j++)
11525 addr = plus_constant (Pmode, src,
11526 srcoffset + j * UNITS_PER_WORD - src_autoinc);
11527 mem = adjust_automodify_address (srcbase, SImode, addr,
11528 srcoffset + j * UNITS_PER_WORD);
11529 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11531 srcoffset += words * UNITS_PER_WORD;
11534 if (dst_aligned && words > 1)
11536 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
11538 dst_autoinc += words * UNITS_PER_WORD;
11542 for (j = 0; j < words; j++)
11544 addr = plus_constant (Pmode, dst,
11545 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
11546 mem = adjust_automodify_address (dstbase, SImode, addr,
11547 dstoffset + j * UNITS_PER_WORD);
11548 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11550 dstoffset += words * UNITS_PER_WORD;
11553 remaining -= words * UNITS_PER_WORD;
11555 gcc_assert (remaining < 4);
11557 /* Copy a halfword if necessary. */
11559 if (remaining >= 2)
11561 halfword_tmp = gen_reg_rtx (SImode);
11563 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11564 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
11565 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
11567 /* Either write out immediately, or delay until we've loaded the last
11568 byte, depending on interleave factor. */
11569 if (interleave_factor == 1)
11571 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11572 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11573 emit_insn (gen_unaligned_storehi (mem,
11574 gen_lowpart (HImode, halfword_tmp)));
11575 halfword_tmp = NULL;
11583 gcc_assert (remaining < 2);
11585 /* Copy last byte. */
11587 if ((remaining & 1) != 0)
11589 byte_tmp = gen_reg_rtx (SImode);
11591 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11592 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
11593 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
11595 if (interleave_factor == 1)
11597 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11598 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11599 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11608 /* Store last halfword if we haven't done so already. */
11612 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11613 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11614 emit_insn (gen_unaligned_storehi (mem,
11615 gen_lowpart (HImode, halfword_tmp)));
11619 /* Likewise for last byte. */
11623 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11624 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11625 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11629 gcc_assert (remaining == 0 && srcoffset == dstoffset);
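/* Illustration (editorial, not from the original source): with
   INTERLEAVE_FACTOR == 1 and LENGTH == 7, the routine above expands to
   roughly

     ldr  r0, [src]      @ unaligned word copy
     str  r0, [dst]
     ldrh r1, [src, #4]  @ halfword
     strh r1, [dst, #4]
     ldrb r2, [src, #6]  @ final byte
     strb r2, [dst, #6]

   (register numbers are illustrative; pseudos are used unless an
   aligned side allows ldm/stm with hard registers r0-r3).  */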
11632 /* From mips_adjust_block_mem:
11634 Helper function for doing a loop-based block operation on memory
reference MEM. Each iteration of the loop will operate on LENGTH
bytes of MEM.
11638 Create a new base register for use within the loop and point it to
11639 the start of MEM. Create a new memory reference that uses this
11640 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
11643 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
11646 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
11648 /* Although the new mem does not refer to a known location,
11649 it does keep up to LENGTH bytes of alignment. */
11650 *loop_mem = change_address (mem, BLKmode, *loop_reg);
11651 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
11654 /* From mips_block_move_loop:
11656 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
11657 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
11658 the memory regions do not overlap. */
11661 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
11662 unsigned int interleave_factor,
11663 HOST_WIDE_INT bytes_per_iter)
11665 rtx label, src_reg, dest_reg, final_src, test;
11666 HOST_WIDE_INT leftover;
11668 leftover = length % bytes_per_iter;
11669 length -= leftover;
11671 /* Create registers and memory references for use within the loop. */
11672 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
11673 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
/* Calculate the value that SRC_REG should have after the last iteration of
the loop. */
11677 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
11678 0, 0, OPTAB_WIDEN);
11680 /* Emit the start of the loop. */
11681 label = gen_label_rtx ();
11682 emit_label (label);
11684 /* Emit the loop body. */
11685 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
11686 interleave_factor);
11688 /* Move on to the next block. */
11689 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
11690 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
11692 /* Emit the loop condition. */
11693 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
11694 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
11696 /* Mop up any left-over bytes. */
11698 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
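/* Illustration (editorial, not from the original source): for
   LENGTH == 100 and BYTES_PER_ITER == 16, the emitted sequence
   corresponds to

     final_src = src + 96        @ six whole iterations
   L:
     <16-byte straight copy>
     src += 16;  dest += 16
     if (src != final_src) goto L
     <straight copy of the 4 leftover bytes>  */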
11701 /* Emit a block move when either the source or destination is unaligned (not
11702 aligned to a four-byte boundary). This may need further tuning depending on
11703 core type, optimize_size setting, etc. */
11706 arm_movmemqi_unaligned (rtx *operands)
11708 HOST_WIDE_INT length = INTVAL (operands[2]);
11712 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
11713 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
11714 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
11715 size of code if optimizing for size. We'll use ldm/stm if src_aligned
11716 or dst_aligned though: allow more interleaving in those cases since the
11717 resulting code can be smaller. */
11718 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
11719 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
11722 arm_block_move_unaligned_loop (operands[0], operands[1], length,
11723 interleave_factor, bytes_per_iter);
11725 arm_block_move_unaligned_straight (operands[0], operands[1], length,
11726 interleave_factor);
11730 /* Note that the loop created by arm_block_move_unaligned_loop may be
subject to loop unrolling, which makes tuning this condition a little
tricky. */
11734 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
11736 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
11743 arm_gen_movmemqi (rtx *operands)
11745 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
11746 HOST_WIDE_INT srcoffset, dstoffset;
11748 rtx src, dst, srcbase, dstbase;
11749 rtx part_bytes_reg = NULL;
11752 if (!CONST_INT_P (operands[2])
11753 || !CONST_INT_P (operands[3])
11754 || INTVAL (operands[2]) > 64)
11757 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
11758 return arm_movmemqi_unaligned (operands);
11760 if (INTVAL (operands[3]) & 3)
11763 dstbase = operands[0];
11764 srcbase = operands[1];
11766 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
11767 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
11769 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
11770 out_words_to_go = INTVAL (operands[2]) / 4;
11771 last_bytes = INTVAL (operands[2]) & 3;
11772 dstoffset = srcoffset = 0;
11774 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
11775 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
11777 for (i = 0; in_words_to_go >= 2; i+=4)
11779 if (in_words_to_go > 4)
11780 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
11781 TRUE, srcbase, &srcoffset));
11783 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
11784 src, FALSE, srcbase,
11787 if (out_words_to_go)
11789 if (out_words_to_go > 4)
11790 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
11791 TRUE, dstbase, &dstoffset));
11792 else if (out_words_to_go != 1)
11793 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
11794 out_words_to_go, dst,
11797 dstbase, &dstoffset));
11800 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11801 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
11802 if (last_bytes != 0)
11804 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
11810 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
11811 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
11814 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
11815 if (out_words_to_go)
11819 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11820 sreg = copy_to_reg (mem);
11822 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11823 emit_move_insn (mem, sreg);
11826 gcc_assert (!in_words_to_go); /* Sanity check */
11829 if (in_words_to_go)
11831 gcc_assert (in_words_to_go > 0);
11833 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11834 part_bytes_reg = copy_to_mode_reg (SImode, mem);
11837 gcc_assert (!last_bytes || part_bytes_reg);
11839 if (BYTES_BIG_ENDIAN && last_bytes)
11841 rtx tmp = gen_reg_rtx (SImode);
11843 /* The bytes we want are in the top end of the word. */
11844 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
11845 GEN_INT (8 * (4 - last_bytes))));
11846 part_bytes_reg = tmp;
11850 mem = adjust_automodify_address (dstbase, QImode,
11851 plus_constant (Pmode, dst,
11853 dstoffset + last_bytes - 1);
11854 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11858 tmp = gen_reg_rtx (SImode);
11859 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
11860 part_bytes_reg = tmp;
11867 if (last_bytes > 1)
11869 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
11870 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
11874 rtx tmp = gen_reg_rtx (SImode);
11875 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
11876 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
11877 part_bytes_reg = tmp;
11884 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
11885 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
/* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
MEM by the size of its mode. */
11895 next_consecutive_mem (rtx mem)
11897 enum machine_mode mode = GET_MODE (mem);
11898 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
11899 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
11901 return adjust_automodify_address (mem, mode, addr, offset);
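/* Example (editorial, not from the original source): if MEM is a DImode
   reference at [base], successive calls return DImode references at
   [base, #8], [base, #16], ..., since the step is the size of MEM's
   current mode.  gen_movmem_ldrd_strd below relies on this to walk the
   copy in mode-sized strides.  */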
11904 /* Copy using LDRD/STRD instructions whenever possible.
11905 Returns true upon success. */
11907 gen_movmem_ldrd_strd (rtx *operands)
11909 unsigned HOST_WIDE_INT len;
11910 HOST_WIDE_INT align;
11911 rtx src, dst, base;
11913 bool src_aligned, dst_aligned;
11914 bool src_volatile, dst_volatile;
11916 gcc_assert (CONST_INT_P (operands[2]));
11917 gcc_assert (CONST_INT_P (operands[3]));
11919 len = UINTVAL (operands[2]);
11923 /* Maximum alignment we can assume for both src and dst buffers. */
11924 align = INTVAL (operands[3]);
11926 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
11929 /* Place src and dst addresses in registers
11930 and update the corresponding mem rtx. */
11932 dst_volatile = MEM_VOLATILE_P (dst);
11933 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
11934 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
11935 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
11938 src_volatile = MEM_VOLATILE_P (src);
11939 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
11940 base = copy_to_mode_reg (SImode, XEXP (src, 0));
11941 src = adjust_automodify_address (src, VOIDmode, base, 0);
11943 if (!unaligned_access && !(src_aligned && dst_aligned))
11946 if (src_volatile || dst_volatile)
11949 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
11950 if (!(dst_aligned || src_aligned))
11951 return arm_gen_movmemqi (operands);
11953 src = adjust_address (src, DImode, 0);
11954 dst = adjust_address (dst, DImode, 0);
11958 reg0 = gen_reg_rtx (DImode);
11960 emit_move_insn (reg0, src);
11962 emit_insn (gen_unaligned_loaddi (reg0, src));
11965 emit_move_insn (dst, reg0);
11967 emit_insn (gen_unaligned_storedi (dst, reg0));
11969 src = next_consecutive_mem (src);
11970 dst = next_consecutive_mem (dst);
11973 gcc_assert (len < 8);
11976 /* More than a word but less than a double-word to copy. Copy a word. */
11977 reg0 = gen_reg_rtx (SImode);
11978 src = adjust_address (src, SImode, 0);
11979 dst = adjust_address (dst, SImode, 0);
11981 emit_move_insn (reg0, src);
11983 emit_insn (gen_unaligned_loadsi (reg0, src));
11986 emit_move_insn (dst, reg0);
11988 emit_insn (gen_unaligned_storesi (dst, reg0));
11990 src = next_consecutive_mem (src);
11991 dst = next_consecutive_mem (dst);
11998 /* Copy the remaining bytes. */
12001 dst = adjust_address (dst, HImode, 0);
12002 src = adjust_address (src, HImode, 0);
12003 reg0 = gen_reg_rtx (SImode);
12004 emit_insn (gen_unaligned_loadhiu (reg0, src));
12005 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
12006 src = next_consecutive_mem (src);
12007 dst = next_consecutive_mem (dst);
12012 dst = adjust_address (dst, QImode, 0);
12013 src = adjust_address (src, QImode, 0);
12014 reg0 = gen_reg_rtx (QImode);
12015 emit_move_insn (reg0, src);
12016 emit_move_insn (dst, reg0);
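/* Illustration (editorial, not from the original source): a 14-byte
   copy with both buffers word-aligned becomes roughly

     ldrd r0, r1, [src]
     strd r0, r1, [dst]
     ldr  r2, [src, #8]
     str  r2, [dst, #8]
     ldrh r3, [src, #12]
     strh r3, [dst, #12]

   i.e. one double-word, one word, and one halfword access.  */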
12020 /* Select a dominance comparison mode if possible for a test of the general
12021 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
12022 COND_OR == DOM_CC_X_AND_Y => (X && Y)
12023 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
12024 COND_OR == DOM_CC_X_OR_Y => (X || Y)
12025 In all cases OP will be either EQ or NE, but we don't need to know which
12026 here. If we are unable to support a dominance comparison we return
12027 CC mode. This will then fail to match for the RTL expressions that
12028 generate this call. */
12030 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
12032 enum rtx_code cond1, cond2;
12035 /* Currently we will probably get the wrong result if the individual
12036 comparisons are not simple. This also ensures that it is safe to
12037 reverse a comparison if necessary. */
12038 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
12040 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
12044 /* The if_then_else variant of this tests the second condition if the
12045 first passes, but is true if the first fails. Reverse the first
12046 condition to get a true "inclusive-or" expression. */
12047 if (cond_or == DOM_CC_NX_OR_Y)
12048 cond1 = reverse_condition (cond1);
12050 /* If the comparisons are not equal, and one doesn't dominate the other,
12051 then we can't do this. */
12053 && !comparison_dominates_p (cond1, cond2)
12054 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
12059 enum rtx_code temp = cond1;
12067 if (cond_or == DOM_CC_X_AND_Y)
12072 case EQ: return CC_DEQmode;
12073 case LE: return CC_DLEmode;
12074 case LEU: return CC_DLEUmode;
12075 case GE: return CC_DGEmode;
12076 case GEU: return CC_DGEUmode;
12077 default: gcc_unreachable ();
12081 if (cond_or == DOM_CC_X_AND_Y)
12093 gcc_unreachable ();
12097 if (cond_or == DOM_CC_X_AND_Y)
12109 gcc_unreachable ();
12113 if (cond_or == DOM_CC_X_AND_Y)
12114 return CC_DLTUmode;
12119 return CC_DLTUmode;
12121 return CC_DLEUmode;
12125 gcc_unreachable ();
12129 if (cond_or == DOM_CC_X_AND_Y)
12130 return CC_DGTUmode;
12135 return CC_DGTUmode;
12137 return CC_DGEUmode;
12141 gcc_unreachable ();
/* The remaining cases only occur when both comparisons are the
same. */
12147 gcc_assert (cond1 == cond2);
12151 gcc_assert (cond1 == cond2);
12155 gcc_assert (cond1 == cond2);
12159 gcc_assert (cond1 == cond2);
12160 return CC_DLEUmode;
12163 gcc_assert (cond1 == cond2);
12164 return CC_DGEUmode;
12167 gcc_unreachable ();
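/* Worked example (editorial, not from the original source): for a test
   such as "x == 0 && y == 0", combine presents
     (and (eq x 0) (eq y 0))
   with COND_OR == DOM_CC_X_AND_Y, and this function returns CC_DEQmode,
   which matches the conditional-compare sequence

     cmp   x, #0
     cmpeq y, #0   @ executed only if the first compare found equality
     beq   ...     */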
12172 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
12174 /* All floating point compares return CCFP if it is an equality
12175 comparison, and CCFPE otherwise. */
12176 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12199 gcc_unreachable ();
12203 /* A compare with a shifted operand. Because of canonicalization, the
12204 comparison will have to be swapped when we emit the assembler. */
12205 if (GET_MODE (y) == SImode
12206 && (REG_P (y) || (GET_CODE (y) == SUBREG))
12207 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
12208 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
12209 || GET_CODE (x) == ROTATERT))
12212 /* This operation is performed swapped, but since we only rely on the Z
12213 flag we don't need an additional mode. */
12214 if (GET_MODE (y) == SImode
12215 && (REG_P (y) || (GET_CODE (y) == SUBREG))
12216 && GET_CODE (x) == NEG
12217 && (op == EQ || op == NE))
12220 /* This is a special case that is used by combine to allow a
12221 comparison of a shifted byte load to be split into a zero-extend
12222 followed by a comparison of the shifted integer (only valid for
12223 equalities and unsigned inequalities). */
12224 if (GET_MODE (x) == SImode
12225 && GET_CODE (x) == ASHIFT
12226 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
12227 && GET_CODE (XEXP (x, 0)) == SUBREG
12228 && MEM_P (SUBREG_REG (XEXP (x, 0)))
12229 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
12230 && (op == EQ || op == NE
12231 || op == GEU || op == GTU || op == LTU || op == LEU)
12232 && CONST_INT_P (y))
/* A construct for a conditional compare: if the false arm contains
0, then both conditions must be true; otherwise either condition
must be true. Not all conditions are possible, so CCmode is
returned if it can't be done. */
12239 if (GET_CODE (x) == IF_THEN_ELSE
12240 && (XEXP (x, 2) == const0_rtx
12241 || XEXP (x, 2) == const1_rtx)
12242 && COMPARISON_P (XEXP (x, 0))
12243 && COMPARISON_P (XEXP (x, 1)))
12244 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
12245 INTVAL (XEXP (x, 2)));
12247 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
12248 if (GET_CODE (x) == AND
12249 && (op == EQ || op == NE)
12250 && COMPARISON_P (XEXP (x, 0))
12251 && COMPARISON_P (XEXP (x, 1)))
12252 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
12255 if (GET_CODE (x) == IOR
12256 && (op == EQ || op == NE)
12257 && COMPARISON_P (XEXP (x, 0))
12258 && COMPARISON_P (XEXP (x, 1)))
12259 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
12262 /* An operation (on Thumb) where we want to test for a single bit.
12263 This is done by shifting that bit up into the top bit of a
12264 scratch register; we can then branch on the sign bit. */
12266 && GET_MODE (x) == SImode
12267 && (op == EQ || op == NE)
12268 && GET_CODE (x) == ZERO_EXTRACT
12269 && XEXP (x, 1) == const1_rtx)
/* For an operation that sets the condition codes as a side-effect, the
V flag is not set correctly, so we can only use comparisons where
this doesn't matter. (For LT and GE we can use the "mi" and "pl"
conditions.) */
12276 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
12277 if (GET_MODE (x) == SImode
12279 && (op == EQ || op == NE || op == LT || op == GE)
12280 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
12281 || GET_CODE (x) == AND || GET_CODE (x) == IOR
12282 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
12283 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
12284 || GET_CODE (x) == LSHIFTRT
12285 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
12286 || GET_CODE (x) == ROTATERT
12287 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
12288 return CC_NOOVmode;
12290 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
12293 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
12294 && GET_CODE (x) == PLUS
12295 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
12298 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
12304 /* A DImode comparison against zero can be implemented by
12305 or'ing the two halves together. */
12306 if (y == const0_rtx)
12309 /* We can do an equality test in three Thumb instructions. */
12319 /* DImode unsigned comparisons can be implemented by cmp +
cmpeq without a scratch register. Not worth doing in
Thumb-2. */
12331 /* DImode signed and unsigned comparisons can be implemented
12332 by cmp + sbcs with a scratch register, but that does not
12333 set the Z flag - we must reverse GT/LE/GTU/LEU. */
12334 gcc_assert (op != EQ && op != NE);
12338 gcc_unreachable ();
12342 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
12343 return GET_MODE (x);
12348 /* X and Y are two things to compare using CODE. Emit the compare insn and
12349 return the rtx for register 0 in the proper mode. FP means this is a
floating-point compare; I don't think that it is needed on the ARM. */
12352 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
12354 enum machine_mode mode;
12356 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
12358 /* We might have X as a constant, Y as a register because of the predicates
12359 used for cmpdi. If so, force X to a register here. */
12360 if (dimode_comparison && !REG_P (x))
12361 x = force_reg (DImode, x);
12363 mode = SELECT_CC_MODE (code, x, y);
12364 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
12366 if (dimode_comparison
12367 && mode != CC_CZmode)
12371 /* To compare two non-zero values for equality, XOR them and
12372 then compare against zero. Not used for ARM mode; there
12373 CC_CZmode is cheaper. */
12374 if (mode == CC_Zmode && y != const0_rtx)
12376 gcc_assert (!reload_completed);
12377 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
12381 /* A scratch register is required. */
12382 if (reload_completed)
12383 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
12385 scratch = gen_rtx_SCRATCH (SImode);
12387 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
12388 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
12389 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

return cc_reg;
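/* Usage sketch (editorial, not from the original source): a branch
   expander typically does

     rtx cc_reg = arm_gen_compare_reg (code, x, y, NULL_RTX);
     rtx cond = gen_rtx_fmt_ee (code, VOIDmode, cc_reg, const0_rtx);

   and then emits a conditional jump on COND; the scratch operand is
   only required for certain DImode comparisons after reload.  */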
12397 /* Generate a sequence of insns that will generate the correct return
address mask depending on the physical architecture that the program
is running on. */
12401 arm_gen_return_addr_mask (void)
12403 rtx reg = gen_reg_rtx (Pmode);
12405 emit_insn (gen_return_addr_mask (reg));
12410 arm_reload_in_hi (rtx *operands)
12412 rtx ref = operands[1];
12414 HOST_WIDE_INT offset = 0;
12416 if (GET_CODE (ref) == SUBREG)
12418 offset = SUBREG_BYTE (ref);
12419 ref = SUBREG_REG (ref);
12424 /* We have a pseudo which has been spilt onto the stack; there
12425 are two cases here: the first where there is a simple
12426 stack-slot replacement and a second where the stack-slot is
12427 out of range, or is used as a subreg. */
12428 if (reg_equiv_mem (REGNO (ref)))
12430 ref = reg_equiv_mem (REGNO (ref));
12431 base = find_replacement (&XEXP (ref, 0));
12434 /* The slot is out of range, or was dressed up in a SUBREG. */
12435 base = reg_equiv_address (REGNO (ref));
12438 base = find_replacement (&XEXP (ref, 0));
12440 /* Handle the case where the address is too complex to be offset by 1. */
12441 if (GET_CODE (base) == MINUS
12442 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
12444 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12446 emit_set_insn (base_plus, base);
12449 else if (GET_CODE (base) == PLUS)
12451 /* The addend must be CONST_INT, or we would have dealt with it above. */
12452 HOST_WIDE_INT hi, lo;
12454 offset += INTVAL (XEXP (base, 1));
12455 base = XEXP (base, 0);
12457 /* Rework the address into a legal sequence of insns. */
12458 /* Valid range for lo is -4095 -> 4095 */
12461 : -((-offset) & 0xfff));
/* Corner case: if lo is the max offset then we would be out of range
12464 once we have added the additional 1 below, so bump the msb into the
12465 pre-loading insn(s). */
12469 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
12470 ^ (HOST_WIDE_INT) 0x80000000)
12471 - (HOST_WIDE_INT) 0x80000000);
12473 gcc_assert (hi + lo == offset);
12477 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12479 /* Get the base address; addsi3 knows how to handle constants
12480 that require more than one insn. */
12481 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
/* Operands[2] may overlap operands[0] (though it won't overlap
operands[1]); that's why we asked for a DImode reg -- so we can
use the half that does not overlap. */
12490 if (REGNO (operands[2]) == REGNO (operands[0]))
12491 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12493 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
12495 emit_insn (gen_zero_extendqisi2 (scratch,
12496 gen_rtx_MEM (QImode,
12497 plus_constant (Pmode, base,
12499 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
12500 gen_rtx_MEM (QImode,
12501 plus_constant (Pmode, base,
12503 if (!BYTES_BIG_ENDIAN)
12504 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
12505 gen_rtx_IOR (SImode,
12508 gen_rtx_SUBREG (SImode, operands[0], 0),
12512 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
12513 gen_rtx_IOR (SImode,
12514 gen_rtx_ASHIFT (SImode, scratch,
12516 gen_rtx_SUBREG (SImode, operands[0], 0)));
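/* Illustration (editorial, not from the original source): on a
   little-endian target the code above expands to

     ldrb scratch, [base, #0]       @ low byte, zero-extended
     ldrb out,     [base, #1]       @ high byte, zero-extended
     orr  out, scratch, out, lsl #8

   with the shift direction mirrored for big-endian.  */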
/* Handle storing a half-word to memory during reload by synthesizing it as
two byte stores. Take care not to clobber the input values until after we
have moved them somewhere safe. This code assumes that if the DImode
scratch in operands[2] overlaps either the input value or the output address
in some way, then that value must die in this insn (we absolutely need
two scratch registers for some corner cases). */
12526 arm_reload_out_hi (rtx *operands)
12528 rtx ref = operands[0];
12529 rtx outval = operands[1];
12531 HOST_WIDE_INT offset = 0;
12533 if (GET_CODE (ref) == SUBREG)
12535 offset = SUBREG_BYTE (ref);
12536 ref = SUBREG_REG (ref);
12541 /* We have a pseudo which has been spilt onto the stack; there
12542 are two cases here: the first where there is a simple
12543 stack-slot replacement and a second where the stack-slot is
12544 out of range, or is used as a subreg. */
12545 if (reg_equiv_mem (REGNO (ref)))
12547 ref = reg_equiv_mem (REGNO (ref));
12548 base = find_replacement (&XEXP (ref, 0));
12551 /* The slot is out of range, or was dressed up in a SUBREG. */
12552 base = reg_equiv_address (REGNO (ref));
12555 base = find_replacement (&XEXP (ref, 0));
12557 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
12559 /* Handle the case where the address is too complex to be offset by 1. */
12560 if (GET_CODE (base) == MINUS
12561 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
12563 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12565 /* Be careful not to destroy OUTVAL. */
12566 if (reg_overlap_mentioned_p (base_plus, outval))
/* Updating base_plus might destroy outval; see if we can
swap the scratch and base_plus. */
12570 if (!reg_overlap_mentioned_p (scratch, outval))
12573 scratch = base_plus;
12578 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
/* Be conservative and copy OUTVAL into the scratch now;
this should only be necessary if outval is a subreg
of something larger than a word. */
12583 /* XXX Might this clobber base? I can't see how it can,
12584 since scratch is known to overlap with OUTVAL, and
12585 must be wider than a word. */
12586 emit_insn (gen_movhi (scratch_hi, outval));
12587 outval = scratch_hi;
12591 emit_set_insn (base_plus, base);
12594 else if (GET_CODE (base) == PLUS)
12596 /* The addend must be CONST_INT, or we would have dealt with it above. */
12597 HOST_WIDE_INT hi, lo;
12599 offset += INTVAL (XEXP (base, 1));
12600 base = XEXP (base, 0);
12602 /* Rework the address into a legal sequence of insns. */
12603 /* Valid range for lo is -4095 -> 4095 */
12606 : -((-offset) & 0xfff));
/* Corner case: if lo is the max offset then we would be out of range
12609 once we have added the additional 1 below, so bump the msb into the
12610 pre-loading insn(s). */
12614 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
12615 ^ (HOST_WIDE_INT) 0x80000000)
12616 - (HOST_WIDE_INT) 0x80000000);
12618 gcc_assert (hi + lo == offset);
12622 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12624 /* Be careful not to destroy OUTVAL. */
12625 if (reg_overlap_mentioned_p (base_plus, outval))
/* Updating base_plus might destroy outval; see if we
can swap the scratch and base_plus. */
12629 if (!reg_overlap_mentioned_p (scratch, outval))
12632 scratch = base_plus;
12637 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
/* Be conservative and copy outval into scratch now;
this should only be necessary if outval is a
subreg of something larger than a word. */
12642 /* XXX Might this clobber base? I can't see how it
12643 can, since scratch is known to overlap with
12645 emit_insn (gen_movhi (scratch_hi, outval));
12646 outval = scratch_hi;
12650 /* Get the base address; addsi3 knows how to handle constants
12651 that require more than one insn. */
12652 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
12658 if (BYTES_BIG_ENDIAN)
12660 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12661 plus_constant (Pmode, base,
12663 gen_lowpart (QImode, outval)));
12664 emit_insn (gen_lshrsi3 (scratch,
12665 gen_rtx_SUBREG (SImode, outval, 0),
12667 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12669 gen_lowpart (QImode, scratch)));
12673 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12675 gen_lowpart (QImode, outval)));
12676 emit_insn (gen_lshrsi3 (scratch,
12677 gen_rtx_SUBREG (SImode, outval, 0),
12679 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12680 plus_constant (Pmode, base,
12682 gen_lowpart (QImode, scratch)));
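/* Illustration (editorial, not from the original source): on a
   little-endian target the sequence above is

     strb outval,  [base, #0]       @ low byte
     mov  scratch, outval, lsr #8
     strb scratch, [base, #1]       @ high byte

   with the byte order reversed for big-endian.  */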
12686 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
12687 (padded to the size of a word) should be passed in a register. */
12690 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
12692 if (TARGET_AAPCS_BASED)
12693 return must_pass_in_stack_var_size (mode, type);
12695 return must_pass_in_stack_var_size_or_pad (mode, type);
12699 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
12700 Return true if an argument passed on the stack should be padded upwards,
12701 i.e. if the least-significant byte has useful data.
For legacy APCS ABIs we use the default. For AAPCS-based ABIs, small
aggregate types are placed at the lowest memory address. */
12706 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
12708 if (!TARGET_AAPCS_BASED)
12709 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
12711 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
12718 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
12719 Return !BYTES_BIG_ENDIAN if the least significant byte of the
12720 register has useful data, and return the opposite if the most
12721 significant byte does. */
12724 arm_pad_reg_upward (enum machine_mode mode,
12725 tree type, int first ATTRIBUTE_UNUSED)
12727 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
12729 /* For AAPCS, small aggregates, small fixed-point types,
12730 and small complex types are always padded upwards. */
12733 if ((AGGREGATE_TYPE_P (type)
12734 || TREE_CODE (type) == COMPLEX_TYPE
12735 || FIXED_POINT_TYPE_P (type))
12736 && int_size_in_bytes (type) <= 4)
12741 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
12742 && GET_MODE_SIZE (mode) <= 4)
12747 /* Otherwise, use default padding. */
12748 return !BYTES_BIG_ENDIAN;
12751 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
12752 assuming that the address in the base register is word aligned. */
12754 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
12756 HOST_WIDE_INT max_offset;
/* Offset must be a multiple of 4 in Thumb-2 mode. */
12759 if (TARGET_THUMB2 && ((offset & 3) != 0))
12764 else if (TARGET_ARM)
12769 return ((offset <= max_offset) && (offset >= -max_offset));
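/* Editorial note (not from the original source): the bounds reflect the
   instruction encodings - ARM-mode LDRD/STRD take an 8-bit byte offset
   (so +/-255 is assumed here for MAX_OFFSET), while Thumb-2 takes an
   8-bit offset scaled by 4 (+/-1020).  For example, offset 1020 is
   valid only in Thumb-2, and offset 255 only in ARM mode, since
   Thumb-2 requires a multiple of 4.  */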
12772 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
12773 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
12774 Assumes that the address in the base register RN is word aligned. Pattern
guarantees that both memory accesses use the same base register, that the
offsets are constants within the valid range, and that the gap between the
offsets is 4. If reload is complete, also check that the registers are
legal. WBACK indicates whether the address is updated. LOAD indicates
whether the memory access is a load or a store. */
12780 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
12781 bool wback, bool load)
12783 unsigned int t, t2, n;
12785 if (!reload_completed)
12788 if (!offset_ok_for_ldrd_strd (offset))
12795 if ((TARGET_THUMB2)
12796 && ((wback && (n == t || n == t2))
12797 || (t == SP_REGNUM)
12798 || (t == PC_REGNUM)
12799 || (t2 == SP_REGNUM)
12800 || (t2 == PC_REGNUM)
12801 || (!load && (n == PC_REGNUM))
12802 || (load && (t == t2))
/* Triggers the Cortex-M3 LDRD erratum. */
12804 || (!wback && load && fix_cm3_ldrd && (n == t))))
12808 && ((wback && (n == t || n == t2))
12809 || (t2 == PC_REGNUM)
12810 || (t % 2 != 0) /* First destination register is not even. */
12812 /* PC can be used as base register (for offset addressing only),
but it is deprecated. */
12814 || (n == PC_REGNUM)))
12820 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
12821 operand ADDR is an immediate offset from the base register and is
not volatile, in which case it sets BASE and OFFSET
accordingly. */
12825 mem_ok_for_ldrd_strd (rtx addr, rtx *base, rtx *offset)
12827 /* TODO: Handle more general memory operand patterns, such as
12828 PRE_DEC and PRE_INC. */
/* Convert a subreg of a mem into the mem itself. */
12831 if (GET_CODE (addr) == SUBREG)
12832 addr = alter_subreg (&addr, true);
12834 gcc_assert (MEM_P (addr));
12836 /* Don't modify volatile memory accesses. */
12837 if (MEM_VOLATILE_P (addr))
12840 *offset = const0_rtx;
12842 addr = XEXP (addr, 0);
12848 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
12850 *base = XEXP (addr, 0);
12851 *offset = XEXP (addr, 1);
12852 return (REG_P (*base) && CONST_INT_P (*offset));
12858 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
12860 /* Called from a peephole2 to replace two word-size accesses with a
12861 single LDRD/STRD instruction. Returns true iff we can generate a
12862 new instruction sequence. That is, both accesses use the same base
12863 register and the gap between constant offsets is 4. This function
12864 may reorder its operands to match ldrd/strd RTL templates.
12865 OPERANDS are the operands found by the peephole matcher;
12866 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
corresponding memory operands. LOAD indicates whether the access
12868 is load or store. CONST_STORE indicates a store of constant
12869 integer values held in OPERANDS[4,5] and assumes that the pattern
is 4 insns long, for the purpose of checking dead registers.
12871 COMMUTE indicates that register operands may be reordered. */
12873 gen_operands_ldrd_strd (rtx *operands, bool load,
12874 bool const_store, bool commute)
12877 HOST_WIDE_INT offsets[2], offset;
12878 rtx base = NULL_RTX;
12879 rtx cur_base, cur_offset, tmp;
12881 HARD_REG_SET regset;
12883 gcc_assert (!const_store || !load);
12884 /* Check that the memory references are immediate offsets from the
12885 same base register. Extract the base register, the destination
12886 registers, and the corresponding memory offsets. */
12887 for (i = 0; i < nops; i++)
12889 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
12894 else if (REGNO (base) != REGNO (cur_base))
12897 offsets[i] = INTVAL (cur_offset);
12898 if (GET_CODE (operands[i]) == SUBREG)
12900 tmp = SUBREG_REG (operands[i]);
12901 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
12906 /* Make sure there is no dependency between the individual loads. */
12907 if (load && REGNO (operands[0]) == REGNO (base))
12908 return false; /* RAW */
12910 if (load && REGNO (operands[0]) == REGNO (operands[1]))
12911 return false; /* WAW */
12913 /* If the same input register is used in both stores
12914 when storing different constants, try to find a free register.
For example, the code
   mov r0, 0
   str r0, [r2]
   mov r0, 1
   str r0, [r2, #4]
can be transformed into
   mov r0, 0
   mov r1, 1
   strd r0, r1, [r2]
in Thumb mode assuming that r1 is free. */
12925 && REGNO (operands[0]) == REGNO (operands[1])
12926 && INTVAL (operands[4]) != INTVAL (operands[5]))
12930 CLEAR_HARD_REG_SET (regset);
tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
12932 if (tmp == NULL_RTX)
12935 /* Use the new register in the first load to ensure that
12936 if the original input register is not dead after peephole,
12937 then it will have the correct constant value. */
12940 else if (TARGET_ARM)
12943 int regno = REGNO (operands[0]);
12944 if (!peep2_reg_dead_p (4, operands[0]))
12946 /* When the input register is even and is not dead after the
12947 pattern, it has to hold the second constant but we cannot
form a legal STRD in ARM mode with this register as the second
register. */
12950 if (regno % 2 == 0)
12953 /* Is regno-1 free? */
12954 SET_HARD_REG_SET (regset);
12955 CLEAR_HARD_REG_BIT(regset, regno - 1);
tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
12957 if (tmp == NULL_RTX)
12964 /* Find a DImode register. */
12965 CLEAR_HARD_REG_SET (regset);
tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
12967 if (tmp != NULL_RTX)
12969 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
12970 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
12974 /* Can we use the input register to form a DI register? */
12975 SET_HARD_REG_SET (regset);
12976 CLEAR_HARD_REG_BIT(regset,
12977 regno % 2 == 0 ? regno + 1 : regno - 1);
tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
12979 if (tmp == NULL_RTX)
12981 operands[regno % 2 == 1 ? 0 : 1] = tmp;
12985 gcc_assert (operands[0] != NULL_RTX);
12986 gcc_assert (operands[1] != NULL_RTX);
12987 gcc_assert (REGNO (operands[0]) % 2 == 0);
12988 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
/* Make sure the instructions are ordered with the lower memory access first. */
12993 if (offsets[0] > offsets[1])
12995 gap = offsets[0] - offsets[1];
12996 offset = offsets[1];
12998 /* Swap the instructions such that lower memory is accessed first. */
12999 SWAP_RTX (operands[0], operands[1]);
13000 SWAP_RTX (operands[2], operands[3]);
13002 SWAP_RTX (operands[4], operands[5]);
13006 gap = offsets[1] - offsets[0];
13007 offset = offsets[0];
13010 /* Make sure accesses are to consecutive memory locations. */
13014 /* Make sure we generate legal instructions. */
13015 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
/* In Thumb state, where registers are almost unconstrained, there
is little hope of fixing it. */
13024 if (load && commute)
13026 /* Try reordering registers. */
13027 SWAP_RTX (operands[0], operands[1]);
13028 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
13035 /* If input registers are dead after this pattern, they can be
13036 reordered or replaced by other registers that are free in the
13037 current pattern. */
13038 if (!peep2_reg_dead_p (4, operands[0])
13039 || !peep2_reg_dead_p (4, operands[1]))
13042 /* Try to reorder the input registers. */
/* For example, the code
     mov r0, 0
     mov r1, 1
     str r1, [r2]
     str r0, [r2, #4]
   can be transformed into
     mov r1, 0
     mov r0, 1
     strd r0, r1, [r2]
   in ARM mode, since both input registers are dead after this pattern. */
13053 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
13056 SWAP_RTX (operands[0], operands[1]);
13060 /* Try to find a free DI register. */
13061 CLEAR_HARD_REG_SET (regset);
add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
13067 if (tmp == NULL_RTX)
13070 /* DREG must be an even-numbered register in DImode.
13071 Split it into SI registers. */
13072 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
13073 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
13074 gcc_assert (operands[0] != NULL_RTX);
13075 gcc_assert (operands[1] != NULL_RTX);
13076 gcc_assert (REGNO (operands[0]) % 2 == 0);
13077 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
13079 return (operands_ok_ldrd_strd (operands[0], operands[1],
13092 /* Print a symbolic form of X to the debug file, F. */
13094 arm_print_value (FILE *f, rtx x)
13096 switch (GET_CODE (x))
13099 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
13103 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
13111 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
13113 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
13114 if (i < (CONST_VECTOR_NUNITS (x) - 1))
13122 fprintf (f, "\"%s\"", XSTR (x, 0));
13126 fprintf (f, "`%s'", XSTR (x, 0));
13130 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
13134 arm_print_value (f, XEXP (x, 0));
13138 arm_print_value (f, XEXP (x, 0));
13140 arm_print_value (f, XEXP (x, 1));
13148 fprintf (f, "????");
13153 /* Routines for manipulation of the constant pool. */
13155 /* Arm instructions cannot load a large constant directly into a
13156 register; they have to come from a pc relative load. The constant
13157 must therefore be placed in the addressable range of the pc
13158 relative load. Depending on the precise pc relative load
13159 instruction the range is somewhere between 256 bytes and 4k. This
13160 means that we often have to dump a constant inside a function, and
13161 generate code to branch around it.
13163 It is important to minimize this, since the branches will slow
13164 things down and make the code larger.
13166 Normally we can hide the table after an existing unconditional
13167 branch so that there is no interruption of the flow, but in the
13168 worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...
13186 We fix this by performing a scan after scheduling, which notices
13187 which instructions need to have their operands fetched from the
13188 constant table and builds the table.
13190 The algorithm starts by building a table of all the constants that
13191 need fixing up and all the natural barriers in the function (places
13192 where a constant table can be dropped without breaking the flow).
13193 For each fixup we note how far the pc-relative replacement will be
13194 able to reach and the offset of the instruction into the function.
13196 Having built the table we then group the fixes together to form
13197 tables that are as large as possible (subject to addressing
13198 constraints) and emit each table of constants after the last
13199 barrier that is within range of all the instructions in the group.
13200 If a group does not contain a barrier, then we forcibly create one
13201 by inserting a jump instruction into the flow. Once the table has
13202 been inserted, the insns are then modified to reference the
13203 relevant entry in the pool.
13205 Possible enhancements to the algorithm (not implemented) are:
13207 1) For some processors and object formats, there may be benefit in
13208 aligning the pools to the start of cache lines; this alignment
13209 would need to be taken into account when calculating addressability of a label. */
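/* A minimal illustrative sketch of the pass described above (not part
   of the original sources; the real loop lives in arm_reorg below and
   is considerably more careful):

       for each fix F recorded in minipool_fix_head..tail:
         if F's constant still fits the pool being accumulated:
           add it (add_minipool_forward_ref);
         else:
           dump the pool after the last barrier in range of every
           member of the group, creating one with create_fix_barrier ()
           if the group contains no natural barrier;
           then start accumulating a new pool.  */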
13212 /* These typedefs are located at the start of this file, so that
13213 they can be used in the prototypes there. This comment is to
13214 remind readers of that fact so that the following structures
13215 can be understood more easily.
13217 typedef struct minipool_node Mnode;
13218 typedef struct minipool_fixup Mfix; */
13220 struct minipool_node
13222 /* Doubly linked chain of entries. */
13225 /* The maximum offset into the code at which this entry can be placed. While
13226 pushing fixes for forward references, all entries are sorted in order
13227 of increasing max_address. */
13228 HOST_WIDE_INT max_address;
13229 /* Similarly for an entry inserted for a backwards ref. */
13230 HOST_WIDE_INT min_address;
13231 /* The number of fixes referencing this entry. This can become zero
13232 if we "unpush" an entry. In this case we ignore the entry when we
13233 come to emit the code. */
13235 /* The offset from the start of the minipool. */
13236 HOST_WIDE_INT offset;
13237 /* The value in the table. */
rtx value;
13239 /* The mode of the value. */
13240 enum machine_mode mode;
13241 /* The size of the value. With iWMMXt enabled,
13242 sizes > 4 also imply an alignment of 8 bytes. */
int fix_size;
13246 struct minipool_fixup
13250 HOST_WIDE_INT address;
13252 enum machine_mode mode;
13256 HOST_WIDE_INT forwards;
13257 HOST_WIDE_INT backwards;
13260 /* Fixes less than a word need padding out to a word boundary. */
13261 #define MINIPOOL_FIX_SIZE(mode) \
13262 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
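/* Illustrative expansions (these follow directly from the macro above):

     MINIPOOL_FIX_SIZE (QImode) == 4   (padded up to a word)
     MINIPOOL_FIX_SIZE (HImode) == 4   (padded up to a word)
     MINIPOOL_FIX_SIZE (SImode) == 4
     MINIPOOL_FIX_SIZE (DImode) == 8  */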
13264 static Mnode * minipool_vector_head;
13265 static Mnode * minipool_vector_tail;
13266 static rtx minipool_vector_label;
13267 static int minipool_pad;
13269 /* The linked list of all minipool fixes required for this function. */
13270 Mfix * minipool_fix_head;
13271 Mfix * minipool_fix_tail;
13272 /* The fix entry for the current minipool, once it has been placed. */
13273 Mfix * minipool_barrier;
13275 /* Determines if INSN is the start of a jump table. Returns the end
13276 of the TABLE or NULL_RTX. */
13278 is_jump_table (rtx insn)
13282 if (jump_to_label_p (insn)
13283 && ((table = next_active_insn (JUMP_LABEL (insn)))
13284 == next_active_insn (insn))
13286 && JUMP_TABLE_DATA_P (table))
13292 #ifndef JUMP_TABLES_IN_TEXT_SECTION
13293 #define JUMP_TABLES_IN_TEXT_SECTION 0
13296 static HOST_WIDE_INT
13297 get_jump_table_size (rtx insn)
13299 /* ADDR_VECs only take room if read-only data goes into the text section. */
13301 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
13303 rtx body = PATTERN (insn);
13304 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
13305 HOST_WIDE_INT size;
13306 HOST_WIDE_INT modesize;
13308 modesize = GET_MODE_SIZE (GET_MODE (body));
13309 size = modesize * XVECLEN (body, elt);
13313 /* Round up size of TBB table to a halfword boundary. */
13314 size = (size + 1) & ~(HOST_WIDE_INT)1;
13317 /* No padding necessary for TBH. */
13320 /* Add two bytes for alignment on Thumb. */
13325 gcc_unreachable ();
13333 /* Return the maximum amount of padding that will be inserted before LABEL. */
13336 static HOST_WIDE_INT
13337 get_label_padding (rtx label)
13339 HOST_WIDE_INT align, min_insn_size;
13341 align = 1 << label_to_alignment (label);
13342 min_insn_size = TARGET_THUMB ? 2 : 4;
13343 return align > min_insn_size ? align - min_insn_size : 0;
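/* Worked example (illustrative): for a label aligned to 1 << 3 == 8
   bytes in Thumb code, min_insn_size is 2, so as much as 8 - 2 == 6
   bytes of padding may be inserted before the label.  */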
13346 /* Move a minipool fix MP from its current location to before MAX_MP.
13347 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
13348 constraints may need updating. */
13350 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
13351 HOST_WIDE_INT max_address)
13353 /* The code below assumes these are different. */
13354 gcc_assert (mp != max_mp);
13356 if (max_mp == NULL)
13358 if (max_address < mp->max_address)
13359 mp->max_address = max_address;
13363 if (max_address > max_mp->max_address - mp->fix_size)
13364 mp->max_address = max_mp->max_address - mp->fix_size;
13366 mp->max_address = max_address;
13368 /* Unlink MP from its current position. Since max_mp is non-null,
13369 mp->prev must be non-null. */
13370 mp->prev->next = mp->next;
13371 if (mp->next != NULL)
13372 mp->next->prev = mp->prev;
13374 minipool_vector_tail = mp->prev;
13376 /* Re-insert it before MAX_MP. */
13378 mp->prev = max_mp->prev;
13381 if (mp->prev != NULL)
13382 mp->prev->next = mp;
13384 minipool_vector_head = mp;
13387 /* Save the new entry. */
13390 /* Scan over the preceding entries and adjust their addresses as
13392 while (mp->prev != NULL
13393 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
13395 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
13402 /* Add a constant to the minipool for a forward reference. Returns the
13403 node added or NULL if the constant will not fit in this pool. */
13405 add_minipool_forward_ref (Mfix *fix)
13407 /* If set, max_mp is the first pool_entry that has a lower
13408 constraint than the one we are trying to add. */
13409 Mnode * max_mp = NULL;
13410 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
13413 /* If the minipool starts before the end of FIX->INSN then this FIX
13414 cannot be placed into the current pool. Furthermore, adding the
13415 new constant pool entry may cause the pool to start FIX_SIZE bytes
earlier. */
13417 if (minipool_vector_head &&
13418 (fix->address + get_attr_length (fix->insn)
13419 >= minipool_vector_head->max_address - fix->fix_size))
13422 /* Scan the pool to see if a constant with the same value has
13423 already been added. While we are doing this, also note the
13424 location where we must insert the constant if it doesn't already exist. */
13426 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13428 if (GET_CODE (fix->value) == GET_CODE (mp->value)
13429 && fix->mode == mp->mode
13430 && (!LABEL_P (fix->value)
13431 || (CODE_LABEL_NUMBER (fix->value)
13432 == CODE_LABEL_NUMBER (mp->value)))
13433 && rtx_equal_p (fix->value, mp->value))
13435 /* More than one fix references this entry. */
13437 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
13440 /* Note the insertion point if necessary. */
13442 && mp->max_address > max_address)
13445 /* If we are inserting an 8-byte aligned quantity and
13446 we have not already found an insertion point, then
13447 make sure that all such 8-byte aligned quantities are
13448 placed at the start of the pool. */
13449 if (ARM_DOUBLEWORD_ALIGN
13451 && fix->fix_size >= 8
13452 && mp->fix_size < 8)
13455 max_address = mp->max_address;
13459 /* The value is not currently in the minipool, so we need to create
13460 a new entry for it. If MAX_MP is NULL, the entry will be put on
13461 the end of the list since the placement is less constrained than
13462 any existing entry. Otherwise, we insert the new fix before
13463 MAX_MP and, if necessary, adjust the constraints on the other entries. */
13466 mp->fix_size = fix->fix_size;
13467 mp->mode = fix->mode;
13468 mp->value = fix->value;
13470 /* Not yet required for a backwards ref. */
13471 mp->min_address = -65536;
13473 if (max_mp == NULL)
13475 mp->max_address = max_address;
13477 mp->prev = minipool_vector_tail;
13479 if (mp->prev == NULL)
13481 minipool_vector_head = mp;
13482 minipool_vector_label = gen_label_rtx ();
13485 mp->prev->next = mp;
13487 minipool_vector_tail = mp;
13491 if (max_address > max_mp->max_address - mp->fix_size)
13492 mp->max_address = max_mp->max_address - mp->fix_size;
13494 mp->max_address = max_address;
13497 mp->prev = max_mp->prev;
13499 if (mp->prev != NULL)
13500 mp->prev->next = mp;
13502 minipool_vector_head = mp;
13505 /* Save the new entry. */
13508 /* Scan over the preceding entries and adjust their addresses as
13510 while (mp->prev != NULL
13511 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
13513 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
13521 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
13522 HOST_WIDE_INT min_address)
13524 HOST_WIDE_INT offset;
13526 /* The code below assumes these are different. */
13527 gcc_assert (mp != min_mp);
13529 if (min_mp == NULL)
13531 if (min_address > mp->min_address)
13532 mp->min_address = min_address;
13536 /* We will adjust this below if it is too loose. */
13537 mp->min_address = min_address;
13539 /* Unlink MP from its current position. Since min_mp is non-null,
13540 mp->next must be non-null. */
13541 mp->next->prev = mp->prev;
13542 if (mp->prev != NULL)
13543 mp->prev->next = mp->next;
13545 minipool_vector_head = mp->next;
13547 /* Reinsert it after MIN_MP. */
13549 mp->next = min_mp->next;
13551 if (mp->next != NULL)
13552 mp->next->prev = mp;
13554 minipool_vector_tail = mp;
13560 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13562 mp->offset = offset;
13563 if (mp->refcount > 0)
13564 offset += mp->fix_size;
13566 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
13567 mp->next->min_address = mp->min_address + mp->fix_size;
13573 /* Add a constant to the minipool for a backward reference. Returns the
13574 node added or NULL if the constant will not fit in this pool.
13576 Note that the code for insertion for a backwards reference can be
13577 somewhat confusing because the calculated offsets for each fix do
13578 not take into account the size of the pool (which is still under construction). */
13581 add_minipool_backward_ref (Mfix *fix)
13583 /* If set, min_mp is the last pool_entry that has a lower constraint
13584 than the one we are trying to add. */
13585 Mnode *min_mp = NULL;
13586 /* This can be negative, since it is only a constraint. */
13587 HOST_WIDE_INT min_address = fix->address - fix->backwards;
13590 /* If we can't reach the current pool from this insn, or if we can't
13591 insert this entry at the end of the pool without pushing other
13592 fixes out of range, then we don't try. This ensures that we
13593 can't fail later on. */
13594 if (min_address >= minipool_barrier->address
13595 || (minipool_vector_tail->min_address + fix->fix_size
13596 >= minipool_barrier->address))
13599 /* Scan the pool to see if a constant with the same value has
13600 already been added. While we are doing this, also note the
13601 location where we must insert the constant if it doesn't already exist. */
13603 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
13605 if (GET_CODE (fix->value) == GET_CODE (mp->value)
13606 && fix->mode == mp->mode
13607 && (!LABEL_P (fix->value)
13608 || (CODE_LABEL_NUMBER (fix->value)
13609 == CODE_LABEL_NUMBER (mp->value)))
13610 && rtx_equal_p (fix->value, mp->value)
13611 /* Check that there is enough slack to move this entry to the
13612 end of the table (this is conservative). */
13613 && (mp->max_address
13614 > (minipool_barrier->address
13615 + minipool_vector_tail->offset
13616 + minipool_vector_tail->fix_size)))
13619 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
13622 if (min_mp != NULL)
13623 mp->min_address += fix->fix_size;
13626 /* Note the insertion point if necessary. */
13627 if (mp->min_address < min_address)
13629 /* For now, we do not allow the insertion of nodes that require
13630 8-byte alignment anywhere but at the start of the pool. */
13631 if (ARM_DOUBLEWORD_ALIGN
13632 && fix->fix_size >= 8 && mp->fix_size < 8)
13637 else if (mp->max_address
13638 < minipool_barrier->address + mp->offset + fix->fix_size)
13640 /* Inserting before this entry would push the fix beyond
13641 its maximum address (which can happen if we have
13642 re-located a forwards fix); force the new fix to come
13644 if (ARM_DOUBLEWORD_ALIGN
13645 && fix->fix_size >= 8 && mp->fix_size < 8)
13650 min_address = mp->min_address + fix->fix_size;
13653 /* Do not insert a non-8-byte aligned quantity before 8-byte
13654 aligned quantities. */
13655 else if (ARM_DOUBLEWORD_ALIGN
13656 && fix->fix_size < 8
13657 && mp->fix_size >= 8)
13660 min_address = mp->min_address + fix->fix_size;
13665 /* We need to create a new entry. */
13667 mp->fix_size = fix->fix_size;
13668 mp->mode = fix->mode;
13669 mp->value = fix->value;
13671 mp->max_address = minipool_barrier->address + 65536;
13673 mp->min_address = min_address;
13675 if (min_mp == NULL)
13678 mp->next = minipool_vector_head;
13680 if (mp->next == NULL)
13682 minipool_vector_tail = mp;
13683 minipool_vector_label = gen_label_rtx ();
13686 mp->next->prev = mp;
13688 minipool_vector_head = mp;
13692 mp->next = min_mp->next;
13696 if (mp->next != NULL)
13697 mp->next->prev = mp;
13699 minipool_vector_tail = mp;
13702 /* Save the new entry. */
13710 /* Scan over the following entries and adjust their offsets. */
13711 while (mp->next != NULL)
13713 if (mp->next->min_address < mp->min_address + mp->fix_size)
13714 mp->next->min_address = mp->min_address + mp->fix_size;
13717 mp->next->offset = mp->offset + mp->fix_size;
13719 mp->next->offset = mp->offset;
13728 assign_minipool_offsets (Mfix *barrier)
13730 HOST_WIDE_INT offset = 0;
13733 minipool_barrier = barrier;
13735 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13737 mp->offset = offset;
13739 if (mp->refcount > 0)
13740 offset += mp->fix_size;
13744 /* Output the literal table */
13746 dump_minipool (rtx scan)
13752 if (ARM_DOUBLEWORD_ALIGN)
13753 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13754 if (mp->refcount > 0 && mp->fix_size >= 8)
13761 fprintf (dump_file,
13762 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
13763 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
13765 scan = emit_label_after (gen_label_rtx (), scan);
13766 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
13767 scan = emit_label_after (minipool_vector_label, scan);
13769 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
13771 if (mp->refcount > 0)
13775 fprintf (dump_file,
13776 ";; Offset %u, min %ld, max %ld ",
13777 (unsigned) mp->offset, (unsigned long) mp->min_address,
13778 (unsigned long) mp->max_address);
13779 arm_print_value (dump_file, mp->value);
13780 fputc ('\n', dump_file);
13783 switch (mp->fix_size)
13785 #ifdef HAVE_consttable_1
13787 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
13791 #ifdef HAVE_consttable_2
13793 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
13797 #ifdef HAVE_consttable_4
13799 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
13803 #ifdef HAVE_consttable_8
13805 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
13809 #ifdef HAVE_consttable_16
13811 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
13816 gcc_unreachable ();
13824 minipool_vector_head = minipool_vector_tail = NULL;
13825 scan = emit_insn_after (gen_consttable_end (), scan);
13826 scan = emit_barrier_after (scan);
13829 /* Return the cost of forcibly inserting a barrier after INSN. */
13831 arm_barrier_cost (rtx insn)
13833 /* Basing the location of the pool on the loop depth is preferable,
13834 but at the moment, the basic block information seems to be
13835 corrupt by this stage of the compilation. */
13836 int base_cost = 50;
13837 rtx next = next_nonnote_insn (insn);
13839 if (next != NULL && LABEL_P (next))
13842 switch (GET_CODE (insn))
13845 /* It will always be better to place the table before the label, rather than after it. */
13854 return base_cost - 10;
13857 return base_cost + 10;
13861 /* Find the best place in the insn stream in the range
13862 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
13863 Create the barrier by inserting a jump and add a new fix entry for it. */
13866 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
13868 HOST_WIDE_INT count = 0;
13870 rtx from = fix->insn;
13871 /* The instruction after which we will insert the jump. */
13872 rtx selected = NULL;
13874 /* The address at which the jump instruction will be placed. */
13875 HOST_WIDE_INT selected_address;
13877 HOST_WIDE_INT max_count = max_address - fix->address;
13878 rtx label = gen_label_rtx ();
13880 selected_cost = arm_barrier_cost (from);
13881 selected_address = fix->address;
13883 while (from && count < max_count)
13888 /* This code shouldn't have been called if there was a natural barrier in range. */
13890 gcc_assert (!BARRIER_P (from));
13892 /* Count the length of this insn. This must stay in sync with the
13893 code that pushes minipool fixes. */
13894 if (LABEL_P (from))
13895 count += get_label_padding (from);
13897 count += get_attr_length (from);
13899 /* If there is a jump table, add its length. */
13900 tmp = is_jump_table (from);
13903 count += get_jump_table_size (tmp);
13905 /* Jump tables aren't in a basic block, so base the cost on
13906 the dispatch insn. If we select this location, we will
13907 still put the pool after the table. */
13908 new_cost = arm_barrier_cost (from);
13910 if (count < max_count
13911 && (!selected || new_cost <= selected_cost))
13914 selected_cost = new_cost;
13915 selected_address = fix->address + count;
13918 /* Continue after the dispatch table. */
13919 from = NEXT_INSN (tmp);
13923 new_cost = arm_barrier_cost (from);
13925 if (count < max_count
13926 && (!selected || new_cost <= selected_cost))
13929 selected_cost = new_cost;
13930 selected_address = fix->address + count;
13933 from = NEXT_INSN (from);
13936 /* Make sure that we found a place to insert the jump. */
13937 gcc_assert (selected);
13939 /* Make sure we do not split a call and its corresponding
13940 CALL_ARG_LOCATION note. */
13941 if (CALL_P (selected))
13943 rtx next = NEXT_INSN (selected);
13944 if (next && NOTE_P (next)
13945 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
13949 /* Create a new JUMP_INSN that branches around a barrier. */
13950 from = emit_jump_insn_after (gen_jump (label), selected);
13951 JUMP_LABEL (from) = label;
13952 barrier = emit_barrier_after (from);
13953 emit_label_after (label, barrier);
13955 /* Create a minipool barrier entry for the new barrier. */
13956 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
13957 new_fix->insn = barrier;
13958 new_fix->address = selected_address;
13959 new_fix->next = fix->next;
13960 fix->next = new_fix;
13965 /* Record that there is a natural barrier in the insn stream at ADDRESS. */
13968 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
13970 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13973 fix->address = address;
13976 if (minipool_fix_head != NULL)
13977 minipool_fix_tail->next = fix;
13979 minipool_fix_head = fix;
13981 minipool_fix_tail = fix;
13984 /* Record INSN, which will need fixing up to load a value from the
13985 minipool. ADDRESS is the offset of the insn since the start of the
13986 function; LOC is a pointer to the part of the insn which requires
13987 fixing; VALUE is the constant that must be loaded, which is of type MODE. */
13990 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
13991 enum machine_mode mode, rtx value)
13993 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13996 fix->address = address;
13999 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
14000 fix->value = value;
14001 fix->forwards = get_attr_pool_range (insn);
14002 fix->backwards = get_attr_neg_pool_range (insn);
14003 fix->minipool = NULL;
14005 /* If an insn doesn't have a range defined for it, then it isn't
14006 expecting to be reworked by this code. Better to stop now than
14007 to generate duff assembly code. */
14008 gcc_assert (fix->forwards || fix->backwards);
14010 /* If an entry requires 8-byte alignment then assume all constant pools
14011 require 4 bytes of padding. Trying to do this later on a per-pool
14012 basis is awkward because existing pool entries have to be modified. */
14013 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
14018 fprintf (dump_file,
14019 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
14020 GET_MODE_NAME (mode),
14021 INSN_UID (insn), (unsigned long) address,
14022 -1 * (long)fix->backwards, (long)fix->forwards);
14023 arm_print_value (dump_file, fix->value);
14024 fprintf (dump_file, "\n");
14027 /* Add it to the chain of fixes. */
14030 if (minipool_fix_head != NULL)
14031 minipool_fix_tail->next = fix;
14033 minipool_fix_head = fix;
14035 minipool_fix_tail = fix;
14038 /* Return the cost of synthesizing a 64-bit constant VAL inline.
14039 Returns the number of insns needed, or 99 if we don't know how to do it. */
14042 arm_const_double_inline_cost (rtx val)
14044 rtx lowpart, highpart;
14045 enum machine_mode mode;
14047 mode = GET_MODE (val);
14049 if (mode == VOIDmode)
14052 gcc_assert (GET_MODE_SIZE (mode) == 8);
14054 lowpart = gen_lowpart (SImode, val);
14055 highpart = gen_highpart_mode (SImode, mode, val);
14057 gcc_assert (CONST_INT_P (lowpart));
14058 gcc_assert (CONST_INT_P (highpart));
14060 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
14061 NULL_RTX, NULL_RTX, 0, 0)
14062 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
14063 NULL_RTX, NULL_RTX, 0, 0));
14066 /* Return true if it is worthwhile to split a 64-bit constant into two
14067 32-bit operations. This is the case if optimizing for size, or
14068 if we have load delay slots, or if one 32-bit part can be done with
14069 a single data operation. */
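/* Example (illustrative, not from the sources): for the DImode
   constant 0x100000001 both the high part (1) and the low part (1)
   are valid ARM immediates, so synthesizing the value with two moves
   is preferable to loading it from the constant pool.  */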
14071 arm_const_double_by_parts (rtx val)
14073 enum machine_mode mode = GET_MODE (val);
14076 if (optimize_size || arm_ld_sched)
14079 if (mode == VOIDmode)
14082 part = gen_highpart_mode (SImode, mode, val);
14084 gcc_assert (CONST_INT_P (part));
14086 if (const_ok_for_arm (INTVAL (part))
14087 || const_ok_for_arm (~INTVAL (part)))
14090 part = gen_lowpart (SImode, val);
14092 gcc_assert (CONST_INT_P (part));
14094 if (const_ok_for_arm (INTVAL (part))
14095 || const_ok_for_arm (~INTVAL (part)))
14101 /* Return true if it is possible to inline both the high and low parts
14102 of a 64-bit constant into 32-bit data processing instructions. */
14104 arm_const_double_by_immediates (rtx val)
14106 enum machine_mode mode = GET_MODE (val);
14109 if (mode == VOIDmode)
14112 part = gen_highpart_mode (SImode, mode, val);
14114 gcc_assert (CONST_INT_P (part));
14116 if (!const_ok_for_arm (INTVAL (part)))
14119 part = gen_lowpart (SImode, val);
14121 gcc_assert (CONST_INT_P (part));
14123 if (!const_ok_for_arm (INTVAL (part)))
14129 /* Scan INSN and note any of its operands that need fixing.
14130 If DO_PUSHES is false we do not actually push any of the fixups needed. */
14133 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
14137 extract_insn (insn);
14139 if (!constrain_operands (1))
14140 fatal_insn_not_found (insn);
14142 if (recog_data.n_alternatives == 0)
14145 /* Fill in recog_op_alt with information about the constraints of this insn. */
14147 preprocess_constraints ();
14149 for (opno = 0; opno < recog_data.n_operands; opno++)
14151 /* Things we need to fix can only occur in inputs. */
14152 if (recog_data.operand_type[opno] != OP_IN)
14155 /* If this alternative is a memory reference, then any mention
14156 of constants in this alternative is really to fool reload
14157 into allowing us to accept one there. We need to fix them up
14158 now so that we output the right code. */
14159 if (recog_op_alt[opno][which_alternative].memory_ok)
14161 rtx op = recog_data.operand[opno];
14163 if (CONSTANT_P (op))
14166 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
14167 recog_data.operand_mode[opno], op);
14169 else if (MEM_P (op)
14170 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
14171 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
14175 rtx cop = avoid_constant_pool_reference (op);
14177 /* Casting the address of something to a mode narrower
14178 than a word can cause avoid_constant_pool_reference()
14179 to return the pool reference itself. That's no good to
14180 us here. Let's just hope that we can use the
14181 constant pool value directly. */
14183 cop = get_pool_constant (XEXP (op, 0));
14185 push_minipool_fix (insn, address,
14186 recog_data.operand_loc[opno],
14187 recog_data.operand_mode[opno], cop);
14197 /* Rewrite move insn into subtract of 0 if the condition codes will
14198 be useful in the next conditional jump insn. */
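/* An illustrative sketch of the transformation (not from the sources):

       mov   r1, r0                     subs  r1, r0, #0
       ...                     ==>      ...
       cmp   r0, #0                     beq   .L1
       beq   .L1

   The flag-setting SUBS leaves the condition codes describing the
   moved value, so the explicit compare feeding the conditional branch
   becomes redundant and can later be omitted.  */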
14201 thumb1_reorg (void)
14207 rtx set, dest, src;
14209 rtx prev, insn = BB_END (bb);
14211 while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn))
14212 insn = PREV_INSN (insn);
14214 /* Find the last cbranchsi4_insn in basic block BB. */
14215 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
14218 /* Find the first non-note insn before INSN in basic block BB. */
14219 gcc_assert (insn != BB_HEAD (bb));
14220 prev = PREV_INSN (insn);
14221 while (prev != BB_HEAD (bb) && (NOTE_P (prev) || DEBUG_INSN_P (prev)))
14222 prev = PREV_INSN (prev);
14224 set = single_set (prev);
14228 dest = SET_DEST (set);
14229 src = SET_SRC (set);
14230 if (!low_register_operand (dest, SImode)
14231 || !low_register_operand (src, SImode))
14234 pat = PATTERN (insn);
14235 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
14236 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
14237 in INSN. Don't need to check dest since the cprop_hardreg pass propagates it. */
14239 if (REGNO (op0) == REGNO (src))
14241 dest = copy_rtx (dest);
14242 src = copy_rtx (src);
14243 src = gen_rtx_MINUS (SImode, src, const0_rtx);
14244 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
14245 INSN_CODE (prev) = -1;
14246 /* Set test register in INSN to dest. */
14247 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
14248 INSN_CODE (insn) = -1;
14253 /* Convert instructions to their cc-clobbering variant if possible, since
14254 that allows us to use smaller encodings. */
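/* For instance (illustrative): outside an IT block a non-flag-setting
   "add r0, r1, r2" needs a 32-bit Thumb-2 encoding, whereas the
   flag-setting "adds r0, r1, r2" fits in 16 bits; the conversion is
   only valid where the condition codes are dead, which is what the
   liveness scan below checks.  */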
14257 thumb2_reorg (void)
14262 INIT_REG_SET (&live);
14264 /* We are freeing block_for_insn in the toplev to keep compatibility
14265 with old MDEP_REORGS that are not CFG based. Recompute it now. */
14266 compute_bb_for_insn ();
14273 COPY_REG_SET (&live, DF_LR_OUT (bb));
14274 df_simulate_initialize_backwards (bb, &live);
14275 FOR_BB_INSNS_REVERSE (bb, insn)
14277 if (NONJUMP_INSN_P (insn)
14278 && !REGNO_REG_SET_P (&live, CC_REGNUM)
14279 && GET_CODE (PATTERN (insn)) == SET)
14281 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
14282 rtx pat = PATTERN (insn);
14283 rtx dst = XEXP (pat, 0);
14284 rtx src = XEXP (pat, 1);
14285 rtx op0 = NULL_RTX, op1 = NULL_RTX;
14287 if (!OBJECT_P (src))
14288 op0 = XEXP (src, 0);
14290 if (BINARY_P (src))
14291 op1 = XEXP (src, 1);
14293 if (low_register_operand (dst, SImode))
14295 switch (GET_CODE (src))
14298 /* Adding two registers and storing the result
14299 in the first source is already a 16-bit
14301 if (rtx_equal_p (dst, op0)
14302 && register_operand (op1, SImode))
14305 if (low_register_operand (op0, SImode))
14307 /* ADDS <Rd>,<Rn>,<Rm> */
14308 if (low_register_operand (op1, SImode))
14310 /* ADDS <Rdn>,#<imm8> */
14311 /* SUBS <Rdn>,#<imm8> */
14312 else if (rtx_equal_p (dst, op0)
14313 && CONST_INT_P (op1)
14314 && IN_RANGE (INTVAL (op1), -255, 255))
14316 /* ADDS <Rd>,<Rn>,#<imm3> */
14317 /* SUBS <Rd>,<Rn>,#<imm3> */
14318 else if (CONST_INT_P (op1)
14319 && IN_RANGE (INTVAL (op1), -7, 7))
14325 /* RSBS <Rd>,<Rn>,#0
14326 Not handled here: see NEG below. */
14327 /* SUBS <Rd>,<Rn>,#<imm3>
14329 Not handled here: see PLUS above. */
14330 /* SUBS <Rd>,<Rn>,<Rm> */
14331 if (low_register_operand (op0, SImode)
14332 && low_register_operand (op1, SImode))
14337 /* MULS <Rdm>,<Rn>,<Rdm>
14338 As an exception to the rule, this is only used
14339 when optimizing for size since MULS is slow on all
14340 known implementations. We do not even want to use
14341 MULS in cold code, if optimizing for speed, so we
14342 test the global flag here. */
14343 if (!optimize_size)
14345 /* else fall through. */
14349 /* ANDS <Rdn>,<Rm> */
14350 if (rtx_equal_p (dst, op0)
14351 && low_register_operand (op1, SImode))
14353 else if (rtx_equal_p (dst, op1)
14354 && low_register_operand (op0, SImode))
14355 action = SWAP_CONV;
14361 /* ASRS <Rdn>,<Rm> */
14362 /* LSRS <Rdn>,<Rm> */
14363 /* LSLS <Rdn>,<Rm> */
14364 if (rtx_equal_p (dst, op0)
14365 && low_register_operand (op1, SImode))
14367 /* ASRS <Rd>,<Rm>,#<imm5> */
14368 /* LSRS <Rd>,<Rm>,#<imm5> */
14369 /* LSLS <Rd>,<Rm>,#<imm5> */
14370 else if (low_register_operand (op0, SImode)
14371 && CONST_INT_P (op1)
14372 && IN_RANGE (INTVAL (op1), 0, 31))
14377 /* RORS <Rdn>,<Rm> */
14378 if (rtx_equal_p (dst, op0)
14379 && low_register_operand (op1, SImode))
14385 /* MVNS <Rd>,<Rm> */
14386 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
14387 if (low_register_operand (op0, SImode))
14392 /* MOVS <Rd>,#<imm8> */
14393 if (CONST_INT_P (src)
14394 && IN_RANGE (INTVAL (src), 0, 255))
14399 /* MOVS and MOV<c> with registers have different
14400 encodings, so are not relevant here. */
14408 if (action != SKIP)
14410 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
14411 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
14414 if (action == SWAP_CONV)
14416 src = copy_rtx (src);
14417 XEXP (src, 0) = op1;
14418 XEXP (src, 1) = op0;
14419 pat = gen_rtx_SET (VOIDmode, dst, src);
14420 vec = gen_rtvec (2, pat, clobber);
14422 else /* action == CONV */
14423 vec = gen_rtvec (2, pat, clobber);
14425 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
14426 INSN_CODE (insn) = -1;
14430 if (NONDEBUG_INSN_P (insn))
14431 df_simulate_one_insn_backwards (bb, insn, &live);
14435 CLEAR_REG_SET (&live);
14438 /* GCC puts the pool in the wrong place for ARM, since we can only
14439 load addresses a limited distance around the pc. We do some
14440 special munging to move the constant pool values to the correct
14441 point in the code. */
14446 HOST_WIDE_INT address = 0;
14451 else if (TARGET_THUMB2)
14454 /* Ensure all insns that must be split have been split at this point.
14455 Otherwise, the pool placement code below may compute incorrect
14456 insn lengths. Note that when optimizing, all insns have already
14457 been split at this point. */
14459 split_all_insns_noflow ();
14461 minipool_fix_head = minipool_fix_tail = NULL;
14463 /* The first insn must always be a note, or the code below won't
14464 scan it properly. */
14465 insn = get_insns ();
14466 gcc_assert (NOTE_P (insn));
14469 /* Scan all the insns and record the operands that will need fixing. */
14470 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
14472 if (BARRIER_P (insn))
14473 push_minipool_barrier (insn, address);
14474 else if (INSN_P (insn))
14478 note_invalid_constants (insn, address, true);
14479 address += get_attr_length (insn);
14481 /* If the insn is a vector jump, add the size of the table
14482 and skip the table. */
14483 if ((table = is_jump_table (insn)) != NULL)
14485 address += get_jump_table_size (table);
14489 else if (LABEL_P (insn))
14490 /* Add the worst-case padding due to alignment. We don't add
14491 the _current_ padding because the minipool insertions
14492 themselves might change it. */
14493 address += get_label_padding (insn);
14496 fix = minipool_fix_head;
14498 /* Now scan the fixups and perform the required changes. */
14503 Mfix * last_added_fix;
14504 Mfix * last_barrier = NULL;
14507 /* Skip any further barriers before the next fix. */
14508 while (fix && BARRIER_P (fix->insn))
14511 /* No more fixes. */
14515 last_added_fix = NULL;
14517 for (ftmp = fix; ftmp; ftmp = ftmp->next)
14519 if (BARRIER_P (ftmp->insn))
14521 if (ftmp->address >= minipool_vector_head->max_address)
14524 last_barrier = ftmp;
14526 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
14529 last_added_fix = ftmp; /* Keep track of the last fix added. */
14532 /* If we found a barrier, drop back to that; any fixes that we
14533 could have reached but come after the barrier will now go in
14534 the next mini-pool. */
14535 if (last_barrier != NULL)
14537 /* Reduce the refcount for those fixes that won't go into this pool after all. */
14539 for (fdel = last_barrier->next;
14540 fdel && fdel != ftmp;
14543 fdel->minipool->refcount--;
14544 fdel->minipool = NULL;
14547 ftmp = last_barrier;
14551 /* ftmp is the first fix that we can't fit into this pool and
14552 there are no natural barriers that we could use. Insert a
14553 new barrier in the code somewhere between the previous
14554 fix and this one, and arrange to jump around it. */
14555 HOST_WIDE_INT max_address;
14557 /* The last item on the list of fixes must be a barrier, so
14558 we can never run off the end of the list of fixes without
14559 last_barrier being set. */
14562 max_address = minipool_vector_head->max_address;
14563 /* Check that there isn't another fix that is in range that
14564 we couldn't fit into this pool because the pool was
14565 already too large: we need to put the pool before such an
14566 instruction. The pool itself may come just after the
14567 fix because create_fix_barrier also allows space for a
14568 jump instruction. */
14569 if (ftmp->address < max_address)
14570 max_address = ftmp->address + 1;
14572 last_barrier = create_fix_barrier (last_added_fix, max_address);
14575 assign_minipool_offsets (last_barrier);
14579 if (!BARRIER_P (ftmp->insn)
14580 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
14587 /* Scan over the fixes we have identified for this pool, fixing them
14588 up and adding the constants to the pool itself. */
14589 for (this_fix = fix; this_fix && ftmp != this_fix;
14590 this_fix = this_fix->next)
14591 if (!BARRIER_P (this_fix->insn))
14594 = plus_constant (Pmode,
14595 gen_rtx_LABEL_REF (VOIDmode,
14596 minipool_vector_label),
14597 this_fix->minipool->offset);
14598 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
14601 dump_minipool (last_barrier->insn);
14605 /* From now on we must synthesize any constants that we can't handle
14606 directly. This can happen if the RTL gets split during final
14607 instruction generation. */
14608 after_arm_reorg = 1;
14610 /* Free the minipool memory. */
14611 obstack_free (&minipool_obstack, minipool_startobj);
14614 /* Routines to output assembly language. */
14616 /* If the rtx is the correct value, then return the string form of the number.
14617 In this way we can ensure that valid double constants are generated even
14618 when cross compiling. */
14620 fp_immediate_constant (rtx x)
14624 if (!fp_consts_inited)
14627 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14629 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
14633 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
14634 static const char *
14635 fp_const_from_val (REAL_VALUE_TYPE *r)
14637 if (!fp_consts_inited)
14640 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
14644 /* OPERANDS[0] is the entire list of insns that constitute the pop,
14645 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
14646 is in the list, UPDATE is true iff the list contains explicit
14647 update of base register. */
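/* For example (illustrative): popping r4, r5 and the return address
   with SP as the base register and an update yields "pop {r4, r5, pc}",
   while a non-SP base without update yields something like
   "ldmia r7, {r4, r5, pc}".  */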
14649 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
14655 const char *conditional;
14656 int num_saves = XVECLEN (operands[0], 0);
14657 unsigned int regno;
14658 unsigned int regno_base = REGNO (operands[1]);
14661 offset += update ? 1 : 0;
14662 offset += return_pc ? 1 : 0;
14664 /* Is the base register in the list? */
14665 for (i = offset; i < num_saves; i++)
14667 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
14668 /* If SP is in the list, then the base register must be SP. */
14669 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
14670 /* If base register is in the list, there must be no explicit update. */
14671 if (regno == regno_base)
14672 gcc_assert (!update);
14675 conditional = reverse ? "%?%D0" : "%?%d0";
14676 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
14678 /* Output pop (not ldmfd) because it has a shorter encoding. */
14679 gcc_assert (update);
14680 sprintf (pattern, "pop%s\t{", conditional);
14684 /* Output ldmfd when the base register is SP, otherwise output ldmia.
14685 It's just a convention; their semantics are identical. */
14686 if (regno_base == SP_REGNUM)
14687 sprintf (pattern, "ldm%sfd\t", conditional);
14688 else if (TARGET_UNIFIED_ASM)
14689 sprintf (pattern, "ldmia%s\t", conditional);
14691 sprintf (pattern, "ldm%sia\t", conditional);
14693 strcat (pattern, reg_names[regno_base]);
14695 strcat (pattern, "!, {");
14697 strcat (pattern, ", {");
14700 /* Output the first destination register. */
14702 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
14704 /* Output the rest of the destination registers. */
14705 for (i = offset + 1; i < num_saves; i++)
14707 strcat (pattern, ", ");
14709 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
14712 strcat (pattern, "}");
14714 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
14715 strcat (pattern, "^");
14717 output_asm_insn (pattern, &cond);
14721 /* Output the assembly for a store multiple. */
14724 vfp_output_fstmd (rtx * operands)
14731 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
14732 p = strlen (pattern);
14734 gcc_assert (REG_P (operands[1]));
14736 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
14737 for (i = 1; i < XVECLEN (operands[2], 0); i++)
14739 p += sprintf (&pattern[p], ", d%d", base + i);
14741 strcpy (&pattern[p], "}");
14743 output_asm_insn (pattern, operands);
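/* Illustrative output (not from the sources): for a block of three
   D registers starting at d8 this emits "fstmfdd sp!, {d8, d9, d10}".  */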
14748 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
14749 number of bytes pushed. */
14752 vfp_emit_fstmd (int base_reg, int count)
14759 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
14760 register pairs are stored by a store multiple insn. We avoid this
14761 by pushing an extra pair. */
14762 if (count == 2 && !arm_arch6)
14764 if (base_reg == LAST_VFP_REGNUM - 3)
14769 /* FSTMD may not store more than 16 doubleword registers at once. Split
14770 larger stores into multiple parts (up to a maximum of two, in practice). */
14775 /* NOTE: base_reg is an internal register number, so each D register counts as 2. */
14777 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
14778 saved += vfp_emit_fstmd (base_reg, 16);
14782 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
14783 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
14785 reg = gen_rtx_REG (DFmode, base_reg);
14788 XVECEXP (par, 0, 0)
14789 = gen_rtx_SET (VOIDmode,
14792 gen_rtx_PRE_MODIFY (Pmode,
14795 (Pmode, stack_pointer_rtx,
14798 gen_rtx_UNSPEC (BLKmode,
14799 gen_rtvec (1, reg),
14800 UNSPEC_PUSH_MULT));
14802 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14803 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
14804 RTX_FRAME_RELATED_P (tmp) = 1;
14805 XVECEXP (dwarf, 0, 0) = tmp;
14807 tmp = gen_rtx_SET (VOIDmode,
14808 gen_frame_mem (DFmode, stack_pointer_rtx),
14810 RTX_FRAME_RELATED_P (tmp) = 1;
14811 XVECEXP (dwarf, 0, 1) = tmp;
14813 for (i = 1; i < count; i++)
14815 reg = gen_rtx_REG (DFmode, base_reg);
14817 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
14819 tmp = gen_rtx_SET (VOIDmode,
14820 gen_frame_mem (DFmode,
14821 plus_constant (Pmode,
14825 RTX_FRAME_RELATED_P (tmp) = 1;
14826 XVECEXP (dwarf, 0, i + 1) = tmp;
14829 par = emit_insn (par);
14830 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14831 RTX_FRAME_RELATED_P (par) = 1;
14836 /* Emit a call instruction with pattern PAT. ADDR is the address of
14837 the call target. */
14840 arm_emit_call_insn (rtx pat, rtx addr)
14844 insn = emit_call_insn (pat);
14846 /* The PIC register is live on entry to VxWorks PIC PLT entries.
14847 If the call might use such an entry, add a use of the PIC register
14848 to the instruction's CALL_INSN_FUNCTION_USAGE. */
14849 if (TARGET_VXWORKS_RTP
14851 && GET_CODE (addr) == SYMBOL_REF
14852 && (SYMBOL_REF_DECL (addr)
14853 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
14854 : !SYMBOL_REF_LOCAL_P (addr)))
14856 require_pic_register ();
14857 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
14861 /* Output a 'call' insn. */
14863 output_call (rtx *operands)
14865 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
14867 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
14868 if (REGNO (operands[0]) == LR_REGNUM)
14870 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
14871 output_asm_insn ("mov%?\t%0, %|lr", operands);
14874 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14876 if (TARGET_INTERWORK || arm_arch4t)
14877 output_asm_insn ("bx%?\t%0", operands);
14879 output_asm_insn ("mov%?\t%|pc, %0", operands);
14884 /* Output a 'call' insn that is a reference in memory. This is
14885 disabled for ARMv5 and we prefer a blx instead because otherwise
14886 there's a significant performance overhead. */
14888 output_call_mem (rtx *operands)
14890 gcc_assert (!arm_arch5);
14891 if (TARGET_INTERWORK)
14893 output_asm_insn ("ldr%?\t%|ip, %0", operands);
14894 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14895 output_asm_insn ("bx%?\t%|ip", operands);
14897 else if (regno_use_in (LR_REGNUM, operands[0]))
14899 /* LR is used in the memory address. We load the address in the
14900 first instruction. It's safe to use IP as the target of the
14901 load since the call will kill it anyway. */
14902 output_asm_insn ("ldr%?\t%|ip, %0", operands);
14903 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14905 output_asm_insn ("bx%?\t%|ip", operands);
14907 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
14911 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14912 output_asm_insn ("ldr%?\t%|pc, %0", operands);
14919 /* Output a move from ARM registers to ARM registers of a long double.
14920 OPERANDS[0] is the destination.
14921 OPERANDS[1] is the source. */
14923 output_mov_long_double_arm_from_arm (rtx *operands)
14925 /* We have to be careful here because the two might overlap. */
14926 int dest_start = REGNO (operands[0]);
14927 int src_start = REGNO (operands[1]);
14931 if (dest_start < src_start)
14933 for (i = 0; i < 3; i++)
14935 ops[0] = gen_rtx_REG (SImode, dest_start + i);
14936 ops[1] = gen_rtx_REG (SImode, src_start + i);
14937 output_asm_insn ("mov%?\t%0, %1", ops);
14942 for (i = 2; i >= 0; i--)
14944 ops[0] = gen_rtx_REG (SImode, dest_start + i);
14945 ops[1] = gen_rtx_REG (SImode, src_start + i);
14946 output_asm_insn ("mov%?\t%0, %1", ops);
14954 arm_emit_movpair (rtx dest, rtx src)
14956 /* If the src is an immediate, simplify it. */
14957 if (CONST_INT_P (src))
14959 HOST_WIDE_INT val = INTVAL (src);
14960 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
14961 if ((val >> 16) & 0x0000ffff)
14962 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
14964 GEN_INT ((val >> 16) & 0x0000ffff));
14967 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
14968 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
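/* Illustrative usage (not from the sources): for an immediate source
   such as 0x12345678, the SET of the low half and the ZERO_EXTRACT of
   the high half emitted above correspond roughly to

       movw  rd, #0x5678    @ dest = val & 0xffff
       movt  rd, #0x1234    @ dest[31:16] = val >> 16  */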
14971 /* Output a move between double words. It must be REG<-MEM or MEM<-REG. */
14974 output_move_double (rtx *operands, bool emit, int *count)
14976 enum rtx_code code0 = GET_CODE (operands[0]);
14977 enum rtx_code code1 = GET_CODE (operands[1]);
14982 /* The only case when this might happen is when
14983 you are looking at the length of a DImode instruction
14984 that has an invalid constant in it. */
14985 if (code0 == REG && code1 != MEM)
14987 gcc_assert (!emit);
14994 unsigned int reg0 = REGNO (operands[0]);
14996 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
14998 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
15000 switch (GET_CODE (XEXP (operands[1], 0)))
15007 && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
15008 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
15010 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
15015 gcc_assert (TARGET_LDRD);
15017 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
15024 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
15026 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
15034 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
15036 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
15041 gcc_assert (TARGET_LDRD);
15043 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
15048 /* Autoincrement addressing modes should never have overlapping
15049 base and destination registers, and overlapping index registers
15050 are already prohibited, so this doesn't need to worry about
fix_cm3_ldrd. */
15052 otherops[0] = operands[0];
15053 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
15054 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
15056 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
15058 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
15060 /* Registers overlap so split out the increment. */
15063 output_asm_insn ("add%?\t%1, %1, %2", otherops);
15064 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
15071 /* Use a single insn if we can.
15072 FIXME: IWMMXT allows offsets larger than ldrd can
15073 handle, fix these up with a pair of ldr. */
15075 || !CONST_INT_P (otherops[2])
15076 || (INTVAL (otherops[2]) > -256
15077 && INTVAL (otherops[2]) < 256))
15080 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
15086 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
15087 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
15097 /* Use a single insn if we can.
15098 FIXME: IWMMXT allows offsets larger than ldrd can handle,
15099 fix these up with a pair of ldr. */
15101 || !CONST_INT_P (otherops[2])
15102 || (INTVAL (otherops[2]) > -256
15103 && INTVAL (otherops[2]) < 256))
15106 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
15112 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
15113 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
15123 /* We might be able to use ldrd %0, %1 here. However the range is
15124 different to ldr/adr, and it is broken on some ARMv7-M
15125 implementations. */
15126 /* Use the second register of the pair to avoid problematic
15128 otherops[1] = operands[1];
15130 output_asm_insn ("adr%?\t%0, %1", otherops);
15131 operands[1] = otherops[0];
15135 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
15137 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
15144 /* ??? This needs checking for thumb2. */
15146 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
15147 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
15149 otherops[0] = operands[0];
15150 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
15151 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
15153 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
15155 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
15157 switch ((int) INTVAL (otherops[2]))
15161 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
15167 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
15173 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
15177 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
15178 operands[1] = otherops[0];
15180 && (REG_P (otherops[2])
15182 || (CONST_INT_P (otherops[2])
15183 && INTVAL (otherops[2]) > -256
15184 && INTVAL (otherops[2]) < 256)))
15186 if (reg_overlap_mentioned_p (operands[0],
15190 /* Swap base and index registers over to
15191 avoid a conflict. */
15193 otherops[1] = otherops[2];
15196 /* If both registers conflict, it will usually
15197 have been fixed by a splitter. */
15198 if (reg_overlap_mentioned_p (operands[0], otherops[2])
15199 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
15203 output_asm_insn ("add%?\t%0, %1, %2", otherops);
15204 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
15211 otherops[0] = operands[0];
15213 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
15218 if (CONST_INT_P (otherops[2]))
15222 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
15223 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
15225 output_asm_insn ("add%?\t%0, %1, %2", otherops);
15231 output_asm_insn ("add%?\t%0, %1, %2", otherops);
15237 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
15244 return "ldr%(d%)\t%0, [%1]";
15246 return "ldm%(ia%)\t%1, %M0";
15250 otherops[1] = adjust_address (operands[1], SImode, 4);
15251 /* Take care of overlapping base/data reg. */
15252 if (reg_mentioned_p (operands[0], operands[1]))
15256 output_asm_insn ("ldr%?\t%0, %1", otherops);
15257 output_asm_insn ("ldr%?\t%0, %1", operands);
15267 output_asm_insn ("ldr%?\t%0, %1", operands);
15268 output_asm_insn ("ldr%?\t%0, %1", otherops);
15278 /* Constraints should ensure this. */
15279 gcc_assert (code0 == MEM && code1 == REG);
15280 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
15281 || (TARGET_ARM && TARGET_LDRD));
15283 switch (GET_CODE (XEXP (operands[0], 0)))
15289 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
15291 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
15296 gcc_assert (TARGET_LDRD);
15298 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
15305 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
15307 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
15315 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
15317 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
15322 gcc_assert (TARGET_LDRD);
15324 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
15329 otherops[0] = operands[1];
15330 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
15331 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
15333 /* IWMMXT allows offsets larger than strd can handle;
15334 fix these up with a pair of str. */
15336 && CONST_INT_P (otherops[2])
15337 && (INTVAL (otherops[2]) <= -256
15338 || INTVAL (otherops[2]) >= 256))
15340 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
15344 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
15345 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
15354 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
15355 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
15361 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
15364 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
15369 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
15374 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
15375 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
15377 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
15381 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
15388 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
15395 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
15400 && (REG_P (otherops[2])
15402 || (CONST_INT_P (otherops[2])
15403 && INTVAL (otherops[2]) > -256
15404 && INTVAL (otherops[2]) < 256)))
15406 otherops[0] = operands[1];
15407 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
15409 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
15415 otherops[0] = adjust_address (operands[0], SImode, 4);
15416 otherops[1] = operands[1];
15419 output_asm_insn ("str%?\t%1, %0", operands);
15420 output_asm_insn ("str%?\t%H1, %0", otherops);
15430 /* Output a move, load or store for quad-word vectors in ARM registers. Only
15431 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
15434 output_move_quad (rtx *operands)
15436 if (REG_P (operands[0]))
15438 /* Load, or reg->reg move. */
15440 if (MEM_P (operands[1]))
15442 switch (GET_CODE (XEXP (operands[1], 0)))
15445 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
15450 output_asm_insn ("adr%?\t%0, %1", operands);
15451 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
15455 gcc_unreachable ();
15463 gcc_assert (REG_P (operands[1]));
15465 dest = REGNO (operands[0]);
15466 src = REGNO (operands[1]);
15468 /* This seems pretty dumb, but hopefully GCC won't try to do it very often. */
15471 for (i = 0; i < 4; i++)
15473 ops[0] = gen_rtx_REG (SImode, dest + i);
15474 ops[1] = gen_rtx_REG (SImode, src + i);
15475 output_asm_insn ("mov%?\t%0, %1", ops);
15478 for (i = 3; i >= 0; i--)
15480 ops[0] = gen_rtx_REG (SImode, dest + i);
15481 ops[1] = gen_rtx_REG (SImode, src + i);
15482 output_asm_insn ("mov%?\t%0, %1", ops);
15488 gcc_assert (MEM_P (operands[0]));
15489 gcc_assert (REG_P (operands[1]));
15490 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
15492 switch (GET_CODE (XEXP (operands[0], 0)))
15495 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
15499 gcc_unreachable ();
15506 /* Output a VFP load or store instruction. */
15509 output_move_vfp (rtx *operands)
15511 rtx reg, mem, addr, ops[2];
15512 int load = REG_P (operands[0]);
15513 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
15514 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
15517 enum machine_mode mode;
15519 reg = operands[!load];
15520 mem = operands[load];
15522 mode = GET_MODE (reg);
15524 gcc_assert (REG_P (reg));
15525 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
15526 gcc_assert (mode == SFmode
15530 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
15531 gcc_assert (MEM_P (mem));
15533 addr = XEXP (mem, 0);
15535 switch (GET_CODE (addr))
15538 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
15539 ops[0] = XEXP (addr, 0);
15544 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
15545 ops[0] = XEXP (addr, 0);
15550 templ = "f%s%c%%?\t%%%s0, %%1%s";
15556 sprintf (buff, templ,
15557 load ? "ld" : "st",
15560 integer_p ? "\t%@ int" : "");
15561 output_asm_insn (buff, ops);
15566 /* Output a Neon double-word or quad-word load or store, or a load
15567 or store for larger structure modes.
15569 WARNING: The ordering of elements is weird in big-endian mode,
15570 because the EABI requires that vectors stored in memory appear
15571 as though they were stored by a VSTM instruction.
15572 GCC RTL defines element ordering based on in-memory order.
15573 This can be different from the architectural ordering of elements
15574 within a NEON register. The intrinsics defined in arm_neon.h use the
15575 NEON register element ordering, not the GCC RTL element ordering.
15577 For example, the in-memory ordering of a big-endian quadword
15578 vector with 16-bit elements when stored from register pair {d0,d1}
15579 will be (lowest address first, d0[N] is NEON register element N):
15581 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
15583 When necessary, quadword registers (dN, dN+1) are moved to ARM
15584 registers from rN in the order:
15586 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
15588 So that STM/LDM can be used on vectors in ARM registers, and the
15589 same memory layout will result as if VSTM/VLDM were used.
15591 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
15592 possible, which allows use of appropriate alignment tags.
15593 Note that the choice of "64" is independent of the actual vector
15594 element size; this size simply ensures that the behavior is
15595 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
15597 Due to limitations of those instructions, use of VST1.64/VLD1.64
15598 is not possible if:
15599 - the address contains PRE_DEC, or
15600 - the mode refers to more than 4 double-word registers
15602 In those cases, it would be possible to replace VSTM/VLDM by a
15603 sequence of instructions; this is not currently implemented since
15604 this is not certain to actually improve performance. */
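/* A purely illustrative sketch of the two forms (the register and
   address choices below are hypothetical, not taken from any test
   case): storing a quad-word value held in q4 (i.e. d8/d9) to the
   address in r0 would normally come out as

	vst1.64	{d8, d9}, [r0:64]

   while a pre-decrement address forces the VSTM form

	vstmdb	r0!, {d8, d9}

   since vst1 has no pre-decrement variant.  The ":64" alignment tag
   is the reason VST1.64 is preferred when the addressing mode allows
   it.  */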
const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, nregs, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  enum machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  nregs = HARD_REGNO_NREGS (regno, mode) / 2;
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
              || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
              || VALID_NEON_QREG_MODE (mode)
              || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 4)
        {
          templ = "v%smia%%?\t%%0!, %%h1";
          ops[0] = XEXP (addr, 0);
        }
      else
        templ = "v%s1.64\t%%h1, %%A0";
      break;

    case PRE_DEC:
      /* We have to use vldm / vstm in this case, since there is no
         pre-decrement form of the vld1 / vst1 instructions.  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case LABEL_REF:
    case PLUS:
      for (i = 0; i < nregs; i++)
        {
          /* We're only using DImode here because it's a convenient size.  */
          ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
          ops[1] = adjust_address (mem, DImode, 8 * i);
          if (reg_overlap_mentioned_p (ops[0], mem))
            {
              gcc_assert (overlap == -1);
              overlap = i;
            }
          else
            {
              sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
              output_asm_insn (buff, ops);
            }
        }
      if (overlap != -1)
        {
          ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
          ops[1] = adjust_address (mem, SImode, 8 * overlap);
          sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
          output_asm_insn (buff, ops);
        }
      break;

    default:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 4)
        templ = "v%smia%%?\t%%m0, %%h1";
      else
        templ = "v%s1.64\t%%h1, %%A0";
      break;
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);
/* Compute and return the length of neon_mov<mode>, where <mode> is
   one of VSTRUCT modes: EI, OI, CI or XI.  */
int
arm_attr_length_move_neon (rtx insn)
{
  rtx reg, mem, addr;
  int load;
  enum machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);

      gcc_unreachable ();
    }

  load = REG_P (recog_data.operand[0]);
  reg = recog_data.operand[!load];
  mem = recog_data.operand[load];

  gcc_assert (MEM_P (mem));

  mode = GET_MODE (reg);
  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
    {
      int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;

      return insns * 4;
    }
  else
    return 4;
}

/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return 0.  */
int
arm_address_offset_is_imm (rtx insn)
{
  rtx mem, addr;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]))
    return 0;

  mem = recog_data.operand[0];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr)
      || (GET_CODE (addr) == PLUS
          && REG_P (XEXP (addr, 0))
          && CONST_INT_P (XEXP (addr, 1))))
    return 1;
  else
    return 0;
}
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
        output_multi_immediate (operands,
                                "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
                                -n);
      else
        output_multi_immediate (operands,
                                "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
                                n);
    }

  return "";
}

/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
                        int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
        if (n & (3 << i))
          {
            operands[immed_op] = GEN_INT (n & (255 << i));
            output_asm_insn (instr, operands);
            instr = instr2;
            i += 6;
          }
    }

  return "";
}
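/* Worked example (hypothetical operands): for the constant 0x45100 the
   loop above peels off one 8-bit-wide chunk at a time, starting roughly
   at the lowest set bit, so output_add_immediate would emit

	add	r0, r1, #0x5100
	add	r0, r0, #0x40000

   because 0x45100 itself is not an immediate the ARM data-processing
   instructions can encode (each immediate is an 8-bit value rotated
   right by an even amount), but 0x5100 and 0x40000 both are.  */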
/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem (enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;

    case ASHIFTRT:
      return "asr";

    case LSHIFTRT:
      return "lsr";

    case ROTATERT:
      return "ror";

    default:
      gcc_unreachable ();
    }
}

/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
static const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case MINUS:
      return shift_first_arg ? "rsb" : "sub";

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem (GET_CODE (op));

    default:
      gcc_unreachable ();
    }
}

/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char *mnem;
  enum rtx_code code = GET_CODE (op);

  switch (code)
    {
    case ROTATE:
      if (!CONST_INT_P (XEXP (op, 1)))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      code = ROTATERT;
      *amountp = 32 - INTVAL (XEXP (op, 1));
      mnem = "ror";
      break;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem (code);
      if (CONST_INT_P (XEXP (op, 1)))
        *amountp = INTVAL (XEXP (op, 1));
      else if (REG_P (XEXP (op, 1)))
        {
          *amountp = -1;
          return mnem;
        }
      else
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
         power of 2, since this case can never be reloaded from a reg.  */
      if (!CONST_INT_P (XEXP (op, 1)))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;

      /* Amount must be a power of two.  */
      if (*amountp & (*amountp - 1))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      *amountp = int_log2 (*amountp);
      return ARM_LSL_NAME;

    default:
      output_operand_lossage ("invalid shift operand");
      return NULL;
    }

  /* This is not 100% correct, but follows from the desire to merge
     multiplication by a power of 2 with the recognizer for a
     shift.  >=32 is not a valid shift for "lsl", so we must try to
     output a shift that produces the correct arithmetical result.
     Using lsr #32 is identical except for the fact that the carry bit
     is not set correctly if we set the flags; but we never use the
     carry bit from such an operation, so we can ignore that.  */
  if (code == ROTATERT)
    /* Rotate is just modulo 32.  */
    *amountp &= 31;
  else if (*amountp != (*amountp & 31))
    {
      if (code == ASHIFT)
        mnem = "lsr";
      *amountp = 32;
    }

  /* Shifts of 0 are no-ops.  */
  if (*amountp == 0)
    return NULL;

  return mnem;
}

/* Obtain the shift from the POWER of two.  */

static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT power)
{
  HOST_WIDE_INT shift = 0;

  while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
    {
      gcc_assert (shift <= 31);
      shift++;
    }

  return shift;
}
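/* For instance, int_log2 (8) returns 3.  The argument is assumed to be
   an exact power of two; the assertion above bounds the search.  */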
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
        {
          fputs ("\"\n\t.ascii\t\"", stream);
          len_so_far = 0;
        }

      if (ISPRINT (c))
        {
          if (c == '\\' || c == '\"')
            {
              putc ('\\', stream);
              len_so_far++;
            }
          putc (c, stream);
          len_so_far++;
        }
      else
        {
          fprintf (stream, "\\%03o", c);
          len_so_far += 4;
        }
    }

  fputs ("\"\n", stream);
}
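/* As an example, emitting the three bytes 'a', '"' and '\n' through
   this routine produces

	.ascii	"a\"\012"

   with a new .ascii directive started whenever MAX_ASCII_LEN
   characters have already been emitted on the current one.  */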
/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_reg_mask.  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
         even call clobbered ones.  If this is a leaf function
         we can just examine the registers used by the RTL, but
         otherwise we have to assume that whatever function is
         called might clobber anything, and so we have to save
         all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
        /* FIQ handlers have registers r8 - r12 banked, so
           we only need to check r0 - r7.  Normal ISRs only
           bank r14 and r15, so we must check up to r12.
           r13 is the stack pointer which is always preserved,
           so we do not need to consider it here.  */
        max_reg = 7;
      else
        max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
        if (df_regs_ever_live_p (reg)
            || (! crtl->is_leaf && call_used_regs[reg]))
          save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (flag_pic
          && !TARGET_SINGLE_PIC_BASE
          && arm_pic_register != INVALID_REGNUM
          && crtl->uses_pic_offset_table)
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE (func_type))
    {
      /* For noreturn functions we historically omitted register saves
         altogether.  However this really messes up debugging.  As a
         compromise save just the frame pointers.  Combined with the link
         register saved elsewhere this should be sufficient to get
         a backtrace.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
         which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
        if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
          save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
         don't stack it even though it may be live.  */
      if (flag_pic
          && !TARGET_SINGLE_PIC_BASE
          && arm_pic_register != INVALID_REGNUM
          && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
              || crtl->uses_pic_offset_table))
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
        save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
        {
          reg = EH_RETURN_DATA_REGNO (i);
          if (reg == INVALID_REGNUM)
            break;
          save_reg_mask |= 1 << reg;
        }
    }

  return save_reg_mask;
}

/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int
arm_compute_static_chain_stack_bytes (void)
{
  unsigned long func_type = arm_current_func_type ();
  int static_chain_stack_bytes = 0;

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
      && IS_NESTED (func_type)
      && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
    static_chain_stack_bytes = 4;

  return static_chain_stack_bytes;
}
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.
   This is used by arm_get_frame_offsets, which may add extra registers.  */

static unsigned long
arm_compute_save_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
          && optimize_size
          && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
          && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
           + ARM_NUM_INTS (crtl->args.pretend_args_size +
                           arm_compute_static_chain_stack_bytes ())
           ) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
         onto the stack is odd.  We need to ensure that the stack
         is 64-bit aligned before we start to save iWMMXt registers,
         and also before we start to create locals.  (A local variable
         might be a double or long long which we will load/store using
         an iWMMXt instruction).  Therefore we need to push another
         ARM register, so that the stack will be 64-bit aligned.  We
         try to avoid using the arg registers (r0 - r3) as they might be
         used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
        if ((save_reg_mask & (1 << reg)) == 0)
          break;

      if (reg <= 12)
        save_reg_mask |= (1 << reg);
      else
        {
          cfun->machine->sibcall_blocked = 1;
          save_reg_mask |= (1 << 3);
        }
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_regs[reg])
        save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg ++)
    if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
      mask |= 1 << reg;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  /* Make sure we have a low work register if we need one.
     We will need one if we are going to push a high register,
     but we are not currently intending to push a low register.  */
  if ((mask & 0xff) == 0
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
         we will use.  If the register is live then we will
         have to push it.  Use LAST_LO_REGNUM as our fallback
         choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
         not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
        reg = LAST_LO_REGNUM;

      if (! call_used_regs[reg])
        mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
         determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
        if (mask & (1 << reg))
          break;

      if (reg > LAST_LO_REGNUM)
        /* Make sure we have a register available for stack decrement.  */
        mask |= 1 << LAST_LO_REGNUM;
    }

  return mask;
}
/* Return the number of bytes required to save VFP registers.  */
int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
           regno < LAST_VFP_REGNUM;
           regno += 2)
        {
          if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
              && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
            {
              if (count > 0)
                {
                  /* Workaround ARM10 VFPr1 bug.  */
                  if (count == 2 && !arm_arch6)
                    count++;
                  saved += count * 8;
                }
              count = 0;
            }
          else
            count++;
        }
      if (count > 0)
        {
          if (count == 2 && !arm_arch6)
            count++;
          saved += count * 8;
        }
    }
  return saved;
}

/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If SIMPLE_RETURN is true,
   then do not output the epilogue, because it has already been emitted
   in RTL.  */
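/* Illustrative outputs only (the register sets are hypothetical): a
   leaf function might return with just

	bx	lr

   a function that saved registers might restore and return in one go
   with

	ldmfd	sp!, {r4, r5, fp, pc}

   and an interrupt handler additionally restores the CPSR using the
   "^" form, e.g.

	ldmfd	sp!, {r0 - r3, pc}^  */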
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
                           bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
         found a tail call, then we have to trust that the called
         function won't return.  */
      if (really_return)
        {
          rtx ops[2];

          /* Otherwise, trap an attempted return by aborting.  */
          ops[0] = operand;
          ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
                                       : "abort");
          assemble_external_libcall (ops[1]);
          output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
        }

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (!simple_return && live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
         (e.g. interworking) then we can load the return address
         directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
          && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
        return_reg = reg_names[PC_REGNUM];
      else
        return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
        {
          /* There are three possible reasons for the IP register
             being saved.  1) a stack frame was created, in which case
             IP contains the old stack pointer, or 2) an ISR routine
             corrupted it, or 3) it was saved to align the stack on
             iWMMXt.  In case 1, restore IP into SP, otherwise just
             restore IP.  */
          if (frame_pointer_needed)
            {
              live_regs_mask &= ~ (1 << IP_REGNUM);
              live_regs_mask |= (1 << SP_REGNUM);
            }
          else
            gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
        }

      /* On some ARM architectures it is faster to use LDR rather than
         LDM to load a single register.  On other architectures, the
         cost is the same.  In 26 bit mode, or for exception handlers,
         we have to use LDM to load the PC so that the CPSR is also
         restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
        if (live_regs_mask == (1U << reg))
          break;

      if (reg <= LAST_ARM_REGNUM
          && (reg != LR_REGNUM
              || ! really_return
              || ! IS_INTERRUPT (func_type)))
        {
          sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
                   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
        }
      else
        {
          char *p;
          int first = 1;

          /* Generate the load multiple instruction to restore the
             registers.  Note we can get here, even if
             frame_pointer_needed is true, but only if sp already
             points to the base of the saved core registers.  */
          if (live_regs_mask & (1 << SP_REGNUM))
            {
              unsigned HOST_WIDE_INT stack_adjust;

              stack_adjust = offsets->outgoing_args - offsets->saved_regs;
              gcc_assert (stack_adjust == 0 || stack_adjust == 4);

              if (stack_adjust && arm_arch5 && TARGET_ARM)
                {
                  if (TARGET_UNIFIED_ASM)
                    sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
                  else
                    sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
                }
              else
                {
                  /* If we can't use ldmib (SA110 bug),
                     then try to pop r3 instead.  */
                  if (stack_adjust)
                    live_regs_mask |= 1 << 3;

                  if (TARGET_UNIFIED_ASM)
                    sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
                  else
                    sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
                }
            }
          else
            {
              if (TARGET_UNIFIED_ASM)
                sprintf (instr, "pop%s\t{", conditional);
              else
                sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
            }

          p = instr + strlen (instr);

          for (reg = 0; reg <= SP_REGNUM; reg++)
            if (live_regs_mask & (1 << reg))
              {
                int l = strlen (reg_names[reg]);

                if (first)
                  first = 0;
                else
                  {
                    memcpy (p, ", ", 2);
                    p += 2;
                  }

                memcpy (p, "%|", 2);
                memcpy (p + 2, reg_names[reg], l);
                p += l + 2;
              }

          if (live_regs_mask & (1 << LR_REGNUM))
            {
              sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
              /* If returning from an interrupt, restore the CPSR.  */
              if (IS_INTERRUPT (func_type))
                strcat (p, "^");
            }
          else
            strcpy (p, "}");
        }

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
         perform the actual function return.  */
      if (really_return
          && func_type != ARM_FT_INTERWORKED
          && (live_regs_mask & (1 << LR_REGNUM)) != 0)
        {
          /* The return has already been handled
             by loading the LR into the PC.  */
          return "";
        }
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
        {
        case ARM_FT_ISR:
        case ARM_FT_FIQ:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
          break;

        case ARM_FT_INTERWORKED:
          sprintf (instr, "bx%s\t%%|lr", conditional);
          break;

        case ARM_FT_EXCEPTION:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
          break;

        default:
          /* Use bx if it's available.  */
          if (arm_arch5 || arm_arch4t)
            sprintf (instr, "bx%s\t%%|lr", conditional);
          else
            sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
          break;
        }

      output_asm_insn (instr, & operand);
    }

  return "";
}
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
         .ascii "arm_poke_function_name", 0
         .align
     t1
         .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
         mov     ip, sp
         stmfd   sp!, {fp, ip, lr, pc}
         sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location, whose length is given by pc[-3] & ~0xff000000 (the low
   24 bits of the marker word).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
{
  unsigned long func_type;

  /* ??? Do we want to print some of the below anyway?  */

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
               crtl->args.size,
               crtl->args.pretend_args_size, frame_size);

  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
               frame_pointer_needed,
               cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
static void
arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
                              HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
         of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
        {
          rtx label = cfun->machine->call_via[regno];

          if (label != NULL)
            {
              switch_to_section (function_section (current_function_decl));
              targetm.asm_out.internal_label (asm_out_file, "L",
                                              CODE_LABEL_NUMBER (label));
              asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
            }
        }

      /* ??? Probably not safe to set this here, since it assumes that a
         function will be emitted as assembly immediately after we generate
         RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
                  || (cfun->machine->return_used_this_function != 0)
                  || offsets->saved_regs == offsets->outgoing_args
                  || frame_pointer_needed);

      /* Reset the ARM-specific per-function variables.  */
      after_arm_reorg = 0;
    }
}
/* Generate and emit a pattern that will be recognized as an STRD pattern.
   If an even number of registers is being pushed, multiple STRD patterns
   are created for all register pairs.  If an odd number of registers is
   pushed, a combination of STRDs and one STR is emitted for the prologue
   saves.  */
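/* A sketch of the resulting sequence (the register set is made up for
   illustration): pushing {r4, r5, r6} first drops SP by 12 in one
   insn and then stores the pair with STRD and the lone register with
   STR:

	sub	sp, sp, #12
	strd	r5, r6, [sp, #4]
	str	r4, [sp]

   With an even register count, every register goes out in an STRD
   pair.  */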
static void
thumb2_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* Pre-decrement the stack pointer, based on there being num_regs 4-byte
     registers to push.  */
  tmp = gen_rtx_SET (VOIDmode,
                     stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  insn = emit_insn (tmp);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* RTLs cannot be shared, hence create new copy for dwarf.  */
  tmp1 = gen_rtx_SET (VOIDmode,
                      stack_pointer_rtx,
                      plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp1) = 1;
  XVECEXP (dwarf, 0, 0) = tmp1;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of register R_j in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     STRD can be matched.  If num_regs is odd, 1st register will be pushed
     using STR and remaining registers will be pushed with STRD in pairs.
     If num_regs is even, all registers are pushed with STRD in pairs.
     Hence, skip first element for odd num_regs.  */
  for (i = num_regs - 1, j = LAST_ARM_REGNUM; i >= (num_regs % 2); j--)
    if (saved_regs_mask & (1 << j))
      {
        /* Create RTX for store.  New RTX is created for dwarf as
           they are not sharable.  */
        reg = gen_rtx_REG (SImode, j);
        tmp = gen_rtx_SET (SImode,
                           gen_frame_mem
                           (SImode,
                            plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
                           reg);

        tmp1 = gen_rtx_SET (SImode,
                            gen_frame_mem
                            (SImode,
                             plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
                            reg);
        RTX_FRAME_RELATED_P (tmp) = 1;
        RTX_FRAME_RELATED_P (tmp1) = 1;

        if (((i - (num_regs % 2)) % 2) == 1)
          /* When (i - (num_regs % 2)) is odd, the RTX to be emitted is yet to
             be created.  Hence create it first.  The STRD pattern we are
             generating is :
               [ (SET (MEM (PLUS (SP) (NUM))) (reg_t1))
                 (SET (MEM (PLUS (SP) (NUM + 4))) (reg_t2)) ]
             where the target registers need not be consecutive.  */
          par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));

        /* Register R_j is added in PARALLEL RTX.  If (i - (num_regs % 2)) is
           even, reg_j is added as 0th element and if it is odd, reg_j is
           added as 1st element of the STRD pattern shown above.  */
        XVECEXP (par, 0, ((i - (num_regs % 2)) % 2)) = tmp;
        XVECEXP (dwarf, 0, (i + 1)) = tmp1;

        if (((i - (num_regs % 2)) % 2) == 0)
          /* When (i - (num_regs % 2)) is even, RTXs for both the registers
             to be stored are generated in above given STRD pattern, and the
             pattern can be emitted now.  */
          emit_insn (par);

        i--;
      }

  if ((num_regs % 2) == 1)
    {
      /* If an odd number of registers is pushed, generate the STR pattern
         to store the lone register.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j--);

      tmp1 = gen_frame_mem (SImode, plus_constant (Pmode,
                                                   stack_pointer_rtx, 4 * i));
      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (SImode, tmp1, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;

      emit_insn (tmp);

      tmp1 = gen_rtx_SET (SImode,
                          gen_frame_mem
                          (SImode,
                           plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
                          reg);
      RTX_FRAME_RELATED_P (tmp1) = 1;
      XVECEXP (dwarf, 0, (i + 1)) = tmp1;
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* STRD in ARM mode requires consecutive registers.  This function emits STRD
   whenever possible, otherwise it emits single-word stores.  The first store
   also allocates stack space for all saved registers, using writeback with
   pre-indexed addressing.  All other stores use offset addressing.  If no
   STRD can be emitted, this function emits a sequence of single-word stores,
   and not an STM as before, because single-word stores give the scheduler
   more freedom and can be turned into an STM by peephole optimizations.  */
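/* Illustrative sequence (hypothetical register set {r4-r7, lr}): the
   first STRD allocates the whole 20-byte area via writeback, and the
   remaining stores use plain offset addressing:

	strd	r4, r5, [sp, #-20]!
	strd	r6, r7, [sp, #8]
	str	lr, [sp, #16]  */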
static void
arm_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j, dwarf_index = 0;
  int offset = 0;
  rtx dwarf = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx tmp, mem;

  /* TODO: A more efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
  gcc_assert (num_regs > 0);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* For dwarf info, we generate explicit stack update.  */
  tmp = gen_rtx_SET (VOIDmode,
                     stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, dwarf_index++) = tmp;

  /* Save registers.  */
  offset = - 4 * num_regs;
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2 == 0)
            && (saved_regs_mask & (1 << (j + 1))))
          {
            /* Current register and previous register form register pair for
               which STRD can be generated.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (DImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset));
            tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset + 4));
            tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 8;
            j += 2;
          }
        else
          {
            /* Emit a single word store.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (SImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset));
            tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 4;
            j += 1;
          }
      }
    else
      j++;

  /* Attach dwarf info to the first insn we generate.  */
  gcc_assert (insn != NULL_RTX);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static rtx
emit_multi_reg_push (unsigned long mask)
{
  int num_regs = 0;
  int num_dwarf_regs;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We don't record the PC in the dwarf frame information.  */
  num_dwarf_regs = num_regs;
  if (mask & (1 << PC_REGNUM))
    num_dwarf_regs--;

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
           (set (mem:BLK (pre_modify:SI (reg:SI sp)
                                        (const_int:SI <num>)))
                (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
           (use (reg:SI XX))
           (use (reg:SI YY))
           ...
        ])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
           (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
           (set (mem:SI (reg:SI sp)) (reg:SI r4))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
           ...
        ])

     FIXME: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date.  Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */
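  /* By way of illustration (the register set is hypothetical), a mask
     selecting {r4, r5, lr} comes out as

	push	{r4, r5, lr}

     (or "stmfd sp!, {r4, r5, lr}" in non-unified syntax), while the
     attached frame note describes one 12-byte decrement of the stack
     pointer plus an individual store for each register.  */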
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, 0)
            = gen_rtx_SET (VOIDmode,
                           gen_frame_mem
                           (BLKmode,
                            gen_rtx_PRE_MODIFY (Pmode,
                                                stack_pointer_rtx,
                                                plus_constant
                                                (Pmode, stack_pointer_rtx,
                                                 -4 * num_regs))),
                           gen_rtx_UNSPEC (BLKmode,
                                           gen_rtvec (1, reg),
                                           UNSPEC_PUSH_MULT));

          if (i != PC_REGNUM)
            {
              tmp = gen_rtx_SET (VOIDmode,
                                 gen_frame_mem (SImode, stack_pointer_rtx),
                                 reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
              dwarf_par_index++;
            }

          break;
        }
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

          if (i != PC_REGNUM)
            {
              tmp
                = gen_rtx_SET (VOIDmode,
                               gen_frame_mem
                               (SImode,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               4 * j)),
                               reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }

          j++;
        }
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (VOIDmode,
                     stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc;
  int offset_adj;
  int emit_update;

  return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
                          rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    {
      tmp = ret_rtx;
      XVECEXP (par, 0, 0) = tmp;
    }

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
         num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (VOIDmode,
                         stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
        reg = gen_rtx_REG (SImode, i);
        if ((num_regs == 1) && emit_update && !return_in_pc)
          {
            /* Emit single load with writeback.  */
            tmp = gen_frame_mem (SImode,
                                 gen_rtx_POST_INC (Pmode,
                                                   stack_pointer_rtx));
            tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
            return;
          }

        tmp = gen_rtx_SET (VOIDmode,
                           reg,
                           gen_frame_mem
                           (SImode,
                            plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
        RTX_FRAME_RELATED_P (tmp) = 1;
        XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

        /* We need to maintain a sequence for DWARF info too.  As dwarf info
           should not have PC, skip PC.  */
        if (i != PC_REGNUM)
          dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

        j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
                                 stack_pointer_rtx, stack_pointer_rtx);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
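/* For illustration (the registers are hypothetical): popping d8-d11
   with the stack pointer as BASE_REG would produce something like

	vldmia	sp!, {d8-d11}

   annotated so that the unwinder sees one 32-byte stack increment and
   four individual register restores.  */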
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
        first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (VOIDmode,
                     base_reg,
                     plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (VOIDmode,
                         reg,
                         gen_frame_mem
                         (DFmode,
                          plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
                               base_reg, base_reg);
}
/* Generate and emit a pattern that will be recognized as an LDRD pattern.
   If an even number of registers is being popped, multiple LDRD patterns
   are created for all register pairs.  If an odd number of registers is
   popped, the last register is loaded with an LDR pattern.  */
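/* Illustrative sequence for a hypothetical register set {r4-r7, pc}:
   the pairs are loaded with LDRD at fixed offsets, the stack is then
   adjusted, and PC is popped last with a post-incremented LDR:

	ldrd	r4, r5, [sp]
	ldrd	r6, r7, [sp, #8]
	add	sp, sp, #16
	ldr	pc, [sp], #4  */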
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc;

  return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
        /* Create RTX for memory load.  */
        reg = gen_rtx_REG (SImode, j);
        tmp = gen_rtx_SET (SImode,
                           reg,
                           gen_frame_mem (SImode,
                                          plus_constant (Pmode,
                                                         stack_pointer_rtx,
                                                         4 * i)));
        RTX_FRAME_RELATED_P (tmp) = 1;

        if ((i % 2) == 0)
          {
            /* When saved-register index (i) is even, the RTX to be emitted is
               yet to be created.  Hence create it first.  The LDRD pattern we
               are generating is :
                 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
                   (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
               where target registers need not be consecutive.  */
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
            dwarf = NULL_RTX;
          }

        /* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
           added as 0th element and if i is odd, reg_i is added as 1st element
           of LDRD pattern shown above.  */
        XVECEXP (par, 0, (i % 2)) = tmp;
        dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

        if ((i % 2) == 1)
          {
            /* When saved-register index (i) is odd, RTXs for both the
               registers to be loaded are generated in above given LDRD
               pattern, and the pattern can be emitted now.  */
            par = emit_insn (par);
            REG_NOTES (par) = dwarf;
            RTX_FRAME_RELATED_P (par) = 1;
          }

        i++;
      }
  /* If the number of registers pushed is odd AND return_in_pc is false OR
     the number of registers is even AND return_in_pc is true, the last
     register is popped using LDR.  It can be PC as well.  Hence, adjust the
     stack first and then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (VOIDmode,
                     stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);

  arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
                               stack_pointer_rtx, stack_pointer_rtx);

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
         register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
                          gen_rtx_POST_INC (SImode,
                                            stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (SImode, reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
        {
          /* If return_in_pc, j must be PC_REGNUM.  */
          gcc_assert (j == PC_REGNUM);
          par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
          XVECEXP (par, 0, 0) = ret_rtx;
          XVECEXP (par, 0, 1) = tmp;
          par = emit_jump_insn (par);
        }
      else
        {
          par = emit_insn (tmp);
          REG_NOTES (par) = dwarf;
          arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
                                       stack_pointer_rtx, stack_pointer_rtx);
        }
    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
         pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }
}
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It
   uses offset addressing and then generates one separate stack update.  This
   provides more scheduling freedom, compared to writeback on every load.
   However, if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.
   TODO: Add a peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
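/* Illustrative sequence (hypothetical register set {r4, r5, r6, lr};
   note r6/lr are not consecutive, so those two fall back to LDR):

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	ldr	lr, [sp, #12]
	add	sp, sp, #16  */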
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2) == 0
            && (saved_regs_mask & (1 << (j + 1)))
            && (j + 1) != PC_REGNUM)
          {
            /* Current register and next register form register pair for
               which LDRD can be generated.  PC is always the last register
               popped, and we handle it separately.  */
            if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Generate dwarf info.  */
            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j),
                                    NULL_RTX);
            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j + 1),
                                    dwarf);

            REG_NOTES (tmp) = dwarf;

            offset += 8;
            j += 2;
          }
        else if (j != PC_REGNUM)
          {
            /* Emit a single word load.  */
            if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Generate dwarf info.  */
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
                                              gen_rtx_REG (SImode, j),
                                              NULL_RTX);

            offset += 4;
            j += 1;
          }
        else /* j == PC_REGNUM */
          j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (Pmode,
                         stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        offset));
      RTX_FRAME_RELATED_P (tmp) = 1;
      emit_insn (tmp);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (SImode,
                         gen_rtx_REG (SImode, PC_REGNUM),
                         gen_frame_mem (SImode,
                                        gen_rtx_POST_INC (SImode,
                                                          stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
                              gen_rtx_REG (SImode, PC_REGNUM),
                              NULL_RTX);
      REG_NOTES (par) = dwarf;
    }
}
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  enum machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}

/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
         && (!leaf_function_p ()
             || thumb_far_jump_used_p ()
             || df_regs_ever_live_p (LR_REGNUM));
}

/* We do not know whether r3 will be available, because an indirect
   tail call is happening in this particular case.  */
static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}

/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    if (e->flags & EDGE_SIBCALL)
      {
        rtx call = BB_END (e->src);
        if (!CALL_P (call))
          call = prev_nonnote_nondebug_insn (call);
        gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
        if (find_regno_fusage (call, USE, 3)
            || is_indirect_tailcall_p (call))
          return true;
      }

  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

   For a given function some or all of these stack components
   may not be needed, giving rise to the possibility of
   eliminating some of the registers.

   The values returned by this function must reflect the behavior
   of arm_expand_prologue () and arm_compute_save_reg_mask ().

   The sign of the number returned reflects the direction of stack
   growth, so the values are positive for all eliminations except
   from the soft frame pointer to the hard frame pointer.

   SFP may point just inside the local variables block to ensure correct
   alignment.  */

/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */
static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int leaf;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* We need to know if we are a leaf function.  Unfortunately, it
     is possible to be called after start_sequence has been called,
     which causes get_insns to return the insns for the sequence,
     not the function, which will cause leaf_function_p to return
     the incorrect result.

     To work around this, we cache the computed frame size.  This
     works because we will only be calling RTL expanders that need
     to know about leaf functions once reload has completed, and the
     frame size cannot be changed after that time, so we can safely
     use the cached value.  */

  if (reload_completed)
    return offsets;

  /* Initially this is the size of the local variables.  It will be
     translated into an offset once we have determined the size of
     preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  leaf = leaf_function_p ();

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
                   arm_compute_static_chain_stack_bytes ();

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      /* We know that SP will be doubleword aligned on entry, and we must
         preserve that condition at any subroutine call.  We also require the
         soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
        {
          /* Check for the call-saved iWMMXt registers.  */
          for (regno = FIRST_IWMMXT_REGNUM;
               regno <= LAST_IWMMXT_REGNUM;
               regno++)
            if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
              saved += 8;
        }

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
          && TARGET_HARD_FLOAT && TARGET_VFP)
        saved += arm_get_vfp_saved_size ();
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
        saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs = offsets->saved_args + saved +
                        arm_compute_static_chain_stack_bytes ();
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (leaf && frame_size == 0
      /* However if it calls alloca (), we have a dynamically allocated
         block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return offsets;
    }
17818 /* Ensure SFP has the correct alignment. */
17819 if (ARM_DOUBLEWORD_ALIGN
17820 && (offsets->soft_frame & 7))
17822 offsets->soft_frame += 4;
17823 /* Try to align stack by pushing an extra reg. Don't bother doing this
17824 when there is a stack frame as the alignment will be rolled into
17825 the normal stack adjustment. */
17826 if (frame_size + crtl->outgoing_args_size == 0)
17830 /* If it is safe to use r3, then do so. This sometimes
17831 generates better code on Thumb-2 by avoiding the need to
17832 use 32-bit push/pop instructions. */
17833 if (! any_sibcall_could_use_r3 ()
17834 && arm_size_return_regs () <= 12
17835 && (offsets->saved_regs_mask & (1 << 3)) == 0
17836 && (TARGET_THUMB2 || !current_tune->prefer_ldrd_strd))
17841 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
17843 /* Avoid fixed registers; they may be changed at
17844 arbitrary times so it's unsafe to restore them
17845 during the epilogue. */
17846 if (!fixed_regs[i]
17847 && (offsets->saved_regs_mask & (1 << i)) == 0)
17856 offsets->saved_regs += 4;
17857 offsets->saved_regs_mask |= (1 << reg);
17862 offsets->locals_base = offsets->soft_frame + frame_size;
17863 offsets->outgoing_args = (offsets->locals_base
17864 + crtl->outgoing_args_size);
17866 if (ARM_DOUBLEWORD_ALIGN)
17868 /* Ensure SP remains doubleword aligned. */
17869 if (offsets->outgoing_args & 7)
17870 offsets->outgoing_args += 4;
17871 gcc_assert (!(offsets->outgoing_args & 7));
17878 /* Calculate the relative offsets for the different stack pointers. Positive
17879 offsets are in the direction of stack growth. */
17882 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
17884 arm_stack_offsets *offsets;
17886 offsets = arm_get_frame_offsets ();
17888 /* OK, now we have enough information to compute the distances.
17889 There must be an entry in these switch tables for each pair
17890 of registers in ELIMINABLE_REGS, even if some of the entries
17891 seem to be redundant or useless. */
17894 case ARG_POINTER_REGNUM:
17897 case THUMB_HARD_FRAME_POINTER_REGNUM:
17900 case FRAME_POINTER_REGNUM:
17901 /* This is the reverse of the soft frame pointer
17902 to hard frame pointer elimination below. */
17903 return offsets->soft_frame - offsets->saved_args;
17905 case ARM_HARD_FRAME_POINTER_REGNUM:
17906 /* This is only non-zero in the case where the static chain register
17907 is stored above the frame. */
17908 return offsets->frame - offsets->saved_args - 4;
17910 case STACK_POINTER_REGNUM:
17911 /* If nothing has been pushed on the stack at all
17912 then this will return -4. This *is* correct! */
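/* Worked example (editorial): for an empty function with no pretend args,
   no saved registers, no outgoing args and no caller-interworking slot,
   saved_args == 0 and outgoing_args == 0, so the value below is
   0 - (0 + 4) = -4.  */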
17913 return offsets->outgoing_args - (offsets->saved_args + 4);
17916 gcc_unreachable ();
17918 gcc_unreachable ();
17920 case FRAME_POINTER_REGNUM:
17923 case THUMB_HARD_FRAME_POINTER_REGNUM:
17926 case ARM_HARD_FRAME_POINTER_REGNUM:
17927 /* The hard frame pointer points to the top entry in the
17928 stack frame. The soft frame pointer to the bottom entry
17929 in the stack frame. If there is no stack frame at all,
17930 then they are identical. */
17932 return offsets->frame - offsets->soft_frame;
17934 case STACK_POINTER_REGNUM:
17935 return offsets->outgoing_args - offsets->soft_frame;
17938 gcc_unreachable ();
17940 gcc_unreachable ();
17943 /* You cannot eliminate from the stack pointer.
17944 In theory you could eliminate from the hard frame
17945 pointer to the stack pointer, but this will never
17946 happen, since if a stack frame is not needed the
17947 hard frame pointer will never be used. */
17948 gcc_unreachable ();
17952 /* Given FROM and TO register numbers, say whether this elimination is
17953 allowed. Frame pointer elimination is automatically handled.
17955 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
17956 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
17957 pointer, we must eliminate FRAME_POINTER_REGNUM into
17958 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
17959 ARG_POINTER_REGNUM. */
17962 arm_can_eliminate (const int from, const int to)
17964 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
17965 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
17966 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
17967 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
17968 true);
17971 /* Emit RTL to save coprocessor registers on function entry. Returns the
17972 number of bytes pushed. */
17975 arm_save_coproc_regs(void)
17977 int saved_size = 0;
17979 unsigned start_reg;
17982 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
17983 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
17985 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
17986 insn = gen_rtx_MEM (V2SImode, insn);
17987 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
17988 RTX_FRAME_RELATED_P (insn) = 1;
17992 if (TARGET_HARD_FLOAT && TARGET_VFP)
17994 start_reg = FIRST_VFP_REGNUM;
17996 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
17998 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
17999 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
18001 if (start_reg != reg)
18002 saved_size += vfp_emit_fstmd (start_reg,
18003 (reg - start_reg) / 2);
18004 start_reg = reg + 2;
18007 if (start_reg != reg)
18008 saved_size += vfp_emit_fstmd (start_reg,
18009 (reg - start_reg) / 2);
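/* Illustrative example (editorial): if only d8-d10 are live and call-saved,
   the loop above flushes a single run, calling vfp_emit_fstmd with a count
   of 3 D registers; each D register spans two consecutive S-register
   numbers, hence the division by 2.  */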
18015 /* Set the Thumb frame pointer from the stack pointer. */
18018 thumb_set_frame_pointer (arm_stack_offsets *offsets)
18020 HOST_WIDE_INT amount;
18023 amount = offsets->outgoing_args - offsets->locals_base;
18025 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
18026 stack_pointer_rtx, GEN_INT (amount)));
18029 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
18030 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
18031 expects the first two operands to be the same. */
18034 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
18036 hard_frame_pointer_rtx));
18040 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
18041 hard_frame_pointer_rtx,
18042 stack_pointer_rtx));
18044 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
18045 plus_constant (Pmode, stack_pointer_rtx, amount));
18046 RTX_FRAME_RELATED_P (dwarf) = 1;
18047 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
18050 RTX_FRAME_RELATED_P (insn) = 1;
18053 /* Generate the prologue instructions for entry into an ARM or Thumb-2
18056 arm_expand_prologue (void)
18061 unsigned long live_regs_mask;
18062 unsigned long func_type;
18064 int saved_pretend_args = 0;
18065 int saved_regs = 0;
18066 unsigned HOST_WIDE_INT args_to_push;
18067 arm_stack_offsets *offsets;
18069 func_type = arm_current_func_type ();
18071 /* Naked functions don't have prologues. */
18072 if (IS_NAKED (func_type))
18075 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
18076 args_to_push = crtl->args.pretend_args_size;
18078 /* Compute which register we will have to save onto the stack. */
18079 offsets = arm_get_frame_offsets ();
18080 live_regs_mask = offsets->saved_regs_mask;
18082 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
18084 if (IS_STACKALIGN (func_type))
18088 /* Handle a word-aligned stack pointer.  We generate the following:
18090 mov r0, sp
18091 bic r1, r0, #7
18092 mov sp, r1
18093 <save and restore r0 in normal prologue/epilogue>
18094 mov sp, r0
18095 bx r0
18097 The unwinder doesn't need to know about the stack realignment.
18098 Just tell it we saved SP in r0. */
18099 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
18101 r0 = gen_rtx_REG (SImode, 0);
18102 r1 = gen_rtx_REG (SImode, 1);
18104 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
18105 RTX_FRAME_RELATED_P (insn) = 1;
18106 add_reg_note (insn, REG_CFA_REGISTER, NULL);
18108 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
18110 /* ??? The CFA changes here, which may cause GDB to conclude that it
18111 has entered a different function. That said, the unwind info is
18112 correct, individually, before and after this instruction because
18113 we've described the save of SP, which will override the default
18114 handling of SP as restoring from the CFA. */
18115 emit_insn (gen_movsi (stack_pointer_rtx, r1));
18118 /* For APCS frames, if IP register is clobbered
18119 when creating frame, save that register in a special
18121 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
18123 if (IS_INTERRUPT (func_type))
18125 /* Interrupt functions must not corrupt any registers.
18126 Creating a frame pointer however, corrupts the IP
18127 register, so we must push it first. */
18128 emit_multi_reg_push (1 << IP_REGNUM);
18130 /* Do not set RTX_FRAME_RELATED_P on this insn.
18131 The dwarf stack unwinding code only wants to see one
18132 stack decrement per function, and this is not it. If
18133 this instruction is labeled as being part of the frame
18134 creation sequence then dwarf2out_frame_debug_expr will
18135 die when it encounters the assignment of IP to FP
18136 later on, since the use of SP here establishes SP as
18137 the CFA register and not IP.
18139 Anyway, this instruction is not really part of the stack
18140 frame creation although it is part of the prologue. */
18142 else if (IS_NESTED (func_type))
18144 /* The Static chain register is the same as the IP register
18145 used as a scratch register during stack frame creation.
18146 To get around this need to find somewhere to store IP
18147 whilst the frame is being created. We try the following
18150 1. The last argument register.
18151 2. A slot on the stack above the frame. (This only
18152 works if the function is not a varargs function).
18153 3. Register r3, after pushing the argument registers
18156 Note - we only need to tell the dwarf2 backend about the SP
18157 adjustment in the second variant; the static chain register
18158 doesn't need to be unwound, as it doesn't contain a value
18159 inherited from the caller. */
18161 if (df_regs_ever_live_p (3) == false)
18162 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
18163 else if (args_to_push == 0)
18167 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
18170 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
18171 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
18174 /* Just tell the dwarf backend that we adjusted SP. */
18175 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
18176 plus_constant (Pmode, stack_pointer_rtx,
18178 RTX_FRAME_RELATED_P (insn) = 1;
18179 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
18183 /* Store the args on the stack. */
18184 if (cfun->machine->uses_anonymous_args)
18185 insn = emit_multi_reg_push
18186 ((0xf0 >> (args_to_push / 4)) & 0xf);
18189 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
18190 GEN_INT (- args_to_push)));
18192 RTX_FRAME_RELATED_P (insn) = 1;
18194 saved_pretend_args = 1;
18195 fp_offset = args_to_push;
18198 /* Now reuse r3 to preserve IP. */
18199 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
18203 insn = emit_set_insn (ip_rtx,
18204 plus_constant (Pmode, stack_pointer_rtx,
18206 RTX_FRAME_RELATED_P (insn) = 1;
18211 /* Push the argument registers, or reserve space for them. */
18212 if (cfun->machine->uses_anonymous_args)
18213 insn = emit_multi_reg_push
18214 ((0xf0 >> (args_to_push / 4)) & 0xf);
18217 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
18218 GEN_INT (- args_to_push)));
18219 RTX_FRAME_RELATED_P (insn) = 1;
18222 /* If this is an interrupt service routine, and the link register
18223 is going to be pushed, and we're not generating extra
18224 push of IP (needed when frame is needed and frame layout if apcs),
18225 subtracting four from LR now will mean that the function return
18226 can be done with a single instruction.  */
18227 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
18228 && (live_regs_mask & (1 << LR_REGNUM)) != 0
18229 && !(frame_pointer_needed && TARGET_APCS_FRAME)
18232 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
18234 emit_set_insn (lr, plus_constant (SImode, lr, -4));
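/* Editorial note: with LR pre-decremented here, the saved value can later
   be popped (or moved) straight into the PC, e.g. by a single
   "ldmfd sp!, {..., pc}^", instead of needing a separate
   "subs pc, lr, #4" on return.  */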
18237 if (live_regs_mask)
18239 saved_regs += bit_count (live_regs_mask) * 4;
18240 if (optimize_size && !frame_pointer_needed
18241 && saved_regs == offsets->saved_regs - offsets->saved_args)
18243 /* If no coprocessor registers are being pushed and we don't have
18244 to worry about a frame pointer then push extra registers to
18245 create the stack frame.  This is done in a way that does not
18246 alter the frame layout, so is independent of the epilogue. */
18250 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
18251 n++;
18252 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
18253 if (frame && n * 4 >= frame)
18255 n = frame / 4;
18256 live_regs_mask |= (1 << n) - 1;
18257 saved_regs += frame;
18261 if (current_tune->prefer_ldrd_strd
18262 && !optimize_function_for_size_p (cfun))
18266 thumb2_emit_strd_push (live_regs_mask);
18268 else if (TARGET_ARM
18269 && !TARGET_APCS_FRAME
18270 && !IS_INTERRUPT (func_type))
18272 arm_emit_strd_push (live_regs_mask);
18276 insn = emit_multi_reg_push (live_regs_mask);
18277 RTX_FRAME_RELATED_P (insn) = 1;
18282 insn = emit_multi_reg_push (live_regs_mask);
18283 RTX_FRAME_RELATED_P (insn) = 1;
18287 if (! IS_VOLATILE (func_type))
18288 saved_regs += arm_save_coproc_regs ();
18290 if (frame_pointer_needed && TARGET_ARM)
18292 /* Create the new frame pointer. */
18293 if (TARGET_APCS_FRAME)
18295 insn = GEN_INT (-(4 + args_to_push + fp_offset));
18296 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
18297 RTX_FRAME_RELATED_P (insn) = 1;
18299 if (IS_NESTED (func_type))
18301 /* Recover the static chain register. */
18302 if (!df_regs_ever_live_p (3)
18303 || saved_pretend_args)
18304 insn = gen_rtx_REG (SImode, 3);
18305 else /* if (crtl->args.pretend_args_size == 0) */
18307 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
18308 insn = gen_frame_mem (SImode, insn);
18310 emit_set_insn (ip_rtx, insn);
18311 /* Add a USE to stop propagate_one_insn() from barfing. */
18312 emit_insn (gen_force_register_use (ip_rtx));
18317 insn = GEN_INT (saved_regs - 4);
18318 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
18319 stack_pointer_rtx, insn));
18320 RTX_FRAME_RELATED_P (insn) = 1;
18324 if (flag_stack_usage_info)
18325 current_function_static_stack_size
18326 = offsets->outgoing_args - offsets->saved_args;
18328 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
18330 /* This add can produce multiple insns for a large constant, so we
18331 need to get tricky. */
18332 rtx last = get_last_insn ();
18334 amount = GEN_INT (offsets->saved_args + saved_regs
18335 - offsets->outgoing_args);
18337 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
18341 last = last ? NEXT_INSN (last) : get_insns ();
18342 RTX_FRAME_RELATED_P (last) = 1;
18344 while (last != insn);
18346 /* If the frame pointer is needed, emit a special barrier that
18347 will prevent the scheduler from moving stores to the frame
18348 before the stack adjustment. */
18349 if (frame_pointer_needed)
18350 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
18351 hard_frame_pointer_rtx));
18355 if (frame_pointer_needed && TARGET_THUMB2)
18356 thumb_set_frame_pointer (offsets);
18358 if (flag_pic && arm_pic_register != INVALID_REGNUM)
18360 unsigned long mask;
18362 mask = live_regs_mask;
18363 mask &= THUMB2_WORK_REGS;
18364 if (!IS_NESTED (func_type))
18365 mask |= (1 << IP_REGNUM);
18366 arm_load_pic_register (mask);
18369 /* If we are profiling, make sure no instructions are scheduled before
18370 the call to mcount. Similarly if the user has requested no
18371 scheduling in the prolog. Similarly if we want non-call exceptions
18372 using the EABI unwinder, to prevent faulting instructions from being
18373 swapped with a stack adjustment. */
18374 if (crtl->profile || !TARGET_SCHED_PROLOG
18375 || (arm_except_unwind_info (&global_options) == UI_TARGET
18376 && cfun->can_throw_non_call_exceptions))
18377 emit_insn (gen_blockage ());
18379 /* If the link register is being kept alive, with the return address in it,
18380 then make sure that it does not get reused by the ce2 pass. */
18381 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
18382 cfun->machine->lr_save_eliminated = 1;
18385 /* Print condition code to STREAM. Helper function for arm_print_operand. */
18387 arm_print_condition (FILE *stream)
18389 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
18391 /* Branch conversion is not implemented for Thumb-2. */
18394 output_operand_lossage ("predicated Thumb instruction");
18397 if (current_insn_predicate != NULL)
18399 output_operand_lossage
18400 ("predicated instruction in conditional sequence");
18404 fputs (arm_condition_codes[arm_current_cc], stream);
18406 else if (current_insn_predicate)
18408 enum arm_cond_code code;
18412 output_operand_lossage ("predicated Thumb instruction");
18416 code = get_arm_condition_code (current_insn_predicate);
18417 fputs (arm_condition_codes[code], stream);
18422 /* If CODE is 'd', then the X is a condition operand and the instruction
18423 should only be executed if the condition is true.
18424 if CODE is 'D', then the X is a condition operand and the instruction
18425 should only be executed if the condition is false: however, if the mode
18426 of the comparison is CCFPEmode, then always execute the instruction -- we
18427 do this because in these circumstances !GE does not necessarily imply LT;
18428 in these cases the instruction pattern will take care to make sure that
18429 an instruction containing %d will follow, thereby undoing the effects of
18430 doing this instruction unconditionally.
18431 If CODE is 'N' then X is a floating point operand that must be negated
18432 before output.
18433 If CODE is 'B' then output a bitwise inverted value of X (a const int).
18434 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
18436 arm_print_operand (FILE *stream, rtx x, int code)
18441 fputs (ASM_COMMENT_START, stream);
18445 fputs (user_label_prefix, stream);
18449 fputs (REGISTER_PREFIX, stream);
18453 arm_print_condition (stream);
18457 /* Nothing in unified syntax, otherwise the current condition code. */
18458 if (!TARGET_UNIFIED_ASM)
18459 arm_print_condition (stream);
18463 /* The current condition code in unified syntax, otherwise nothing. */
18464 if (TARGET_UNIFIED_ASM)
18465 arm_print_condition (stream);
18469 /* The current condition code for a condition code setting instruction.
18470 Preceded by 's' in unified syntax, otherwise followed by 's'. */
18471 if (TARGET_UNIFIED_ASM)
18473 fputc('s', stream);
18474 arm_print_condition (stream);
18478 arm_print_condition (stream);
18479 fputc('s', stream);
18484 /* If the instruction is conditionally executed then print
18485 the current condition code, otherwise print 's'. */
18486 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
18487 if (current_insn_predicate)
18488 arm_print_condition (stream);
18490 fputc('s', stream);
18493 /* %# is a "break" sequence. It doesn't output anything, but is used to
18494 separate e.g. operand numbers from following text, if that text consists
18495 of further digits which we don't want to be part of the operand
18496 number.  */
18503 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
18504 r = real_value_negate (&r);
18505 fprintf (stream, "%s", fp_const_from_val (&r));
18509 /* An integer or symbol address without a preceding # sign. */
18511 switch (GET_CODE (x))
18514 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
18518 output_addr_const (stream, x);
18522 if (GET_CODE (XEXP (x, 0)) == PLUS
18523 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
18525 output_addr_const (stream, x);
18528 /* Fall through. */
18531 output_operand_lossage ("Unsupported operand for code '%c'", code);
18535 /* An integer that we want to print in HEX. */
18537 switch (GET_CODE (x))
18540 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
18544 output_operand_lossage ("Unsupported operand for code '%c'", code);
18549 if (CONST_INT_P (x))
18552 val = ARM_SIGN_EXTEND (~INTVAL (x));
18553 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
18557 putc ('~', stream);
18558 output_addr_const (stream, x);
18563 /* The low 16 bits of an immediate constant. */
18564 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
18568 fprintf (stream, "%s", arithmetic_instr (x, 1));
18572 fprintf (stream, "%s", arithmetic_instr (x, 0));
18580 shift = shift_op (x, &val);
18584 fprintf (stream, ", %s ", shift);
18586 arm_print_operand (stream, XEXP (x, 1), 0);
18588 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
18593 /* An explanation of the 'Q', 'R' and 'H' register operands:
18595 In a pair of registers containing a DI or DF value the 'Q'
18596 operand returns the register number of the register containing
18597 the least significant part of the value. The 'R' operand returns
18598 the register number of the register containing the most
18599 significant part of the value.
18601 The 'H' operand returns the higher of the two register numbers.
18602 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
18603 same as the 'Q' operand, since the most significant part of the
18604 value is held in the lower number register. The reverse is true
18605 on systems where WORDS_BIG_ENDIAN is false.
18607 The purpose of these operands is to distinguish between cases
18608 where the endian-ness of the values is important (for example
18609 when they are added together), and cases where the endian-ness
18610 is irrelevant, but the order of register operations is important.
18611 For example when loading a value from memory into a register
18612 pair, the endian-ness does not matter. Provided that the value
18613 from the lower memory address is put into the lower numbered
18614 register, and the value from the higher address is put into the
18615 higher numbered register, the load will work regardless of whether
18616 the value being loaded is big-wordian or little-wordian. The
18617 order of the two register loads can matter however, if the address
18618 of the memory location is actually held in one of the registers
18619 being overwritten by the load.
18621 The 'Q' and 'R' constraints are also available for 64-bit
18622 constants.  */
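/* Illustrative example (editorial): for a DImode value held in {r0, r1}
   on a little-endian target, %Q prints r0 (least significant word),
   %R prints r1 (most significant word) and %H prints r1, the
   higher-numbered register, regardless of endianness.  */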
18624 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
18626 rtx part = gen_lowpart (SImode, x);
18627 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
18631 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
18633 output_operand_lossage ("invalid operand for code '%c'", code);
18637 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
18641 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
18643 enum machine_mode mode = GET_MODE (x);
18646 if (mode == VOIDmode)
18648 part = gen_highpart_mode (SImode, mode, x);
18649 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
18653 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
18655 output_operand_lossage ("invalid operand for code '%c'", code);
18659 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
18663 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
18665 output_operand_lossage ("invalid operand for code '%c'", code);
18669 asm_fprintf (stream, "%r", REGNO (x) + 1);
18673 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
18675 output_operand_lossage ("invalid operand for code '%c'", code);
18679 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
18683 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
18685 output_operand_lossage ("invalid operand for code '%c'", code);
18689 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
18693 asm_fprintf (stream, "%r",
18694 REG_P (XEXP (x, 0))
18695 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
18699 asm_fprintf (stream, "{%r-%r}",
18701 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
18704 /* Like 'M', but writing doubleword vector registers, for use by Neon
18705 insns.  */
18708 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
18709 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
18711 asm_fprintf (stream, "{d%d}", regno);
18713 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
18718 /* CONST_TRUE_RTX means always -- that's the default. */
18719 if (x == const_true_rtx)
18722 if (!COMPARISON_P (x))
18724 output_operand_lossage ("invalid operand for code '%c'", code);
18728 fputs (arm_condition_codes[get_arm_condition_code (x)],
18733 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
18734 want to do that. */
18735 if (x == const_true_rtx)
18737 output_operand_lossage ("instruction never executed");
18740 if (!COMPARISON_P (x))
18742 output_operand_lossage ("invalid operand for code '%c'", code);
18746 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
18747 (get_arm_condition_code (x))],
18757 /* Former Maverick support, removed after GCC-4.7. */
18758 output_operand_lossage ("obsolete Maverick format code '%c'", code);
18763 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
18764 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
18765 /* Bad value for wCG register number. */
18767 output_operand_lossage ("invalid operand for code '%c'", code);
18772 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
18775 /* Print an iWMMXt control register name. */
18777 if (!CONST_INT_P (x)
18779 || INTVAL (x) >= 16)
18780 /* Bad value for wC register number. */
18782 output_operand_lossage ("invalid operand for code '%c'", code);
18788 static const char * wc_reg_names [16] =
18790 "wCID", "wCon", "wCSSF", "wCASF",
18791 "wC4", "wC5", "wC6", "wC7",
18792 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
18793 "wC12", "wC13", "wC14", "wC15"
18796 fputs (wc_reg_names [INTVAL (x)], stream);
18800 /* Print the high single-precision register of a VFP double-precision
18801 register.  */
18804 int mode = GET_MODE (x);
18807 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
18809 output_operand_lossage ("invalid operand for code '%c'", code);
18814 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
18816 output_operand_lossage ("invalid operand for code '%c'", code);
18820 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
18824 /* Print a VFP/Neon double precision or quad precision register name. */
18828 int mode = GET_MODE (x);
18829 int is_quad = (code == 'q');
18832 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
18834 output_operand_lossage ("invalid operand for code '%c'", code);
18839 || !IS_VFP_REGNUM (REGNO (x)))
18841 output_operand_lossage ("invalid operand for code '%c'", code);
18846 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
18847 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
18849 output_operand_lossage ("invalid operand for code '%c'", code);
18853 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
18854 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
18858 /* These two codes print the low/high doubleword register of a Neon quad
18859 register, respectively. For pair-structure types, can also print
18860 low/high quadword registers. */
18864 int mode = GET_MODE (x);
18867 if ((GET_MODE_SIZE (mode) != 16
18868 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
18870 output_operand_lossage ("invalid operand for code '%c'", code);
18875 if (!NEON_REGNO_OK_FOR_QUAD (regno))
18877 output_operand_lossage ("invalid operand for code '%c'", code);
18881 if (GET_MODE_SIZE (mode) == 16)
18882 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
18883 + (code == 'f' ? 1 : 0));
18885 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
18886 + (code == 'f' ? 1 : 0));
18890 /* Print a VFPv3 floating-point constant, represented as an integer
18891 index.  */
18894 int index = vfp3_const_double_index (x);
18895 gcc_assert (index != -1);
18896 fprintf (stream, "%d", index);
18900 /* Print bits representing opcode features for Neon.
18902 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
18903 and polynomials as unsigned.
18905 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
18907 Bit 2 is 1 for rounding functions, 0 otherwise. */
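/* Worked example (editorial): bits == 3 has bit 0 (signed) and bit 1
   (float/poly) set, so the 'T' case below prints 'f' from "uspf";
   bits == 0 prints 'u'.  */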
18909 /* Identify the type as 's', 'u', 'p' or 'f'. */
18912 HOST_WIDE_INT bits = INTVAL (x);
18913 fputc ("uspf"[bits & 3], stream);
18917 /* Likewise, but signed and unsigned integers are both 'i'. */
18920 HOST_WIDE_INT bits = INTVAL (x);
18921 fputc ("iipf"[bits & 3], stream);
18925 /* As for 'T', but emit 'u' instead of 'p'. */
18928 HOST_WIDE_INT bits = INTVAL (x);
18929 fputc ("usuf"[bits & 3], stream);
18933 /* Bit 2: rounding (vs none). */
18936 HOST_WIDE_INT bits = INTVAL (x);
18937 fputs ((bits & 4) != 0 ? "r" : "", stream);
18941 /* Memory operand for vld1/vst1 instruction. */
18945 bool postinc = FALSE;
18946 unsigned align, memsize, align_bits;
18948 gcc_assert (MEM_P (x));
18949 addr = XEXP (x, 0);
18950 if (GET_CODE (addr) == POST_INC)
18953 addr = XEXP (addr, 0);
18955 asm_fprintf (stream, "[%r", REGNO (addr));
18957 /* We know the alignment of this access, so we can emit a hint in the
18958 instruction (for some alignments) as an aid to the memory subsystem
18959 of the CPU.  */
18960 align = MEM_ALIGN (x) >> 3;
18961 memsize = MEM_SIZE (x);
18963 /* Only certain alignment specifiers are supported by the hardware. */
18964 if (memsize == 32 && (align % 32) == 0)
18966 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
18968 else if (memsize >= 8 && (align % 8) == 0)
18973 if (align_bits != 0)
18974 asm_fprintf (stream, ":%d", align_bits);
18976 asm_fprintf (stream, "]");
18979 fputs("!", stream);
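/* Example output (editorial): an 8-byte access known to be 64-bit
   aligned, with post-increment addressing, prints "[r0:64]!".  */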
18987 gcc_assert (MEM_P (x));
18988 addr = XEXP (x, 0);
18989 gcc_assert (REG_P (addr));
18990 asm_fprintf (stream, "[%r]", REGNO (addr));
18994 /* Translate an S register number into a D register number and element index. */
18997 int mode = GET_MODE (x);
19000 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
19002 output_operand_lossage ("invalid operand for code '%c'", code);
19007 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
19009 output_operand_lossage ("invalid operand for code '%c'", code);
19013 regno = regno - FIRST_VFP_REGNUM;
19014 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
19019 gcc_assert (CONST_DOUBLE_P (x));
19020 fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
19023 /* Register specifier for vld1.16/vst1.16. Translate the S register
19024 number into a D register number and element index. */
19027 int mode = GET_MODE (x);
19030 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
19032 output_operand_lossage ("invalid operand for code '%c'", code);
19037 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
19039 output_operand_lossage ("invalid operand for code '%c'", code);
19043 regno = regno - FIRST_VFP_REGNUM;
19044 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
19051 output_operand_lossage ("missing operand");
19055 switch (GET_CODE (x))
19058 asm_fprintf (stream, "%r", REGNO (x));
19062 output_memory_reference_mode = GET_MODE (x);
19063 output_address (XEXP (x, 0));
19070 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
19071 sizeof (fpstr), 0, 1);
19072 fprintf (stream, "#%s", fpstr);
19075 fprintf (stream, "#%s", fp_immediate_constant (x));
19079 gcc_assert (GET_CODE (x) != NEG);
19080 fputc ('#', stream);
19081 if (GET_CODE (x) == HIGH)
19083 fputs (":lower16:", stream);
19087 output_addr_const (stream, x);
19093 /* Target hook for printing a memory address. */
19095 arm_print_operand_address (FILE *stream, rtx x)
19099 int is_minus = GET_CODE (x) == MINUS;
19102 asm_fprintf (stream, "[%r]", REGNO (x));
19103 else if (GET_CODE (x) == PLUS || is_minus)
19105 rtx base = XEXP (x, 0);
19106 rtx index = XEXP (x, 1);
19107 HOST_WIDE_INT offset = 0;
19109 || (REG_P (index) && REGNO (index) == SP_REGNUM))
19111 /* Ensure that BASE is a register. */
19112 /* (one of them must be). */
19113 /* Also ensure the SP is not used as an index register.  */
19118 switch (GET_CODE (index))
19121 offset = INTVAL (index);
19124 asm_fprintf (stream, "[%r, #%wd]",
19125 REGNO (base), offset);
19129 asm_fprintf (stream, "[%r, %s%r]",
19130 REGNO (base), is_minus ? "-" : "",
19140 asm_fprintf (stream, "[%r, %s%r",
19141 REGNO (base), is_minus ? "-" : "",
19142 REGNO (XEXP (index, 0)));
19143 arm_print_operand (stream, index, 'S');
19144 fputs ("]", stream);
19149 gcc_unreachable ();
19152 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
19153 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
19155 extern enum machine_mode output_memory_reference_mode;
19157 gcc_assert (REG_P (XEXP (x, 0)));
19159 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
19160 asm_fprintf (stream, "[%r, #%s%d]!",
19161 REGNO (XEXP (x, 0)),
19162 GET_CODE (x) == PRE_DEC ? "-" : "",
19163 GET_MODE_SIZE (output_memory_reference_mode));
19165 asm_fprintf (stream, "[%r], #%s%d",
19166 REGNO (XEXP (x, 0)),
19167 GET_CODE (x) == POST_DEC ? "-" : "",
19168 GET_MODE_SIZE (output_memory_reference_mode));
19170 else if (GET_CODE (x) == PRE_MODIFY)
19172 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
19173 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
19174 asm_fprintf (stream, "#%wd]!",
19175 INTVAL (XEXP (XEXP (x, 1), 1)));
19177 asm_fprintf (stream, "%r]!",
19178 REGNO (XEXP (XEXP (x, 1), 1)));
19180 else if (GET_CODE (x) == POST_MODIFY)
19182 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
19183 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
19184 asm_fprintf (stream, "#%wd",
19185 INTVAL (XEXP (XEXP (x, 1), 1)));
19187 asm_fprintf (stream, "%r",
19188 REGNO (XEXP (XEXP (x, 1), 1)));
19190 else output_addr_const (stream, x);
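/* Example outputs (editorial): a PRE_MODIFY adding 4 prints "[r0, #4]!";
   a POST_MODIFY adding a register prints "[r0], r1".  */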
19195 asm_fprintf (stream, "[%r]", REGNO (x));
19196 else if (GET_CODE (x) == POST_INC)
19197 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
19198 else if (GET_CODE (x) == PLUS)
19200 gcc_assert (REG_P (XEXP (x, 0)));
19201 if (CONST_INT_P (XEXP (x, 1)))
19202 asm_fprintf (stream, "[%r, #%wd]",
19203 REGNO (XEXP (x, 0)),
19204 INTVAL (XEXP (x, 1)));
19206 asm_fprintf (stream, "[%r, %r]",
19207 REGNO (XEXP (x, 0)),
19208 REGNO (XEXP (x, 1)));
19211 output_addr_const (stream, x);
19215 /* Target hook for indicating whether a punctuation character for
19216 TARGET_PRINT_OPERAND is valid. */
19218 arm_print_operand_punct_valid_p (unsigned char code)
19220 return (code == '@' || code == '|' || code == '.'
19221 || code == '(' || code == ')' || code == '#'
19222 || (TARGET_32BIT && (code == '?'))
19223 || (TARGET_THUMB2 && (code == '!'))
19224 || (TARGET_THUMB && (code == '_')));
19227 /* Target hook for assembling integer objects. The ARM version needs to
19228 handle word-sized values specially. */
19230 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
19232 enum machine_mode mode;
19234 if (size == UNITS_PER_WORD && aligned_p)
19236 fputs ("\t.word\t", asm_out_file);
19237 output_addr_const (asm_out_file, x);
19239 /* Mark symbols as position independent. We only do this in the
19240 .text segment, not in the .data segment. */
19241 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
19242 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
19244 /* See legitimize_pic_address for an explanation of the
19245 TARGET_VXWORKS_RTP check. */
19246 if (TARGET_VXWORKS_RTP
19247 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
19248 fputs ("(GOT)", asm_out_file);
19250 fputs ("(GOTOFF)", asm_out_file);
19252 fputc ('\n', asm_out_file);
19256 mode = GET_MODE (x);
19258 if (arm_vector_mode_supported_p (mode))
19262 gcc_assert (GET_CODE (x) == CONST_VECTOR);
19264 units = CONST_VECTOR_NUNITS (x);
19265 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
19267 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19268 for (i = 0; i < units; i++)
19270 rtx elt = CONST_VECTOR_ELT (x, i);
19272 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
19275 for (i = 0; i < units; i++)
19277 rtx elt = CONST_VECTOR_ELT (x, i);
19278 REAL_VALUE_TYPE rval;
19280 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
19283 (rval, GET_MODE_INNER (mode),
19284 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
19290 return default_assemble_integer (x, size, aligned_p);
19294 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
19298 if (!TARGET_AAPCS_BASED)
19301 default_named_section_asm_out_constructor
19302 : default_named_section_asm_out_destructor) (symbol, priority);
19306 /* Put these in the .init_array section, using a special relocation. */
19307 if (priority != DEFAULT_INIT_PRIORITY)
19310 sprintf (buf, "%s.%.5u",
19311 is_ctor ? ".init_array" : ".fini_array",
19313 s = get_section (buf, SECTION_WRITE, NULL_TREE);
19320 switch_to_section (s);
19321 assemble_align (POINTER_SIZE);
19322 fputs ("\t.word\t", asm_out_file);
19323 output_addr_const (asm_out_file, symbol);
19324 fputs ("(target1)\n", asm_out_file);
19327 /* Add a function to the list of static constructors. */
19330 arm_elf_asm_constructor (rtx symbol, int priority)
19332 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
19335 /* Add a function to the list of static destructors. */
19338 arm_elf_asm_destructor (rtx symbol, int priority)
19340 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
19343 /* A finite state machine takes care of noticing whether or not instructions
19344 can be conditionally executed, and thus decrease execution time and code
19345 size by deleting branch instructions. The fsm is controlled by
19346 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
19348 /* The states of the fsm controlling condition codes are:
19349 0: normal, do nothing special
19350 1: make ASM_OUTPUT_OPCODE not output this instruction
19351 2: make ASM_OUTPUT_OPCODE not output this instruction
19352 3: make instructions conditional
19353 4: make instructions conditional
19355 State transitions (state->state by whom under condition):
19356 0 -> 1 final_prescan_insn if the `target' is a label
19357 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
19358 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
19359 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
19360 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
19361 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
19362 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
19363 (the target insn is arm_target_insn).
19365 If the jump clobbers the conditions then we use states 2 and 4.
19367 A similar thing can be done with conditional return insns.
19369 XXX In case the `target' is an unconditional branch, this conditionalising
19370 of the instructions always reduces code size, but not always execution
19371 time. But then, I want to reduce the code size to somewhere near what
19372 /bin/cc produces. */
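/* A minimal sketch of the transformation (editorial illustration):

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   becomes

	cmp	r0, #0
	addne	r1, r1, #1  */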
19374 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
19375 instructions. When a COND_EXEC instruction is seen the subsequent
19376 instructions are scanned so that multiple conditional instructions can be
19377 combined into a single IT block. arm_condexec_count and arm_condexec_mask
19378 specify the length and true/false mask for the IT block. These will be
19379 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
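/* Worked example (editorial): an EQ-predicated insn followed by an
   NE-predicated one gives arm_condexec_masklen == 2 with only bit 0 of
   arm_condexec_mask set, so thumb2_asm_output_opcode prints "ite eq"
   in front of the pair.  */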
19381 /* Returns the index of the ARM condition code string in
19382 `arm_condition_codes', or ARM_NV if the comparison is invalid.
19383 COMPARISON should be an rtx like `(eq (...) (...))'. */
19386 maybe_get_arm_condition_code (rtx comparison)
19388 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
19389 enum arm_cond_code code;
19390 enum rtx_code comp_code = GET_CODE (comparison);
19392 if (GET_MODE_CLASS (mode) != MODE_CC)
19393 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
19394 XEXP (comparison, 1));
19398 case CC_DNEmode: code = ARM_NE; goto dominance;
19399 case CC_DEQmode: code = ARM_EQ; goto dominance;
19400 case CC_DGEmode: code = ARM_GE; goto dominance;
19401 case CC_DGTmode: code = ARM_GT; goto dominance;
19402 case CC_DLEmode: code = ARM_LE; goto dominance;
19403 case CC_DLTmode: code = ARM_LT; goto dominance;
19404 case CC_DGEUmode: code = ARM_CS; goto dominance;
19405 case CC_DGTUmode: code = ARM_HI; goto dominance;
19406 case CC_DLEUmode: code = ARM_LS; goto dominance;
19407 case CC_DLTUmode: code = ARM_CC;
19410 if (comp_code == EQ)
19411 return ARM_INVERSE_CONDITION_CODE (code);
19412 if (comp_code == NE)
19419 case NE: return ARM_NE;
19420 case EQ: return ARM_EQ;
19421 case GE: return ARM_PL;
19422 case LT: return ARM_MI;
19423 default: return ARM_NV;
19429 case NE: return ARM_NE;
19430 case EQ: return ARM_EQ;
19431 default: return ARM_NV;
19437 case NE: return ARM_MI;
19438 case EQ: return ARM_PL;
19439 default: return ARM_NV;
19444 /* We can handle all cases except UNEQ and LTGT. */
19447 case GE: return ARM_GE;
19448 case GT: return ARM_GT;
19449 case LE: return ARM_LS;
19450 case LT: return ARM_MI;
19451 case NE: return ARM_NE;
19452 case EQ: return ARM_EQ;
19453 case ORDERED: return ARM_VC;
19454 case UNORDERED: return ARM_VS;
19455 case UNLT: return ARM_LT;
19456 case UNLE: return ARM_LE;
19457 case UNGT: return ARM_HI;
19458 case UNGE: return ARM_PL;
19459 /* UNEQ and LTGT do not have a representation. */
19460 case UNEQ: /* Fall through. */
19461 case LTGT: /* Fall through. */
19462 default: return ARM_NV;
19468 case NE: return ARM_NE;
19469 case EQ: return ARM_EQ;
19470 case GE: return ARM_LE;
19471 case GT: return ARM_LT;
19472 case LE: return ARM_GE;
19473 case LT: return ARM_GT;
19474 case GEU: return ARM_LS;
19475 case GTU: return ARM_CC;
19476 case LEU: return ARM_CS;
19477 case LTU: return ARM_HI;
19478 default: return ARM_NV;
19484 case LTU: return ARM_CS;
19485 case GEU: return ARM_CC;
19486 default: return ARM_NV;
19492 case NE: return ARM_NE;
19493 case EQ: return ARM_EQ;
19494 case GEU: return ARM_CS;
19495 case GTU: return ARM_HI;
19496 case LEU: return ARM_LS;
19497 case LTU: return ARM_CC;
19498 default: return ARM_NV;
19504 case GE: return ARM_GE;
19505 case LT: return ARM_LT;
19506 case GEU: return ARM_CS;
19507 case LTU: return ARM_CC;
19508 default: return ARM_NV;
19514 case NE: return ARM_NE;
19515 case EQ: return ARM_EQ;
19516 case GE: return ARM_GE;
19517 case GT: return ARM_GT;
19518 case LE: return ARM_LE;
19519 case LT: return ARM_LT;
19520 case GEU: return ARM_CS;
19521 case GTU: return ARM_HI;
19522 case LEU: return ARM_LS;
19523 case LTU: return ARM_CC;
19524 default: return ARM_NV;
19527 default: gcc_unreachable ();
19531 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
19532 static enum arm_cond_code
19533 get_arm_condition_code (rtx comparison)
19535 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
19536 gcc_assert (code != ARM_NV);
19540 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
19541 instructions.  */
19543 thumb2_final_prescan_insn (rtx insn)
19545 rtx first_insn = insn;
19546 rtx body = PATTERN (insn);
19548 enum arm_cond_code code;
19552 /* Remove the previous insn from the count of insns to be output. */
19553 if (arm_condexec_count)
19554 arm_condexec_count--;
19556 /* Nothing to do if we are already inside a conditional block. */
19557 if (arm_condexec_count)
19560 if (GET_CODE (body) != COND_EXEC)
19563 /* Conditional jumps are implemented directly. */
19567 predicate = COND_EXEC_TEST (body);
19568 arm_current_cc = get_arm_condition_code (predicate);
19570 n = get_attr_ce_count (insn);
19571 arm_condexec_count = 1;
19572 arm_condexec_mask = (1 << n) - 1;
19573 arm_condexec_masklen = n;
19574 /* See if subsequent instructions can be combined into the same block. */
19577 insn = next_nonnote_insn (insn);
19579 /* Jumping into the middle of an IT block is illegal, so a label or
19580 barrier terminates the block. */
19581 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
19584 body = PATTERN (insn);
19585 /* USE and CLOBBER aren't really insns, so just skip them. */
19586 if (GET_CODE (body) == USE
19587 || GET_CODE (body) == CLOBBER)
19590 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
19591 if (GET_CODE (body) != COND_EXEC)
19593 /* Allow up to 4 conditionally executed instructions in a block. */
19594 n = get_attr_ce_count (insn);
19595 if (arm_condexec_masklen + n > 4)
19598 predicate = COND_EXEC_TEST (body);
19599 code = get_arm_condition_code (predicate);
19600 mask = (1 << n) - 1;
19601 if (arm_current_cc == code)
19602 arm_condexec_mask |= (mask << arm_condexec_masklen);
19603 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
19606 arm_condexec_count++;
19607 arm_condexec_masklen += n;
19609 /* A jump must be the last instruction in a conditional block. */
19613 /* Restore recog_data (getting the attributes of other insns can
19614 destroy this array, but final.c assumes that it remains intact
19615 across this call). */
19616 extract_constrain_insn_cached (first_insn);
19620 arm_final_prescan_insn (rtx insn)
19622 /* BODY will hold the body of INSN. */
19623 rtx body = PATTERN (insn);
19625 /* This will be 1 if trying to repeat the trick, and things need to be
19626 reversed if it appears to fail. */
19629 /* If we start with a return insn, we only succeed if we find another one. */
19630 int seeking_return = 0;
19631 enum rtx_code return_code = UNKNOWN;
19633 /* START_INSN will hold the insn from where we start looking. This is the
19634 first insn after the following code_label if REVERSE is true. */
19635 rtx start_insn = insn;
19637 /* If in state 4, check if the target branch is reached, in order to
19638 change back to state 0. */
19639 if (arm_ccfsm_state == 4)
19641 if (insn == arm_target_insn)
19643 arm_target_insn = NULL;
19644 arm_ccfsm_state = 0;
19649 /* If in state 3, it is possible to repeat the trick, if this insn is an
19650 unconditional branch to a label, and immediately following this branch
19651 is the previous target label which is only used once, and the label this
19652 branch jumps to is not too far off. */
19653 if (arm_ccfsm_state == 3)
19655 if (simplejump_p (insn))
19657 start_insn = next_nonnote_insn (start_insn);
19658 if (BARRIER_P (start_insn))
19660 /* XXX Isn't this always a barrier? */
19661 start_insn = next_nonnote_insn (start_insn);
19663 if (LABEL_P (start_insn)
19664 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
19665 && LABEL_NUSES (start_insn) == 1)
19670 else if (ANY_RETURN_P (body))
19672 start_insn = next_nonnote_insn (start_insn);
19673 if (BARRIER_P (start_insn))
19674 start_insn = next_nonnote_insn (start_insn);
19675 if (LABEL_P (start_insn)
19676 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
19677 && LABEL_NUSES (start_insn) == 1)
19680 seeking_return = 1;
19681 return_code = GET_CODE (body);
19690 gcc_assert (!arm_ccfsm_state || reverse);
19691 if (!JUMP_P (insn))
19694 /* This jump might be paralleled with a clobber of the condition codes;
19695 the jump should always come first */
19696 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
19697 body = XVECEXP (body, 0, 0);
19700 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
19701 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
19704 int fail = FALSE, succeed = FALSE;
19705 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
19706 int then_not_else = TRUE;
19707 rtx this_insn = start_insn, label = 0;
19709 /* Register the insn jumped to. */
19712 if (!seeking_return)
19713 label = XEXP (SET_SRC (body), 0);
19715 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
19716 label = XEXP (XEXP (SET_SRC (body), 1), 0);
19717 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
19719 label = XEXP (XEXP (SET_SRC (body), 2), 0);
19720 then_not_else = FALSE;
19722 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
19724 seeking_return = 1;
19725 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
19727 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
19729 seeking_return = 1;
19730 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
19731 then_not_else = FALSE;
19734 gcc_unreachable ();
19736 /* See how many insns this branch skips, and what kind of insns. If all
19737 insns are okay, and the label or unconditional branch to the same
19738 label is not too far away, succeed. */
19739 for (insns_skipped = 0;
19740 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
19744 this_insn = next_nonnote_insn (this_insn);
19748 switch (GET_CODE (this_insn))
19751 /* Succeed if it is the target label, otherwise fail since
19752 control falls in from somewhere else. */
19753 if (this_insn == label)
19755 arm_ccfsm_state = 1;
19763 /* Succeed if the following insn is the target label.
19765 If return insns are used then the last insn in a function
19766 will be a barrier. */
19767 this_insn = next_nonnote_insn (this_insn);
19768 if (this_insn && this_insn == label)
19770 arm_ccfsm_state = 1;
19778 /* The AAPCS says that conditional calls should not be
19779 used since they make interworking inefficient (the
19780 linker can't transform BL<cond> into BLX). That's
19781 only a problem if the machine has BLX. */
19788 /* Succeed if the following insn is the target label, or
19789 if the following two insns are a barrier and the
19790 target label.  */
19791 this_insn = next_nonnote_insn (this_insn);
19792 if (this_insn && BARRIER_P (this_insn))
19793 this_insn = next_nonnote_insn (this_insn);
19795 if (this_insn && this_insn == label
19796 && insns_skipped < max_insns_skipped)
19798 arm_ccfsm_state = 1;
19806 /* If this is an unconditional branch to the same label, succeed.
19807 If it is to another label, do nothing.  If it is conditional,
19808 fail.  */
19809 /* XXX Probably, the tests for SET and the PC are
19810 unnecessary.  */
19812 scanbody = PATTERN (this_insn);
19813 if (GET_CODE (scanbody) == SET
19814 && GET_CODE (SET_DEST (scanbody)) == PC)
19816 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
19817 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
19819 arm_ccfsm_state = 2;
19822 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
19825 /* Fail if a conditional return is undesirable (e.g. on a
19826 StrongARM), but still allow this if optimizing for size. */
19827 else if (GET_CODE (scanbody) == return_code
19828 && !use_return_insn (TRUE, NULL)
19831 else if (GET_CODE (scanbody) == return_code)
19833 arm_ccfsm_state = 2;
19836 else if (GET_CODE (scanbody) == PARALLEL)
19838 switch (get_attr_conds (this_insn))
19848 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
19853 /* Instructions using or affecting the condition codes make it
19854 fail.  */
19855 scanbody = PATTERN (this_insn);
19856 if (!(GET_CODE (scanbody) == SET
19857 || GET_CODE (scanbody) == PARALLEL)
19858 || get_attr_conds (this_insn) != CONDS_NOCOND)
19868 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
19869 arm_target_label = CODE_LABEL_NUMBER (label);
19872 gcc_assert (seeking_return || arm_ccfsm_state == 2);
19874 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
19876 this_insn = next_nonnote_insn (this_insn);
19877 gcc_assert (!this_insn
19878 || (!BARRIER_P (this_insn)
19879 && !LABEL_P (this_insn)));
19883 /* Oh, dear!  We ran off the end; give up.  */
19884 extract_constrain_insn_cached (insn);
19885 arm_ccfsm_state = 0;
19886 arm_target_insn = NULL;
19889 arm_target_insn = this_insn;
19892 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
19893 what it was.  */
19894 if (!reverse)
19895 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
19897 if (reverse || then_not_else)
19898 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
19901 /* Restore recog_data (getting the attributes of other insns can
19902 destroy this array, but final.c assumes that it remains intact
19903 across this call).  */
19904 extract_constrain_insn_cached (insn);
19908 /* Output IT instructions. */
19910 thumb2_asm_output_opcode (FILE * stream)
19915 if (arm_condexec_mask)
19917 for (n = 0; n < arm_condexec_masklen; n++)
19918 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
19920 asm_fprintf(stream, "i%s\t%s\n\t", buff,
19921 arm_condition_codes[arm_current_cc]);
19922 arm_condexec_mask = 0;
19926 /* Returns true if REGNO is a valid register
19927 for holding a quantity of type MODE. */
19929 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
19931 if (GET_MODE_CLASS (mode) == MODE_CC)
19932 return (regno == CC_REGNUM
19933 || (TARGET_HARD_FLOAT && TARGET_VFP
19934 && regno == VFPCC_REGNUM));
19937 /* For the Thumb we only allow values bigger than SImode in
19938 registers 0 - 6, so that there is always a second low
19939 register available to hold the upper part of the value.
19940 We probably ought to ensure that the register is the
19941 start of an even numbered register pair. */
19942 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
19944 if (TARGET_HARD_FLOAT && TARGET_VFP
19945 && IS_VFP_REGNUM (regno))
19947 if (mode == SFmode || mode == SImode)
19948 return VFP_REGNO_OK_FOR_SINGLE (regno);
19950 if (mode == DFmode)
19951 return VFP_REGNO_OK_FOR_DOUBLE (regno);
19953 /* VFP registers can hold HFmode values, but there is no point in
19954 putting them there unless we have hardware conversion insns. */
19955 if (mode == HFmode)
19956 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
19959 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
19960 || (VALID_NEON_QREG_MODE (mode)
19961 && NEON_REGNO_OK_FOR_QUAD (regno))
19962 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
19963 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
19964 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
19965 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
19966 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
19971 if (TARGET_REALLY_IWMMXT)
19973 if (IS_IWMMXT_GR_REGNUM (regno))
19974 return mode == SImode;
19976 if (IS_IWMMXT_REGNUM (regno))
19977 return VALID_IWMMXT_REG_MODE (mode);
19980 /* We allow almost any value to be stored in the general registers.
19981 Restrict doubleword quantities to even register pairs so that we can
19982 use ldrd. Do not allow very large Neon structure opaque modes in
19983 general registers; they would use too many. */
19984 if (regno <= LAST_ARM_REGNUM)
19985 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
19986 && ARM_NUM_REGS (mode) <= 4;
19988 if (regno == FRAME_POINTER_REGNUM
19989 || regno == ARG_POINTER_REGNUM)
19990 /* We only allow integers in the fake hard registers. */
19991 return GET_MODE_CLASS (mode) == MODE_INT;
19996 /* Implement MODES_TIEABLE_P. */
19999 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
20001 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
20004 /* We specifically want to allow elements of "structure" modes to
20005 be tieable to the structure. This more general condition allows
20006 other rarer situations too. */
20008 && (VALID_NEON_DREG_MODE (mode1)
20009 || VALID_NEON_QREG_MODE (mode1)
20010 || VALID_NEON_STRUCT_MODE (mode1))
20011 && (VALID_NEON_DREG_MODE (mode2)
20012 || VALID_NEON_QREG_MODE (mode2)
20013 || VALID_NEON_STRUCT_MODE (mode2)))
20019 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
20020 not used in arm mode. */
20023 arm_regno_class (int regno)
20027 if (regno == STACK_POINTER_REGNUM)
20029 if (regno == CC_REGNUM)
20036 if (TARGET_THUMB2 && regno < 8)
20039 if ( regno <= LAST_ARM_REGNUM
20040 || regno == FRAME_POINTER_REGNUM
20041 || regno == ARG_POINTER_REGNUM)
20042 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
20044 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
20045 return TARGET_THUMB2 ? CC_REG : NO_REGS;
20047 if (IS_VFP_REGNUM (regno))
20049 if (regno <= D7_VFP_REGNUM)
20050 return VFP_D0_D7_REGS;
20051 else if (regno <= LAST_LO_VFP_REGNUM)
20052 return VFP_LO_REGS;
20054 return VFP_HI_REGS;
20057 if (IS_IWMMXT_REGNUM (regno))
20058 return IWMMXT_REGS;
20060 if (IS_IWMMXT_GR_REGNUM (regno))
20061 return IWMMXT_GR_REGS;
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (!REG_P (addr))
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

	   double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address
       of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (   NONJUMP_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) == SET
	  && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
	  && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
	  )
	{
	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));

	  break;
	}
    }

  if (value == 0)
    {
      debug_rtx (addr);
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
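/* For illustration (not from the original source): the loop above matches an
   insn whose pattern has the shape

       (set (reg:SI Rn) (plus:SI (reg:SI <hard-fp>) (const_int 8)))

   and extracts the constant, here 8, as the argument's offset from the
   frame pointer.  */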
typedef enum {
  T_V8QI,
  T_V4HI,
  T_V4HF,
  T_V2SI,
  T_V2SF,
  T_DI,
  T_V16QI,
  T_V8HI,
  T_V4SI,
  T_V4SF,
  T_V2DI,
  T_TI,
  T_EI,
  T_OI,
  T_MAX		/* Size of enum.  Keep last.  */
} neon_builtin_type_mode;

#define TYPE_MODE_BIT(X) (1 << (X))

#define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI)	\
		 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI)	\
		 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
#define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI)	\
		 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF)	\
		 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
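/* For example (illustrative): TYPE_MODE_BIT (T_V4SF) is set in TB_QREG but
   not in TB_DREG, so the test

       (TYPE_MODE_BIT (type_mode) & TB_QREG) != 0

   identifies a 128-bit (quadword) key type; neon_dereference_pointer below
   uses exactly this test to pick a vector size of 16 bytes rather than 8.  */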
#define v8qi_UP  T_V8QI
#define v4hi_UP  T_V4HI
#define v4hf_UP  T_V4HF
#define v2si_UP  T_V2SI
#define v2sf_UP  T_V2SF
#define di_UP    T_DI
#define v16qi_UP T_V16QI
#define v8hi_UP  T_V8HI
#define v4si_UP  T_V4SI
#define v4sf_UP  T_V4SF
#define v2di_UP  T_V2DI
#define ti_UP    T_TI
#define ei_UP    T_EI
#define oi_UP    T_OI

#define UP(X) X##_UP
typedef enum {
  NEON_BINOP,
  NEON_TERNOP,
  NEON_UNOP,
  NEON_GETLANE,
  NEON_SETLANE,
  NEON_CREATE,
  NEON_RINT,
  NEON_DUP,
  NEON_DUPLANE,
  NEON_COMBINE,
  NEON_SPLIT,
  NEON_LANEMUL,
  NEON_LANEMULL,
  NEON_LANEMULH,
  NEON_LANEMAC,
  NEON_SCALARMUL,
  NEON_SCALARMULL,
  NEON_SCALARMULH,
  NEON_SCALARMAC,
  NEON_CONVERT,
  NEON_FLOAT_WIDEN,
  NEON_FLOAT_NARROW,
  NEON_FIXCONV,
  NEON_SELECT,
  NEON_RESULTPAIR,
  NEON_REINTERP,
  NEON_VTBL,
  NEON_VTBX,
  NEON_LOAD1,
  NEON_LOAD1LANE,
  NEON_STORE1,
  NEON_STORE1LANE,
  NEON_LOADSTRUCT,
  NEON_LOADSTRUCTLANE,
  NEON_STORESTRUCT,
  NEON_STORESTRUCTLANE,
  NEON_LOGICBINOP,
  NEON_SHIFTINSERT,
  NEON_SHIFTIMM,
  NEON_SHIFTACC
} neon_itype;

typedef struct {
  const char *name;
  const neon_itype itype;
  const neon_builtin_type_mode mode;
  const enum insn_code code;
  unsigned int fcode;
} neon_builtin_datum;
#define CF(N,X) CODE_FOR_neon_##N##X

#define VAR1(T, N, A) \
  {#N, NEON_##T, UP (A), CF (N, A), 0}
#define VAR2(T, N, A, B) \
  VAR1 (T, N, A), \
  {#N, NEON_##T, UP (B), CF (N, B), 0}
#define VAR3(T, N, A, B, C) \
  VAR2 (T, N, A, B), \
  {#N, NEON_##T, UP (C), CF (N, C), 0}
#define VAR4(T, N, A, B, C, D) \
  VAR3 (T, N, A, B, C), \
  {#N, NEON_##T, UP (D), CF (N, D), 0}
#define VAR5(T, N, A, B, C, D, E) \
  VAR4 (T, N, A, B, C, D), \
  {#N, NEON_##T, UP (E), CF (N, E), 0}
#define VAR6(T, N, A, B, C, D, E, F) \
  VAR5 (T, N, A, B, C, D, E), \
  {#N, NEON_##T, UP (F), CF (N, F), 0}
#define VAR7(T, N, A, B, C, D, E, F, G) \
  VAR6 (T, N, A, B, C, D, E, F), \
  {#N, NEON_##T, UP (G), CF (N, G), 0}
#define VAR8(T, N, A, B, C, D, E, F, G, H) \
  VAR7 (T, N, A, B, C, D, E, F, G), \
  {#N, NEON_##T, UP (H), CF (N, H), 0}
#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
  VAR8 (T, N, A, B, C, D, E, F, G, H), \
  {#N, NEON_##T, UP (I), CF (N, I), 0}
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
  VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
  {#N, NEON_##T, UP (J), CF (N, J), 0}
/* The NEON builtin data can be found in arm_neon_builtins.def.
   The mode entries in the following table correspond to the "key" type of the
   instruction variant, i.e. equivalent to that which would be specified after
   the assembler mnemonic, which usually refers to the last vector operand.
   (Signed/unsigned/polynomial types are not differentiated between though, and
   are all mapped onto the same mode for a given element size.)  The modes
   listed per instruction should be the same as those defined for that
   instruction's pattern in neon.md.  */

static neon_builtin_datum neon_builtin_data[] =
{
#include "arm_neon_builtins.def"
};
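/* For illustration, with a hypothetical .def entry (not necessarily present
   in arm_neon_builtins.def): a line such as

       VAR2 (BINOP, vadd, v8qi, v16qi)

   expands through the UP/CF/VAR macros above into two table entries:

       {"vadd", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddv8qi, 0},
       {"vadd", NEON_BINOP, T_V16QI, CODE_FOR_neon_vaddv16qi, 0}

   The trailing 0 is the fcode field, which is assigned for real when the
   builtins are registered in arm_init_neon_builtins below.  */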
#undef CF
#undef VAR1
#undef VAR2
#undef VAR3
#undef VAR4
#undef VAR5
#undef VAR6
#undef VAR7
#undef VAR8
#undef VAR9
#undef VAR10

#define CF(N,X) ARM_BUILTIN_NEON_##N##X
#define VAR1(T, N, A) \
  CF (N, A)
#define VAR2(T, N, A, B) \
  VAR1 (T, N, A), \
  CF (N, B)
#define VAR3(T, N, A, B, C) \
  VAR2 (T, N, A, B), \
  CF (N, C)
#define VAR4(T, N, A, B, C, D) \
  VAR3 (T, N, A, B, C), \
  CF (N, D)
#define VAR5(T, N, A, B, C, D, E) \
  VAR4 (T, N, A, B, C, D), \
  CF (N, E)
#define VAR6(T, N, A, B, C, D, E, F) \
  VAR5 (T, N, A, B, C, D, E), \
  CF (N, F)
#define VAR7(T, N, A, B, C, D, E, F, G) \
  VAR6 (T, N, A, B, C, D, E, F), \
  CF (N, G)
#define VAR8(T, N, A, B, C, D, E, F, G, H) \
  VAR7 (T, N, A, B, C, D, E, F, G), \
  CF (N, H)
#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
  VAR8 (T, N, A, B, C, D, E, F, G, H), \
  CF (N, I)
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
  VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
  CF (N, J)

enum arm_builtins
{
  ARM_BUILTIN_GETWCGR0,
  ARM_BUILTIN_GETWCGR1,
  ARM_BUILTIN_GETWCGR2,
  ARM_BUILTIN_GETWCGR3,

  ARM_BUILTIN_SETWCGR0,
  ARM_BUILTIN_SETWCGR1,
  ARM_BUILTIN_SETWCGR2,
  ARM_BUILTIN_SETWCGR3,

  ARM_BUILTIN_WZERO,

  ARM_BUILTIN_WAVG2BR,
  ARM_BUILTIN_WAVG2HR,
  ARM_BUILTIN_WAVG2B,
  ARM_BUILTIN_WAVG2H,

  ARM_BUILTIN_WACCB,
  ARM_BUILTIN_WACCH,
  ARM_BUILTIN_WACCW,

  ARM_BUILTIN_WMACS,
  ARM_BUILTIN_WMACSZ,
  ARM_BUILTIN_WMACU,
  ARM_BUILTIN_WMACUZ,

  ARM_BUILTIN_WSADB,
  ARM_BUILTIN_WSADBZ,
  ARM_BUILTIN_WSADH,
  ARM_BUILTIN_WSADHZ,

  ARM_BUILTIN_WALIGNI,
  ARM_BUILTIN_WALIGNR0,
  ARM_BUILTIN_WALIGNR1,
  ARM_BUILTIN_WALIGNR2,
  ARM_BUILTIN_WALIGNR3,

  ARM_BUILTIN_TMIA,
  ARM_BUILTIN_TMIAPH,
  ARM_BUILTIN_TMIABB,
  ARM_BUILTIN_TMIABT,
  ARM_BUILTIN_TMIATB,
  ARM_BUILTIN_TMIATT,

  ARM_BUILTIN_TMOVMSKB,
  ARM_BUILTIN_TMOVMSKH,
  ARM_BUILTIN_TMOVMSKW,

  ARM_BUILTIN_TBCSTB,
  ARM_BUILTIN_TBCSTH,
  ARM_BUILTIN_TBCSTW,

  ARM_BUILTIN_WMADDS,
  ARM_BUILTIN_WMADDU,

  ARM_BUILTIN_WPACKHSS,
  ARM_BUILTIN_WPACKWSS,
  ARM_BUILTIN_WPACKDSS,
  ARM_BUILTIN_WPACKHUS,
  ARM_BUILTIN_WPACKWUS,
  ARM_BUILTIN_WPACKDUS,

  ARM_BUILTIN_WADDB,
  ARM_BUILTIN_WADDH,
  ARM_BUILTIN_WADDW,
  ARM_BUILTIN_WADDSSB,
  ARM_BUILTIN_WADDSSH,
  ARM_BUILTIN_WADDSSW,
  ARM_BUILTIN_WADDUSB,
  ARM_BUILTIN_WADDUSH,
  ARM_BUILTIN_WADDUSW,
  ARM_BUILTIN_WSUBB,
  ARM_BUILTIN_WSUBH,
  ARM_BUILTIN_WSUBW,
  ARM_BUILTIN_WSUBSSB,
  ARM_BUILTIN_WSUBSSH,
  ARM_BUILTIN_WSUBSSW,
  ARM_BUILTIN_WSUBUSB,
  ARM_BUILTIN_WSUBUSH,
  ARM_BUILTIN_WSUBUSW,

  ARM_BUILTIN_WAND,
  ARM_BUILTIN_WANDN,
  ARM_BUILTIN_WOR,
  ARM_BUILTIN_WXOR,

  ARM_BUILTIN_WCMPEQB,
  ARM_BUILTIN_WCMPEQH,
  ARM_BUILTIN_WCMPEQW,
  ARM_BUILTIN_WCMPGTUB,
  ARM_BUILTIN_WCMPGTUH,
  ARM_BUILTIN_WCMPGTUW,
  ARM_BUILTIN_WCMPGTSB,
  ARM_BUILTIN_WCMPGTSH,
  ARM_BUILTIN_WCMPGTSW,

  ARM_BUILTIN_TEXTRMSB,
  ARM_BUILTIN_TEXTRMSH,
  ARM_BUILTIN_TEXTRMSW,
  ARM_BUILTIN_TEXTRMUB,
  ARM_BUILTIN_TEXTRMUH,
  ARM_BUILTIN_TEXTRMUW,
  ARM_BUILTIN_TINSRB,
  ARM_BUILTIN_TINSRH,
  ARM_BUILTIN_TINSRW,

  ARM_BUILTIN_WMAXSW,
  ARM_BUILTIN_WMAXSH,
  ARM_BUILTIN_WMAXSB,
  ARM_BUILTIN_WMAXUW,
  ARM_BUILTIN_WMAXUH,
  ARM_BUILTIN_WMAXUB,
  ARM_BUILTIN_WMINSW,
  ARM_BUILTIN_WMINSH,
  ARM_BUILTIN_WMINSB,
  ARM_BUILTIN_WMINUW,
  ARM_BUILTIN_WMINUH,
  ARM_BUILTIN_WMINUB,

  ARM_BUILTIN_WMULUM,
  ARM_BUILTIN_WMULSM,
  ARM_BUILTIN_WMULUL,

  ARM_BUILTIN_PSADBH,
  ARM_BUILTIN_WSHUFH,

  ARM_BUILTIN_WSLLH,
  ARM_BUILTIN_WSLLW,
  ARM_BUILTIN_WSLLD,
  ARM_BUILTIN_WSRAH,
  ARM_BUILTIN_WSRAW,
  ARM_BUILTIN_WSRAD,
  ARM_BUILTIN_WSRLH,
  ARM_BUILTIN_WSRLW,
  ARM_BUILTIN_WSRLD,
  ARM_BUILTIN_WRORH,
  ARM_BUILTIN_WRORW,
  ARM_BUILTIN_WRORD,
  ARM_BUILTIN_WSLLHI,
  ARM_BUILTIN_WSLLWI,
  ARM_BUILTIN_WSLLDI,
  ARM_BUILTIN_WSRAHI,
  ARM_BUILTIN_WSRAWI,
  ARM_BUILTIN_WSRADI,
  ARM_BUILTIN_WSRLHI,
  ARM_BUILTIN_WSRLWI,
  ARM_BUILTIN_WSRLDI,
  ARM_BUILTIN_WRORHI,
  ARM_BUILTIN_WRORWI,
  ARM_BUILTIN_WRORDI,

  ARM_BUILTIN_WUNPCKIHB,
  ARM_BUILTIN_WUNPCKIHH,
  ARM_BUILTIN_WUNPCKIHW,
  ARM_BUILTIN_WUNPCKILB,
  ARM_BUILTIN_WUNPCKILH,
  ARM_BUILTIN_WUNPCKILW,

  ARM_BUILTIN_WUNPCKEHSB,
  ARM_BUILTIN_WUNPCKEHSH,
  ARM_BUILTIN_WUNPCKEHSW,
  ARM_BUILTIN_WUNPCKEHUB,
  ARM_BUILTIN_WUNPCKEHUH,
  ARM_BUILTIN_WUNPCKEHUW,
  ARM_BUILTIN_WUNPCKELSB,
  ARM_BUILTIN_WUNPCKELSH,
  ARM_BUILTIN_WUNPCKELSW,
  ARM_BUILTIN_WUNPCKELUB,
  ARM_BUILTIN_WUNPCKELUH,
  ARM_BUILTIN_WUNPCKELUW,

  ARM_BUILTIN_WABSB,
  ARM_BUILTIN_WABSH,
  ARM_BUILTIN_WABSW,

  ARM_BUILTIN_WADDSUBHX,
  ARM_BUILTIN_WSUBADDHX,

  ARM_BUILTIN_WABSDIFFB,
  ARM_BUILTIN_WABSDIFFH,
  ARM_BUILTIN_WABSDIFFW,

  ARM_BUILTIN_WADDCH,
  ARM_BUILTIN_WADDCW,

  ARM_BUILTIN_WAVG4,
  ARM_BUILTIN_WAVG4R,

  ARM_BUILTIN_WMADDSX,
  ARM_BUILTIN_WMADDUX,

  ARM_BUILTIN_WMADDSN,
  ARM_BUILTIN_WMADDUN,

  ARM_BUILTIN_WMULWSM,
  ARM_BUILTIN_WMULWUM,

  ARM_BUILTIN_WMULWSMR,
  ARM_BUILTIN_WMULWUMR,

  ARM_BUILTIN_WMULWL,

  ARM_BUILTIN_WMULSMR,
  ARM_BUILTIN_WMULUMR,

  ARM_BUILTIN_WQMULM,
  ARM_BUILTIN_WQMULMR,

  ARM_BUILTIN_WQMULWM,
  ARM_BUILTIN_WQMULWMR,

  ARM_BUILTIN_WADDBHUSM,
  ARM_BUILTIN_WADDBHUSL,

  ARM_BUILTIN_WQMIABB,
  ARM_BUILTIN_WQMIABT,
  ARM_BUILTIN_WQMIATB,
  ARM_BUILTIN_WQMIATT,

  ARM_BUILTIN_WQMIABBN,
  ARM_BUILTIN_WQMIABTN,
  ARM_BUILTIN_WQMIATBN,
  ARM_BUILTIN_WQMIATTN,

  ARM_BUILTIN_WMIABB,
  ARM_BUILTIN_WMIABT,
  ARM_BUILTIN_WMIATB,
  ARM_BUILTIN_WMIATT,

  ARM_BUILTIN_WMIABBN,
  ARM_BUILTIN_WMIABTN,
  ARM_BUILTIN_WMIATBN,
  ARM_BUILTIN_WMIATTN,

  ARM_BUILTIN_WMIAWBB,
  ARM_BUILTIN_WMIAWBT,
  ARM_BUILTIN_WMIAWTB,
  ARM_BUILTIN_WMIAWTT,

  ARM_BUILTIN_WMIAWBBN,
  ARM_BUILTIN_WMIAWBTN,
  ARM_BUILTIN_WMIAWTBN,
  ARM_BUILTIN_WMIAWTTN,

  ARM_BUILTIN_WMERGE,

#include "arm_neon_builtins.def"

  ,ARM_BUILTIN_MAX
};

#define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
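/* For illustration: the Neon builtins occupy the last
   ARRAY_SIZE (neon_builtin_data) function codes, so a Neon FCODE maps back
   to its table entry as

       neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE]

   which is exactly how arm_expand_neon_builtin below indexes the table.  */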
static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
static void
arm_init_neon_builtins (void)
{
  unsigned int i, fcode;
  tree decl;

  tree neon_intQI_type_node;
  tree neon_intHI_type_node;
  tree neon_floatHF_type_node;
  tree neon_polyQI_type_node;
  tree neon_polyHI_type_node;
  tree neon_intSI_type_node;
  tree neon_intDI_type_node;
  tree neon_float_type_node;

  tree intQI_pointer_node;
  tree intHI_pointer_node;
  tree intSI_pointer_node;
  tree intDI_pointer_node;
  tree float_pointer_node;

  tree const_intQI_node;
  tree const_intHI_node;
  tree const_intSI_node;
  tree const_intDI_node;
  tree const_float_node;

  tree const_intQI_pointer_node;
  tree const_intHI_pointer_node;
  tree const_intSI_pointer_node;
  tree const_intDI_pointer_node;
  tree const_float_pointer_node;

  tree V8QI_type_node;
  tree V4HI_type_node;
  tree V4HF_type_node;
  tree V2SI_type_node;
  tree V2SF_type_node;
  tree V16QI_type_node;
  tree V8HI_type_node;
  tree V4SI_type_node;
  tree V4SF_type_node;
  tree V2DI_type_node;

  tree intUQI_type_node;
  tree intUHI_type_node;
  tree intUSI_type_node;
  tree intUDI_type_node;

  tree intEI_type_node;
  tree intOI_type_node;
  tree intCI_type_node;
  tree intXI_type_node;

  tree V8QI_pointer_node;
  tree V4HI_pointer_node;
  tree V2SI_pointer_node;
  tree V2SF_pointer_node;
  tree V16QI_pointer_node;
  tree V8HI_pointer_node;
  tree V4SI_pointer_node;
  tree V4SF_pointer_node;
  tree V2DI_pointer_node;

  tree void_ftype_pv8qi_v8qi_v8qi;
  tree void_ftype_pv4hi_v4hi_v4hi;
  tree void_ftype_pv2si_v2si_v2si;
  tree void_ftype_pv2sf_v2sf_v2sf;
  tree void_ftype_pdi_di_di;
  tree void_ftype_pv16qi_v16qi_v16qi;
  tree void_ftype_pv8hi_v8hi_v8hi;
  tree void_ftype_pv4si_v4si_v4si;
  tree void_ftype_pv4sf_v4sf_v4sf;
  tree void_ftype_pv2di_v2di_v2di;

  tree reinterp_ftype_dreg[5][5];
  tree reinterp_ftype_qreg[5][5];
  tree dreg_types[5], qreg_types[5];
  /* Create distinguished type nodes for NEON vector element types,
     and pointers to values of such types, so we can detect them later.  */
  neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
  neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
  neon_float_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
  layout_type (neon_float_type_node);
  neon_floatHF_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
  layout_type (neon_floatHF_type_node);

  /* Define typedefs which exactly correspond to the modes we are basing vector
     types on.  If you change these names you'll need to change
     the table used by arm_mangle_type too.  */
  (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
					     "__builtin_neon_qi");
  (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
					     "__builtin_neon_hi");
  (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
					     "__builtin_neon_hf");
  (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
					     "__builtin_neon_si");
  (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
					     "__builtin_neon_sf");
  (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
					     "__builtin_neon_di");
  (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
					     "__builtin_neon_poly8");
  (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
					     "__builtin_neon_poly16");

  intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
  intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
  intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
  intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
  float_pointer_node = build_pointer_type (neon_float_type_node);
  /* Next create constant-qualified versions of the above types.  */
  const_intQI_node = build_qualified_type (neon_intQI_type_node,
					   TYPE_QUAL_CONST);
  const_intHI_node = build_qualified_type (neon_intHI_type_node,
					   TYPE_QUAL_CONST);
  const_intSI_node = build_qualified_type (neon_intSI_type_node,
					   TYPE_QUAL_CONST);
  const_intDI_node = build_qualified_type (neon_intDI_type_node,
					   TYPE_QUAL_CONST);
  const_float_node = build_qualified_type (neon_float_type_node,
					   TYPE_QUAL_CONST);

  const_intQI_pointer_node = build_pointer_type (const_intQI_node);
  const_intHI_pointer_node = build_pointer_type (const_intHI_node);
  const_intSI_pointer_node = build_pointer_type (const_intSI_node);
  const_intDI_pointer_node = build_pointer_type (const_intDI_node);
  const_float_pointer_node = build_pointer_type (const_float_node);
  /* Now create vector types based on our NEON element types.  */
  /* 64-bit vectors.  */
  V8QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
  V4HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
  V4HF_type_node =
    build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
  V2SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
  V2SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V2SFmode);
  /* 128-bit vectors.  */
  V16QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
  V8HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
  V4SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
  V4SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V4SFmode);
  V2DI_type_node =
    build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
  /* Unsigned integer types for various mode sizes.  */
  intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
  intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
  intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
  intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));

  (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
					     "__builtin_neon_uqi");
  (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
					     "__builtin_neon_uhi");
  (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
					     "__builtin_neon_usi");
  (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
					     "__builtin_neon_udi");

  /* Opaque integer types for structures of vectors.  */
  intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
  intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
  intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
  intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));

  (*lang_hooks.types.register_builtin_type) (intTI_type_node,
					     "__builtin_neon_ti");
  (*lang_hooks.types.register_builtin_type) (intEI_type_node,
					     "__builtin_neon_ei");
  (*lang_hooks.types.register_builtin_type) (intOI_type_node,
					     "__builtin_neon_oi");
  (*lang_hooks.types.register_builtin_type) (intCI_type_node,
					     "__builtin_neon_ci");
  (*lang_hooks.types.register_builtin_type) (intXI_type_node,
					     "__builtin_neon_xi");
  /* Pointers to vector types.  */
  V8QI_pointer_node = build_pointer_type (V8QI_type_node);
  V4HI_pointer_node = build_pointer_type (V4HI_type_node);
  V2SI_pointer_node = build_pointer_type (V2SI_type_node);
  V2SF_pointer_node = build_pointer_type (V2SF_type_node);
  V16QI_pointer_node = build_pointer_type (V16QI_type_node);
  V8HI_pointer_node = build_pointer_type (V8HI_type_node);
  V4SI_pointer_node = build_pointer_type (V4SI_type_node);
  V4SF_pointer_node = build_pointer_type (V4SF_type_node);
  V2DI_pointer_node = build_pointer_type (V2DI_type_node);

  /* Operations which return results as pairs.  */
  void_ftype_pv8qi_v8qi_v8qi =
    build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
			      V8QI_type_node, NULL);
  void_ftype_pv4hi_v4hi_v4hi =
    build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
			      V4HI_type_node, NULL);
  void_ftype_pv2si_v2si_v2si =
    build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
			      V2SI_type_node, NULL);
  void_ftype_pv2sf_v2sf_v2sf =
    build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
			      V2SF_type_node, NULL);
  void_ftype_pdi_di_di =
    build_function_type_list (void_type_node, intDI_pointer_node,
			      neon_intDI_type_node, neon_intDI_type_node, NULL);
  void_ftype_pv16qi_v16qi_v16qi =
    build_function_type_list (void_type_node, V16QI_pointer_node,
			      V16QI_type_node, V16QI_type_node, NULL);
  void_ftype_pv8hi_v8hi_v8hi =
    build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
			      V8HI_type_node, NULL);
  void_ftype_pv4si_v4si_v4si =
    build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
			      V4SI_type_node, NULL);
  void_ftype_pv4sf_v4sf_v4sf =
    build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
			      V4SF_type_node, NULL);
  void_ftype_pv2di_v2di_v2di =
    build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
			      V2DI_type_node, NULL);
  dreg_types[0] = V8QI_type_node;
  dreg_types[1] = V4HI_type_node;
  dreg_types[2] = V2SI_type_node;
  dreg_types[3] = V2SF_type_node;
  dreg_types[4] = neon_intDI_type_node;

  qreg_types[0] = V16QI_type_node;
  qreg_types[1] = V8HI_type_node;
  qreg_types[2] = V4SI_type_node;
  qreg_types[3] = V4SF_type_node;
  qreg_types[4] = V2DI_type_node;

  for (i = 0; i < 5; i++)
    {
      int j;
      for (j = 0; j < 5; j++)
	{
	  reinterp_ftype_dreg[i][j]
	    = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
	  reinterp_ftype_qreg[i][j]
	    = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
	}
    }
  for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
       i < ARRAY_SIZE (neon_builtin_data);
       i++, fcode++)
    {
      neon_builtin_datum *d = &neon_builtin_data[i];

      const char* const modenames[] = {
	"v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
	"v16qi", "v8hi", "v4si", "v4sf", "v2di",
	"ti", "ei", "oi"
      };
      char namebuf[60];
      tree ftype = NULL;
      int is_load = 0, is_store = 0;

      gcc_assert (ARRAY_SIZE (modenames) == T_MAX);

      d->fcode = fcode;

      switch (d->itype)
	{
	case NEON_LOAD1:
	case NEON_LOAD1LANE:
	case NEON_LOADSTRUCT:
	case NEON_LOADSTRUCTLANE:
	  is_load = 1;
	  /* Fall through.  */
	case NEON_STORE1:
	case NEON_STORE1LANE:
	case NEON_STORESTRUCT:
	case NEON_STORESTRUCTLANE:
	  if (!is_load)
	    is_store = 1;
	  /* Fall through.  */
	case NEON_UNOP:
	case NEON_RINT:
	case NEON_BINOP:
	case NEON_LOGICBINOP:
	case NEON_SHIFTINSERT:
	case NEON_TERNOP:
	case NEON_GETLANE:
	case NEON_SETLANE:
	case NEON_CREATE:
	case NEON_DUP:
	case NEON_DUPLANE:
	case NEON_SHIFTIMM:
	case NEON_SHIFTACC:
	case NEON_COMBINE:
	case NEON_SPLIT:
	case NEON_CONVERT:
	case NEON_FIXCONV:
	case NEON_LANEMUL:
	case NEON_LANEMULL:
	case NEON_LANEMULH:
	case NEON_LANEMAC:
	case NEON_SCALARMUL:
	case NEON_SCALARMULL:
	case NEON_SCALARMULH:
	case NEON_SCALARMAC:
	case NEON_SELECT:
	case NEON_VTBL:
	case NEON_VTBX:
	  {
	    int k;
	    tree return_type = void_type_node, args = void_list_node;

	    /* Build a function type directly from the insn_data for
	       this builtin.  The build_function_type() function takes
	       care of removing duplicates for us.  */
	    for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
	      {
		tree eltype;

		if (is_load && k == 1)
		  {
		    /* Neon load patterns always have the memory
		       operand in the operand 1 position.  */
		    gcc_assert (insn_data[d->code].operand[k].predicate
				== neon_struct_operand);

		    switch (d->mode)
		      {
		      case T_V8QI:
		      case T_V16QI:
			eltype = const_intQI_pointer_node;
			break;

		      case T_V4HI:
		      case T_V8HI:
			eltype = const_intHI_pointer_node;
			break;

		      case T_V2SI:
		      case T_V4SI:
			eltype = const_intSI_pointer_node;
			break;

		      case T_V2SF:
		      case T_V4SF:
			eltype = const_float_pointer_node;
			break;

		      case T_DI:
		      case T_V2DI:
			eltype = const_intDI_pointer_node;
			break;

		      default: gcc_unreachable ();
		      }
		  }
		else if (is_store && k == 0)
		  {
		    /* Similarly, Neon store patterns use operand 0 as
		       the memory location to store to.  */
		    gcc_assert (insn_data[d->code].operand[k].predicate
				== neon_struct_operand);

		    switch (d->mode)
		      {
		      case T_V8QI:
		      case T_V16QI:
			eltype = intQI_pointer_node;
			break;

		      case T_V4HI:
		      case T_V8HI:
			eltype = intHI_pointer_node;
			break;

		      case T_V2SI:
		      case T_V4SI:
			eltype = intSI_pointer_node;
			break;

		      case T_V2SF:
		      case T_V4SF:
			eltype = float_pointer_node;
			break;

		      case T_DI:
		      case T_V2DI:
			eltype = intDI_pointer_node;
			break;

		      default: gcc_unreachable ();
		      }
		  }
		else
		  {
		    switch (insn_data[d->code].operand[k].mode)
		      {
		      case VOIDmode: eltype = void_type_node; break;
		      /* Scalars.  */
		      case QImode: eltype = neon_intQI_type_node; break;
		      case HImode: eltype = neon_intHI_type_node; break;
		      case SImode: eltype = neon_intSI_type_node; break;
		      case SFmode: eltype = neon_float_type_node; break;
		      case DImode: eltype = neon_intDI_type_node; break;
		      case TImode: eltype = intTI_type_node; break;
		      case EImode: eltype = intEI_type_node; break;
		      case OImode: eltype = intOI_type_node; break;
		      case CImode: eltype = intCI_type_node; break;
		      case XImode: eltype = intXI_type_node; break;
		      /* 64-bit vectors.  */
		      case V8QImode: eltype = V8QI_type_node; break;
		      case V4HImode: eltype = V4HI_type_node; break;
		      case V2SImode: eltype = V2SI_type_node; break;
		      case V2SFmode: eltype = V2SF_type_node; break;
		      /* 128-bit vectors.  */
		      case V16QImode: eltype = V16QI_type_node; break;
		      case V8HImode: eltype = V8HI_type_node; break;
		      case V4SImode: eltype = V4SI_type_node; break;
		      case V4SFmode: eltype = V4SF_type_node; break;
		      case V2DImode: eltype = V2DI_type_node; break;
		      default: gcc_unreachable ();
		      }
		  }

		if (k == 0 && !is_store)
		  return_type = eltype;
		else
		  args = tree_cons (NULL_TREE, eltype, args);
	      }

	    ftype = build_function_type (return_type, args);
	  }
	  break;

	case NEON_RESULTPAIR:
	  {
	    switch (insn_data[d->code].operand[1].mode)
	      {
	      case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
	      case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
	      case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
	      case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
	      case DImode: ftype = void_ftype_pdi_di_di; break;
	      case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
	      case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
	      case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
	      case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
	      case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
	      default: gcc_unreachable ();
	      }
	  }
	  break;

	case NEON_REINTERP:
	  {
	    /* We iterate over 5 doubleword types, then 5 quadword
	       types.  V4HF is not a type used in reinterpret, so we translate
	       d->mode to the correct index in reinterp_ftype_dreg.  */
	    int rhs = (d->mode - ((d->mode > T_V4HF) ? 1 : 0)) % 5;
	    switch (insn_data[d->code].operand[0].mode)
	      {
	      case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
	      case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
	      case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
	      case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
	      case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
	      case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
	      case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
	      case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
	      case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
	      case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
	      default: gcc_unreachable ();
	      }
	  }
	  break;

	case NEON_FLOAT_WIDEN:
	  {
	    tree eltype = NULL_TREE;
	    tree return_type = NULL_TREE;

	    switch (insn_data[d->code].operand[1].mode)
	      {
	      case V4HFmode:
		eltype = V4HF_type_node;
		return_type = V4SF_type_node;
		break;
	      default: gcc_unreachable ();
	      }
	    ftype = build_function_type_list (return_type, eltype, NULL);
	  }
	  break;

	case NEON_FLOAT_NARROW:
	  {
	    tree eltype = NULL_TREE;
	    tree return_type = NULL_TREE;

	    switch (insn_data[d->code].operand[1].mode)
	      {
	      case V4SFmode:
		eltype = V4SF_type_node;
		return_type = V4HF_type_node;
		break;
	      default: gcc_unreachable ();
	      }
	    ftype = build_function_type_list (return_type, eltype, NULL);
	  }
	  break;

	default:
	  gcc_unreachable ();
	}

      gcc_assert (ftype != NULL);

      sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);

      decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
				   NULL_TREE);
      arm_builtin_decls[fcode] = decl;
    }
}
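/* For illustration (hypothetical entry): a table entry named "vadd" with key
   mode T_V8QI is registered here as __builtin_neon_vaddv8qi; arm_neon.h then
   wraps such builtins in the user-visible intrinsics (e.g. vadd_s8).  */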
#define def_mbuiltin(MASK, NAME, TYPE, CODE)				\
  do									\
    {									\
      if ((MASK) & insn_flags)						\
	{								\
	  tree bdecl;							\
	  bdecl = add_builtin_function ((NAME), (TYPE), (CODE),		\
					BUILT_IN_MD, NULL, NULL_TREE);	\
	  arm_builtin_decls[CODE] = bdecl;				\
	}								\
    }									\
  while (0)

struct builtin_description
{
  const unsigned int       mask;
  const enum insn_code     icode;
  const char * const       name;
  const enum arm_builtins  code;
  const enum rtx_code      comparison;
  const unsigned int       flag;
};
static const struct builtin_description bdesc_2arg[] =
{
#define IWMMXT_BUILTIN(code, string, builtin) \
  { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
    ARM_BUILTIN_##builtin, UNKNOWN, 0 },

#define IWMMXT2_BUILTIN(code, string, builtin) \
  { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
    ARM_BUILTIN_##builtin, UNKNOWN, 0 },

  IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
  IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
  IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
  IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
  IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
  IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
  IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
  IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
  IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
  IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
  IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
  IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
  IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
  IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
  IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
  IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
  IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
  IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
  IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
  IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
  IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
  IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
  IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
  IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
  IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
  IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
  IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
  IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
  IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
  IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
  IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
  IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
  IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
  IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
  IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
  IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
  IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
  IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
  IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
  IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
  IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
  IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
  IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
  IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
  IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
  IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
  IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
  IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
  IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
  IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
  IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
  IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
  IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
  IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
  IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
  IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
  IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
  IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
  IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
  IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
  IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
  IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
  IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
  IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
  IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
  IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
  IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
  IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
  IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
  IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
  IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
  IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
  IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
  IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
  IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
  IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
#define IWMMXT_BUILTIN2(code, builtin) \
  { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },

#define IWMMXT2_BUILTIN2(code, builtin) \
  { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },

  IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
  IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
  IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
  IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
  IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
};
static const struct builtin_description bdesc_1arg[] =
{
  IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
  IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
  IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
  IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
  IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
  IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
  IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
  IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
  IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
  IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
  IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
  IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
};
/* Set up all the iWMMXt builtins.  This is not called if
   TARGET_IWMMXT is zero.  */

static void
arm_init_iwmmxt_builtins (void)
{
  const struct builtin_description * d;
  size_t i;

  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);

  tree v8qi_ftype_v8qi_v8qi_int
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, V8QI_type_node,
				integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree v2si_ftype_v2si_int
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, integer_type_node, NULL_TREE);
  tree v2si_ftype_di_di
    = build_function_type_list (V2SI_type_node,
				long_long_integer_type_node,
				long_long_integer_type_node,
				NULL_TREE);
  tree di_ftype_di_int
    = build_function_type_list (long_long_integer_type_node,
				long_long_integer_type_node,
				integer_type_node, NULL_TREE);
  tree di_ftype_di_int_int
    = build_function_type_list (long_long_integer_type_node,
				long_long_integer_type_node,
				integer_type_node,
				integer_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node,
				V8QI_type_node, NULL_TREE);
  tree int_ftype_v4hi
    = build_function_type_list (integer_type_node,
				V4HI_type_node, NULL_TREE);
  tree int_ftype_v2si
    = build_function_type_list (integer_type_node,
				V2SI_type_node, NULL_TREE);
  tree int_ftype_v8qi_int
    = build_function_type_list (integer_type_node,
				V8QI_type_node, integer_type_node, NULL_TREE);
  tree int_ftype_v4hi_int
    = build_function_type_list (integer_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree int_ftype_v2si_int
    = build_function_type_list (integer_type_node,
				V2SI_type_node, integer_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_int_int
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, integer_type_node,
				integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int_int
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, integer_type_node,
				integer_type_node, NULL_TREE);
  tree v2si_ftype_v2si_int_int
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, integer_type_node,
				integer_type_node, NULL_TREE);
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v8qi_ftype_v4hi_v8qi
    = build_function_type_list (V8QI_type_node,
				V4HI_type_node, V8QI_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v8qi_v8qi
    = build_function_type_list (V2SI_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree di_ftype_void
    = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
  tree int_ftype_void
    = build_function_type_list (integer_type_node, NULL_TREE);
  tree di_ftype_v8qi
    = build_function_type_list (long_long_integer_type_node,
				V8QI_type_node, NULL_TREE);
  tree di_ftype_v4hi
    = build_function_type_list (long_long_integer_type_node,
				V4HI_type_node, NULL_TREE);
  tree di_ftype_v2si
    = build_function_type_list (long_long_integer_type_node,
				V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v4hi
    = build_function_type_list (V2SI_type_node,
				V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v8qi
    = build_function_type_list (V4HI_type_node,
				V8QI_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, NULL_TREE);

  tree di_ftype_di_v4hi_v4hi
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				V4HI_type_node, V4HI_type_node,
				NULL_TREE);

  tree di_ftype_v4hi_v4hi
    = build_function_type_list (long_long_unsigned_type_node,
				V4HI_type_node, V4HI_type_node,
				NULL_TREE);

  tree v2si_ftype_v2si_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, V4HI_type_node,
				V4HI_type_node, NULL_TREE);

  tree v2si_ftype_v2si_v8qi_v8qi
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, V8QI_type_node,
				V8QI_type_node, NULL_TREE);

  tree di_ftype_di_v2si_v2si
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				V2SI_type_node, V2SI_type_node,
				NULL_TREE);

  tree di_ftype_di_di_int
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				integer_type_node, NULL_TREE);

  tree void_ftype_int
    = build_function_type_list (void_type_node,
				integer_type_node, NULL_TREE);

  tree v8qi_ftype_char
    = build_function_type_list (V8QI_type_node,
				signed_char_type_node, NULL_TREE);

  tree v4hi_ftype_short
    = build_function_type_list (V4HI_type_node,
				short_integer_type_node, NULL_TREE);

  tree v2si_ftype_int
    = build_function_type_list (V2SI_type_node,
				integer_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				NULL_TREE);
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  gcc_unreachable ();
	}

      def_mbuiltin (d->mask, d->name, type, d->code);
    }
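  /* For example (illustrative): for the bdesc_2arg entry
     (addv8qi3, "waddb", WADDB), operand 1 of the add insn has mode V8QImode,
     so __builtin_arm_waddb is registered with type v8qi_ftype_v8qi_v8qi.  */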
  /* Add the remaining MMX insns with somewhat more complicated types.  */
#define iwmmx_mbuiltin(NAME, TYPE, CODE)			\
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE),	\
		ARM_BUILTIN_ ## CODE)

#define iwmmx2_mbuiltin(NAME, TYPE, CODE)			\
  def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE),	\
		ARM_BUILTIN_ ## CODE)

  iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
  iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
  iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
  iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
  iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
  iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
  iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
  iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
  iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);

  iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
  iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
  iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
  iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
  iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
  iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);

  iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
  iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
  iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
  iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
  iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
  iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);

  iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
  iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
  iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
  iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
  iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
  iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);

  iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
  iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
  iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
  iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
  iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
  iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);

  iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);

  iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
  iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
  iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
  iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
  iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
  iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
  iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
  iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
  iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
  iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);

  iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
  iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
  iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
  iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
  iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
  iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
  iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
  iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
  iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);

  iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
  iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
  iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);

  iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
  iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
  iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);

  iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
  iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);

  iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
  iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
  iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
  iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
  iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
  iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);

  iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
  iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
  iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
  iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
  iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
  iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
  iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
  iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
  iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
  iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
  iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
  iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);

  iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
  iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
  iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
  iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);

  iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
  iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
  iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
  iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
  iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
  iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
  iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);

  iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
  iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
  iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);

  iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
  iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
  iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
  iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);

  iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
  iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
  iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
  iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);

  iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
  iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
  iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
  iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);

  iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
  iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
  iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
  iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);

  iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
  iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
  iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
  iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);

  iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
  iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
  iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
  iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);

  iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);

  iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
  iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
  iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
#undef iwmmx_mbuiltin
#undef iwmmx2_mbuiltin
}

static void
arm_init_fp16_builtins (void)
{
  tree fp16_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fp16_type) = 16;
  layout_type (fp16_type);
  (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
}
static void
arm_init_builtins (void)
{
  if (TARGET_REALLY_IWMMXT)
    arm_init_iwmmxt_builtins ();

  if (TARGET_NEON)
    arm_init_neon_builtins ();

  if (arm_fp16_format)
    arm_init_fp16_builtins ();
}
/* Return the ARM builtin for CODE.  */

static tree
arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= ARM_BUILTIN_MAX)
    return error_mark_node;

  return arm_builtin_decls[code];
}
/* Implement TARGET_INVALID_PARAMETER_TYPE.  */

static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}

/* Implement TARGET_INVALID_RETURN_TYPE.  */

static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}
/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}
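/* For example (illustrative, not from the original source): given
   __fp16 a, b; the expression a + b is evaluated as
   (float) a + (float) b and has type float.  */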
/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions
   between __fp16 and double to go through an intermediate conversion to
   float.  */

static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
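/* For example (illustrative, not from the original source):

       __fp16 h;
       double d = h;        // expanded as (double)(float) h
       h = (__fp16) 1.5;    // expanded as (__fp16)(float) 1.5

   Conversions between __fp16 and float need no intermediate step.  */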
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (enum machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
			       : gen_rtx_SUBREG (DImode, x, 0)));
  return x;
}
/* Subroutine of arm_expand_builtin to take care of binop insns.  */

static rtx
arm_expand_binop_builtin (enum insn_code icode,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of arm_expand_builtin to take care of unop insns.  */

static rtx
arm_expand_unop_builtin (enum insn_code icode,
			 tree exp, rtx target, int do_load)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

typedef enum {
  NEON_ARG_COPY_TO_REG,
  NEON_ARG_CONSTANT,
  NEON_ARG_MEMORY,
  NEON_ARG_STOP
} builtin_arg;

#define NEON_MAX_BUILTIN_ARGS 5
/* EXP is a pointer argument to a Neon load or store intrinsic.  Derive
   and return an expression for the accessed memory.

   The intrinsic function operates on a block of registers that has
   mode REG_MODE.  This block contains vectors of type TYPE_MODE.  The
   function references the memory at EXP of type TYPE and in mode
   MEM_MODE; this mode may be BLKmode if no more suitable mode is
   available.  */

static tree
neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
			  enum machine_mode reg_mode,
			  neon_builtin_type_mode type_mode)
{
  HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
  tree elem_type, upper_bound, array_type;

  /* Work out the size of the register block in bytes.  */
  reg_size = GET_MODE_SIZE (reg_mode);

  /* Work out the size of each vector in bytes.  */
  gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
  vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);

  /* Work out how many vectors there are.  */
  gcc_assert (reg_size % vector_size == 0);
  nvectors = reg_size / vector_size;

  /* Work out the type of each element.  */
  gcc_assert (POINTER_TYPE_P (type));
  elem_type = TREE_TYPE (type);

  /* Work out how many elements are being loaded or stored.
     MEM_MODE == REG_MODE implies a one-to-one mapping between register
     and memory elements; anything else implies a lane load or store.  */
  if (mem_mode == reg_mode)
    nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
  else
    nelems = nvectors;

  /* Create a type that describes the full access.  */
  upper_bound = build_int_cst (size_type_node, nelems - 1);
  array_type = build_array_type (elem_type, build_index_type (upper_bound));

  /* Dereference EXP using that type.  */
  return fold_build2 (MEM_REF, array_type, exp,
		      build_int_cst (build_pointer_type (array_type), 0));
}
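/* Worked example (illustrative): for a full 128-bit load such as vld1q_s32,
   REG_MODE == MEM_MODE == V4SImode, so reg_size = 16, vector_size = 16
   (T_V4SI is in TB_QREG), nvectors = 1 and nelems = 16 * 1 / 4 = 4; the
   access is described as an int[4] array.  For a lane load or store
   (MEM_MODE != REG_MODE) only one element per vector is accessed, hence
   nelems = nvectors.  */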
21920 /* Expand a Neon builtin. */
21922 arm_expand_neon_args (rtx target, int icode, int have_retval,
21923 neon_builtin_type_mode type_mode,
21924 tree exp, int fcode, ...)
21928 tree arg[NEON_MAX_BUILTIN_ARGS];
21929 rtx op[NEON_MAX_BUILTIN_ARGS];
21932 enum machine_mode tmode = insn_data[icode].operand[0].mode;
21933 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
21934 enum machine_mode other_mode;
21940 || GET_MODE (target) != tmode
21941 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
21942 target = gen_reg_rtx (tmode);
21944 va_start (ap, fcode);
21946 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
21950 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
21952 if (thisarg == NEON_ARG_STOP)
21956 opno = argc + have_retval;
21957 mode[argc] = insn_data[icode].operand[opno].mode;
21958 arg[argc] = CALL_EXPR_ARG (exp, argc);
21959 arg_type = TREE_VALUE (formals);
21960 if (thisarg == NEON_ARG_MEMORY)
21962 other_mode = insn_data[icode].operand[1 - opno].mode;
21963 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
21964 mode[argc], other_mode,
21968 op[argc] = expand_normal (arg[argc]);
21972 case NEON_ARG_COPY_TO_REG:
21973 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
21974 if (!(*insn_data[icode].operand[opno].predicate)
21975 (op[argc], mode[argc]))
21976 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
21979 case NEON_ARG_CONSTANT:
21980 /* FIXME: This error message is somewhat unhelpful. */
21981 if (!(*insn_data[icode].operand[opno].predicate)
21982 (op[argc], mode[argc]))
21983 error ("argument must be a constant");
21986 case NEON_ARG_MEMORY:
21987 gcc_assert (MEM_P (op[argc]));
21988 PUT_MODE (op[argc], mode[argc]);
21989 /* ??? arm_neon.h uses the same built-in functions for signed
	     and unsigned accesses, casting where necessary.  This isn't
	     alias safe.  */
21992 set_mem_alias_set (op[argc], 0);
21993 if (!(*insn_data[icode].operand[opno].predicate)
21994 (op[argc], mode[argc]))
21995 op[argc] = (replace_equiv_address
21996 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
21999 case NEON_ARG_STOP:
22000 gcc_unreachable ();
22004 formals = TREE_CHAIN (formals);
22014 pat = GEN_FCN (icode) (target, op[0]);
22018 pat = GEN_FCN (icode) (target, op[0], op[1]);
22022 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
22026 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
22030 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
22034 gcc_unreachable ();
22040 pat = GEN_FCN (icode) (op[0]);
22044 pat = GEN_FCN (icode) (op[0], op[1]);
22048 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
22052 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
22056 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
22060 gcc_unreachable ();
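/* A sketch of how arm_expand_neon_args is driven (illustrative only):
   the variadic tail passed by the callers below is a list of
   NEON_ARG_* codes, one per operand, saying how each argument must be
   legitimized before GEN_FCN is invoked, terminated by NEON_ARG_STOP.
   A two-input instruction whose final operand must be an immediate
   lane number is therefore spelled

     arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
			   NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
			   NEON_ARG_CONSTANT, NEON_ARG_STOP);

   as the lane and shift cases below do.  */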
22071 /* Expand a Neon builtin. These are "special" because they don't have symbolic
22072 constants defined per-instruction or per instruction-variant. Instead, the
22073 required info is looked up in the table neon_builtin_data. */
static rtx
arm_expand_neon_builtin (int fcode, tree exp, rtx target)
22077 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
22078 neon_itype itype = d->itype;
22079 enum insn_code icode = d->code;
22080 neon_builtin_type_mode type_mode = d->mode;
22087 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22088 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
22092 case NEON_SCALARMUL:
22093 case NEON_SCALARMULL:
22094 case NEON_SCALARMULH:
22095 case NEON_SHIFTINSERT:
22096 case NEON_LOGICBINOP:
22097 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22098 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
22102 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22103 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
22104 NEON_ARG_CONSTANT, NEON_ARG_STOP);
22108 case NEON_SHIFTIMM:
22109 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22110 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
22114 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22115 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
22120 case NEON_FLOAT_WIDEN:
22121 case NEON_FLOAT_NARROW:
22122 case NEON_REINTERP:
22123 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22124 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
22128 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22129 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
22131 case NEON_RESULTPAIR:
22132 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
22133 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
22137 case NEON_LANEMULL:
22138 case NEON_LANEMULH:
22139 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22140 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
22141 NEON_ARG_CONSTANT, NEON_ARG_STOP);
22144 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22145 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
22146 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
22148 case NEON_SHIFTACC:
22149 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22150 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
22151 NEON_ARG_CONSTANT, NEON_ARG_STOP);
22153 case NEON_SCALARMAC:
22154 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22155 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
22156 NEON_ARG_CONSTANT, NEON_ARG_STOP);
22160 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22161 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
22165 case NEON_LOADSTRUCT:
22166 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22167 NEON_ARG_MEMORY, NEON_ARG_STOP);
22169 case NEON_LOAD1LANE:
22170 case NEON_LOADSTRUCTLANE:
22171 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22172 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
22176 case NEON_STORESTRUCT:
22177 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
22178 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
22180 case NEON_STORE1LANE:
22181 case NEON_STORESTRUCTLANE:
22182 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
22183 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
22187 gcc_unreachable ();
22190 /* Emit code to reinterpret one Neon type as another, without altering bits. */
void
neon_reinterpret (rtx dest, rtx src)
22194 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
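/* For example (illustrative): reinterpreting a V8QI value as V4HI
   emits a plain move of gen_lowpart (V4HImode, src); no bits change,
   so after register allocation the copy can coalesce away entirely,
   which is what the vreinterpret* intrinsics rely on.  */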
/* Emit code to place a Neon pair result in memory locations (with equal
   registers).  */

void
neon_emit_pair_result_insn (enum machine_mode mode,
			    rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
			    rtx op1, rtx op2)
22204 rtx mem = gen_rtx_MEM (mode, destaddr);
22205 rtx tmp1 = gen_reg_rtx (mode);
22206 rtx tmp2 = gen_reg_rtx (mode);
22208 emit_insn (intfn (tmp1, op1, op2, tmp2));
22210 emit_move_insn (mem, tmp1);
22211 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
22212 emit_move_insn (mem, tmp2);
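/* A usage sketch (hypothetical caller): with mode == V2SImode, i.e.
   GET_MODE_SIZE (mode) == 8, the sequence above amounts to

     intfn (tmp1, op1, op2, tmp2);   compute both halves of the pair
     *(v2si *) destaddr       = tmp1;
     *(v2si *) (destaddr + 8) = tmp2;

   so the two results land in adjacent MODE-sized memory slots.  */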
22215 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
22216 not to early-clobber SRC registers in the process.
22218 We assume that the operands described by SRC and DEST represent a
22219 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
22220 number of components into which the copy has been decomposed. */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
22226 if (!reg_overlap_mentioned_p (operands[0], operands[1])
22227 || REGNO (operands[0]) < REGNO (operands[1]))
22229 for (i = 0; i < count; i++)
22231 operands[2 * i] = dest[i];
22232 operands[2 * i + 1] = src[i];
22237 for (i = 0; i < count; i++)
22239 operands[2 * i] = dest[count - i - 1];
22240 operands[2 * i + 1] = src[count - i - 1];
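/* Worked example (illustrative): for COUNT == 2 with DEST = {d1, d2}
   and SRC = {d0, d1}, the blocks overlap and the destination starts
   above the source, so the pairs are emitted in reverse:

     operands = { d2, d1,  d1, d0 }

   i.e. d1 is copied into d2 before d0 overwrites d1, so no source
   register is read after it has been clobbered.  */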
22245 /* Split operands into moves from op[1] + op[2] into op[0]. */
void
neon_split_vcombine (rtx operands[3])
22250 unsigned int dest = REGNO (operands[0]);
22251 unsigned int src1 = REGNO (operands[1]);
22252 unsigned int src2 = REGNO (operands[2]);
22253 enum machine_mode halfmode = GET_MODE (operands[1]);
22254 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
22255 rtx destlo, desthi;
22257 if (src1 == dest && src2 == dest + halfregs)
22259 /* No-op move. Can't split to nothing; emit something. */
22260 emit_note (NOTE_INSN_DELETED);
22264 /* Preserve register attributes for variable tracking. */
22265 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
22266 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
22267 GET_MODE_SIZE (halfmode));
22269 /* Special case of reversed high/low parts. Use VSWP. */
22270 if (src2 == dest && src1 == dest + halfregs)
22272 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
22273 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
22274 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
22278 if (!reg_overlap_mentioned_p (operands[2], destlo))
22280 /* Try to avoid unnecessary moves if part of the result
22281 is in the right place already. */
22283 emit_move_insn (destlo, operands[1]);
22284 if (src2 != dest + halfregs)
22285 emit_move_insn (desthi, operands[2]);
22289 if (src2 != dest + halfregs)
22290 emit_move_insn (desthi, operands[2]);
22292 emit_move_insn (destlo, operands[1]);
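/* Sketch of the cases above (illustrative, with d0/d1 standing for the
   two halves of q0): q0 = vcombine (d0, d1) dissolves into the
   deleted-insn note, q0 = vcombine (d1, d0) becomes the single VSWP
   parallel, and an unrelated q0 = vcombine (d2, d3) splits into the
   two ordered moves d0 = d2; d1 = d3.  */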
22296 /* Expand an expression EXP that calls a built-in function,
22297 with result going to TARGET if that's convenient
22298 (and in mode MODE if that's convenient).
22299 SUBTARGET may be used as the target for computing one of EXP's operands.
22300 IGNORE is nonzero if the value is to be ignored. */
static rtx
arm_expand_builtin (tree exp,
		    rtx target,
22305 rtx subtarget ATTRIBUTE_UNUSED,
22306 enum machine_mode mode ATTRIBUTE_UNUSED,
22307 int ignore ATTRIBUTE_UNUSED)
22309 const struct builtin_description * d;
22310 enum insn_code icode;
22311 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
22319 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
22321 enum machine_mode tmode;
22322 enum machine_mode mode0;
22323 enum machine_mode mode1;
22324 enum machine_mode mode2;
22330 if (fcode >= ARM_BUILTIN_NEON_BASE)
22331 return arm_expand_neon_builtin (fcode, exp, target);
22335 case ARM_BUILTIN_TEXTRMSB:
22336 case ARM_BUILTIN_TEXTRMUB:
22337 case ARM_BUILTIN_TEXTRMSH:
22338 case ARM_BUILTIN_TEXTRMUH:
22339 case ARM_BUILTIN_TEXTRMSW:
22340 case ARM_BUILTIN_TEXTRMUW:
22341 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
22342 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
22343 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
22344 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
22345 : CODE_FOR_iwmmxt_textrmw);
22347 arg0 = CALL_EXPR_ARG (exp, 0);
22348 arg1 = CALL_EXPR_ARG (exp, 1);
22349 op0 = expand_normal (arg0);
22350 op1 = expand_normal (arg1);
22351 tmode = insn_data[icode].operand[0].mode;
22352 mode0 = insn_data[icode].operand[1].mode;
22353 mode1 = insn_data[icode].operand[2].mode;
22355 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
22356 op0 = copy_to_mode_reg (mode0, op0);
22357 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
22359 /* @@@ better error message */
22360 error ("selector must be an immediate");
22361 return gen_reg_rtx (tmode);
22364 opint = INTVAL (op1);
22365 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
	{
	  if (opint > 7 || opint < 0)
	    error ("the selector must be in the range 0 to 7");
	}
      else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
	{
	  if (opint > 3 || opint < 0)
	    error ("the selector must be in the range 0 to 3");
	}
      else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW.  */
	{
	  if (opint > 1 || opint < 0)
	    error ("the selector must be in the range 0 to 1");
	}
22382 || GET_MODE (target) != tmode
22383 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22384 target = gen_reg_rtx (tmode);
22385 pat = GEN_FCN (icode) (target, op0, op1);
22391 case ARM_BUILTIN_WALIGNI:
      /* If op2 is immediate, call waligni, else call walignr.  */
22393 arg0 = CALL_EXPR_ARG (exp, 0);
22394 arg1 = CALL_EXPR_ARG (exp, 1);
22395 arg2 = CALL_EXPR_ARG (exp, 2);
22396 op0 = expand_normal (arg0);
22397 op1 = expand_normal (arg1);
22398 op2 = expand_normal (arg2);
22399 if (CONST_INT_P (op2))
22401 icode = CODE_FOR_iwmmxt_waligni;
22402 tmode = insn_data[icode].operand[0].mode;
22403 mode0 = insn_data[icode].operand[1].mode;
22404 mode1 = insn_data[icode].operand[2].mode;
22405 mode2 = insn_data[icode].operand[3].mode;
22406 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
22407 op0 = copy_to_mode_reg (mode0, op0);
22408 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
22409 op1 = copy_to_mode_reg (mode1, op1);
22410 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
22411 selector = INTVAL (op2);
	  if (selector > 7 || selector < 0)
	    error ("the selector must be in the range 0 to 7");
22417 icode = CODE_FOR_iwmmxt_walignr;
22418 tmode = insn_data[icode].operand[0].mode;
22419 mode0 = insn_data[icode].operand[1].mode;
22420 mode1 = insn_data[icode].operand[2].mode;
22421 mode2 = insn_data[icode].operand[3].mode;
22422 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
22423 op0 = copy_to_mode_reg (mode0, op0);
22424 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
22425 op1 = copy_to_mode_reg (mode1, op1);
22426 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
22427 op2 = copy_to_mode_reg (mode2, op2);
22430 || GET_MODE (target) != tmode
22431 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
22432 target = gen_reg_rtx (tmode);
22433 pat = GEN_FCN (icode) (target, op0, op1, op2);
22439 case ARM_BUILTIN_TINSRB:
22440 case ARM_BUILTIN_TINSRH:
22441 case ARM_BUILTIN_TINSRW:
22442 case ARM_BUILTIN_WMERGE:
22443 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
22444 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
22445 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
22446 : CODE_FOR_iwmmxt_tinsrw);
22447 arg0 = CALL_EXPR_ARG (exp, 0);
22448 arg1 = CALL_EXPR_ARG (exp, 1);
22449 arg2 = CALL_EXPR_ARG (exp, 2);
22450 op0 = expand_normal (arg0);
22451 op1 = expand_normal (arg1);
22452 op2 = expand_normal (arg2);
22453 tmode = insn_data[icode].operand[0].mode;
22454 mode0 = insn_data[icode].operand[1].mode;
22455 mode1 = insn_data[icode].operand[2].mode;
22456 mode2 = insn_data[icode].operand[3].mode;
22458 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
22459 op0 = copy_to_mode_reg (mode0, op0);
22460 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
22461 op1 = copy_to_mode_reg (mode1, op1);
22462 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
22464 error ("selector must be an immediate");
22467 if (icode == CODE_FOR_iwmmxt_wmerge)
	{
	  selector = INTVAL (op2);
	  if (selector > 7 || selector < 0)
	    error ("the selector must be in the range 0 to 7");
	}

      if ((icode == CODE_FOR_iwmmxt_tinsrb)
	  || (icode == CODE_FOR_iwmmxt_tinsrh)
	  || (icode == CODE_FOR_iwmmxt_tinsrw))
	{
	  selector = INTVAL (op2);
	  if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
	    error ("the selector must be in the range 0 to 7");
	  else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
	    error ("the selector must be in the range 0 to 3");
	  else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
	    error ("the selector must be in the range 0 to 1");
22486 op2 = GEN_INT (mask);
22489 || GET_MODE (target) != tmode
22490 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22491 target = gen_reg_rtx (tmode);
22492 pat = GEN_FCN (icode) (target, op0, op1, op2);
22498 case ARM_BUILTIN_SETWCGR0:
22499 case ARM_BUILTIN_SETWCGR1:
22500 case ARM_BUILTIN_SETWCGR2:
22501 case ARM_BUILTIN_SETWCGR3:
22502 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
22503 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
22504 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
22505 : CODE_FOR_iwmmxt_setwcgr3);
22506 arg0 = CALL_EXPR_ARG (exp, 0);
22507 op0 = expand_normal (arg0);
22508 mode0 = insn_data[icode].operand[0].mode;
22509 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
22510 op0 = copy_to_mode_reg (mode0, op0);
22511 pat = GEN_FCN (icode) (op0);
22517 case ARM_BUILTIN_GETWCGR0:
22518 case ARM_BUILTIN_GETWCGR1:
22519 case ARM_BUILTIN_GETWCGR2:
22520 case ARM_BUILTIN_GETWCGR3:
22521 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
22522 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
22523 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
22524 : CODE_FOR_iwmmxt_getwcgr3);
22525 tmode = insn_data[icode].operand[0].mode;
22527 || GET_MODE (target) != tmode
22528 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
22529 target = gen_reg_rtx (tmode);
22530 pat = GEN_FCN (icode) (target);
22536 case ARM_BUILTIN_WSHUFH:
22537 icode = CODE_FOR_iwmmxt_wshufh;
22538 arg0 = CALL_EXPR_ARG (exp, 0);
22539 arg1 = CALL_EXPR_ARG (exp, 1);
22540 op0 = expand_normal (arg0);
22541 op1 = expand_normal (arg1);
22542 tmode = insn_data[icode].operand[0].mode;
22543 mode1 = insn_data[icode].operand[1].mode;
22544 mode2 = insn_data[icode].operand[2].mode;
22546 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
22547 op0 = copy_to_mode_reg (mode1, op0);
22548 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
22550 error ("mask must be an immediate");
22553 selector = INTVAL (op1);
      if (selector < 0 || selector > 255)
	error ("the mask must be in the range 0 to 255");
22557 || GET_MODE (target) != tmode
22558 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22559 target = gen_reg_rtx (tmode);
22560 pat = GEN_FCN (icode) (target, op0, op1);
22566 case ARM_BUILTIN_WMADDS:
22567 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
22568 case ARM_BUILTIN_WMADDSX:
22569 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
22570 case ARM_BUILTIN_WMADDSN:
22571 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
22572 case ARM_BUILTIN_WMADDU:
22573 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
22574 case ARM_BUILTIN_WMADDUX:
22575 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
22576 case ARM_BUILTIN_WMADDUN:
22577 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
22578 case ARM_BUILTIN_WSADBZ:
22579 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
22580 case ARM_BUILTIN_WSADHZ:
22581 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
22583 /* Several three-argument builtins. */
22584 case ARM_BUILTIN_WMACS:
22585 case ARM_BUILTIN_WMACU:
22586 case ARM_BUILTIN_TMIA:
22587 case ARM_BUILTIN_TMIAPH:
22588 case ARM_BUILTIN_TMIATT:
22589 case ARM_BUILTIN_TMIATB:
22590 case ARM_BUILTIN_TMIABT:
22591 case ARM_BUILTIN_TMIABB:
22592 case ARM_BUILTIN_WQMIABB:
22593 case ARM_BUILTIN_WQMIABT:
22594 case ARM_BUILTIN_WQMIATB:
22595 case ARM_BUILTIN_WQMIATT:
22596 case ARM_BUILTIN_WQMIABBN:
22597 case ARM_BUILTIN_WQMIABTN:
22598 case ARM_BUILTIN_WQMIATBN:
22599 case ARM_BUILTIN_WQMIATTN:
22600 case ARM_BUILTIN_WMIABB:
22601 case ARM_BUILTIN_WMIABT:
22602 case ARM_BUILTIN_WMIATB:
22603 case ARM_BUILTIN_WMIATT:
22604 case ARM_BUILTIN_WMIABBN:
22605 case ARM_BUILTIN_WMIABTN:
22606 case ARM_BUILTIN_WMIATBN:
22607 case ARM_BUILTIN_WMIATTN:
22608 case ARM_BUILTIN_WMIAWBB:
22609 case ARM_BUILTIN_WMIAWBT:
22610 case ARM_BUILTIN_WMIAWTB:
22611 case ARM_BUILTIN_WMIAWTT:
22612 case ARM_BUILTIN_WMIAWBBN:
22613 case ARM_BUILTIN_WMIAWBTN:
22614 case ARM_BUILTIN_WMIAWTBN:
22615 case ARM_BUILTIN_WMIAWTTN:
22616 case ARM_BUILTIN_WSADB:
22617 case ARM_BUILTIN_WSADH:
22618 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
22619 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
22620 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
22621 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
22622 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
22623 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
22624 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
22625 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
22626 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
22627 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
22628 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
22629 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
22630 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
22631 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
22632 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
22633 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
22634 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
22635 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
22636 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
22637 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
22638 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
22639 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
22640 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
22641 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
22642 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
22643 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
22644 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
22645 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
22646 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
22647 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
22648 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
22649 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
22650 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
22651 : CODE_FOR_iwmmxt_wsadh);
22652 arg0 = CALL_EXPR_ARG (exp, 0);
22653 arg1 = CALL_EXPR_ARG (exp, 1);
22654 arg2 = CALL_EXPR_ARG (exp, 2);
22655 op0 = expand_normal (arg0);
22656 op1 = expand_normal (arg1);
22657 op2 = expand_normal (arg2);
22658 tmode = insn_data[icode].operand[0].mode;
22659 mode0 = insn_data[icode].operand[1].mode;
22660 mode1 = insn_data[icode].operand[2].mode;
22661 mode2 = insn_data[icode].operand[3].mode;
22663 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
22664 op0 = copy_to_mode_reg (mode0, op0);
22665 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
22666 op1 = copy_to_mode_reg (mode1, op1);
22667 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
22668 op2 = copy_to_mode_reg (mode2, op2);
22670 || GET_MODE (target) != tmode
22671 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22672 target = gen_reg_rtx (tmode);
22673 pat = GEN_FCN (icode) (target, op0, op1, op2);
22679 case ARM_BUILTIN_WZERO:
22680 target = gen_reg_rtx (DImode);
22681 emit_insn (gen_iwmmxt_clrdi (target));
22684 case ARM_BUILTIN_WSRLHI:
22685 case ARM_BUILTIN_WSRLWI:
22686 case ARM_BUILTIN_WSRLDI:
22687 case ARM_BUILTIN_WSLLHI:
22688 case ARM_BUILTIN_WSLLWI:
22689 case ARM_BUILTIN_WSLLDI:
22690 case ARM_BUILTIN_WSRAHI:
22691 case ARM_BUILTIN_WSRAWI:
22692 case ARM_BUILTIN_WSRADI:
22693 case ARM_BUILTIN_WRORHI:
22694 case ARM_BUILTIN_WRORWI:
22695 case ARM_BUILTIN_WRORDI:
22696 case ARM_BUILTIN_WSRLH:
22697 case ARM_BUILTIN_WSRLW:
22698 case ARM_BUILTIN_WSRLD:
22699 case ARM_BUILTIN_WSLLH:
22700 case ARM_BUILTIN_WSLLW:
22701 case ARM_BUILTIN_WSLLD:
22702 case ARM_BUILTIN_WSRAH:
22703 case ARM_BUILTIN_WSRAW:
22704 case ARM_BUILTIN_WSRAD:
22705 case ARM_BUILTIN_WRORH:
22706 case ARM_BUILTIN_WRORW:
22707 case ARM_BUILTIN_WRORD:
22708 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
22709 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
22710 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
22711 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
22712 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
22713 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
22714 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
22715 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
22716 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
22717 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
22718 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
22719 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
22720 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
22721 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
22722 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
22723 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
22724 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
22725 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
22726 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
22727 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
22728 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
22729 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
22730 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
22731 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
22732 : CODE_FOR_nothing);
22733 arg1 = CALL_EXPR_ARG (exp, 1);
22734 op1 = expand_normal (arg1);
22735 if (GET_MODE (op1) == VOIDmode)
22737 imm = INTVAL (op1);
22738 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
22739 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
22740 && (imm < 0 || imm > 32))
	    {
	      if (fcode == ARM_BUILTIN_WRORHI)
		error ("the count must be in the range 0 to 32; please check the intrinsic _mm_rori_pi16");
	      else if (fcode == ARM_BUILTIN_WRORWI)
		error ("the count must be in the range 0 to 32; please check the intrinsic _mm_rori_pi32");
	      else if (fcode == ARM_BUILTIN_WRORH)
		error ("the count must be in the range 0 to 32; please check the intrinsic _mm_ror_pi16");
	      else
		error ("the count must be in the range 0 to 32; please check the intrinsic _mm_ror_pi32");
	    }
22751 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
22752 && (imm < 0 || imm > 64))
	    {
	      if (fcode == ARM_BUILTIN_WRORDI)
		error ("the count must be in the range 0 to 64; please check the intrinsic _mm_rori_si64");
	      else
		error ("the count must be in the range 0 to 64; please check the intrinsic _mm_ror_si64");
	    }
	  else if (imm < 0)
	    {
	      if (fcode == ARM_BUILTIN_WSRLHI)
		error ("the count must be non-negative; please check the intrinsic _mm_srli_pi16");
	      else if (fcode == ARM_BUILTIN_WSRLWI)
		error ("the count must be non-negative; please check the intrinsic _mm_srli_pi32");
	      else if (fcode == ARM_BUILTIN_WSRLDI)
		error ("the count must be non-negative; please check the intrinsic _mm_srli_si64");
	      else if (fcode == ARM_BUILTIN_WSLLHI)
		error ("the count must be non-negative; please check the intrinsic _mm_slli_pi16");
	      else if (fcode == ARM_BUILTIN_WSLLWI)
		error ("the count must be non-negative; please check the intrinsic _mm_slli_pi32");
	      else if (fcode == ARM_BUILTIN_WSLLDI)
		error ("the count must be non-negative; please check the intrinsic _mm_slli_si64");
	      else if (fcode == ARM_BUILTIN_WSRAHI)
		error ("the count must be non-negative; please check the intrinsic _mm_srai_pi16");
	      else if (fcode == ARM_BUILTIN_WSRAWI)
		error ("the count must be non-negative; please check the intrinsic _mm_srai_pi32");
	      else if (fcode == ARM_BUILTIN_WSRADI)
		error ("the count must be non-negative; please check the intrinsic _mm_srai_si64");
	      else if (fcode == ARM_BUILTIN_WSRLH)
		error ("the count must be non-negative; please check the intrinsic _mm_srl_pi16");
	      else if (fcode == ARM_BUILTIN_WSRLW)
		error ("the count must be non-negative; please check the intrinsic _mm_srl_pi32");
	      else if (fcode == ARM_BUILTIN_WSRLD)
		error ("the count must be non-negative; please check the intrinsic _mm_srl_si64");
	      else if (fcode == ARM_BUILTIN_WSLLH)
		error ("the count must be non-negative; please check the intrinsic _mm_sll_pi16");
	      else if (fcode == ARM_BUILTIN_WSLLW)
		error ("the count must be non-negative; please check the intrinsic _mm_sll_pi32");
	      else if (fcode == ARM_BUILTIN_WSLLD)
		error ("the count must be non-negative; please check the intrinsic _mm_sll_si64");
	      else if (fcode == ARM_BUILTIN_WSRAH)
		error ("the count must be non-negative; please check the intrinsic _mm_sra_pi16");
	      else if (fcode == ARM_BUILTIN_WSRAW)
		error ("the count must be non-negative; please check the intrinsic _mm_sra_pi32");
	      else
		error ("the count must be non-negative; please check the intrinsic _mm_sra_si64");
	    }
22799 return arm_expand_binop_builtin (icode, exp, target);
22805 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
22806 if (d->code == (const enum arm_builtins) fcode)
22807 return arm_expand_binop_builtin (d->icode, exp, target);
22809 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
22810 if (d->code == (const enum arm_builtins) fcode)
22811 return arm_expand_unop_builtin (d->icode, exp, target, 0);
22813 /* @@@ Should really do something sensible here. */
22817 /* Return the number (counting from 0) of
22818 the least significant set bit in MASK. */
static int
number_of_first_bit_set (unsigned mask)
22823 return ctz_hwi (mask);
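/* For example (illustrative): number_of_first_bit_set (0x28) is 3,
   since bit 3 is the lowest bit set in 0b101000.  */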
22826 /* Like emit_multi_reg_push, but allowing for a different set of
22827 registers to be described as saved. MASK is the set of registers
22828 to be saved; REAL_REGS is the set of registers to be described as
22829 saved. If REAL_REGS is 0, only describe the stack adjustment. */
static rtx
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
22834 unsigned long regno;
22835 rtx par[10], tmp, reg, insn;
22838 /* Build the parallel of the registers actually being stored. */
22839 for (i = 0; mask; ++i, mask &= mask - 1)
22841 regno = ctz_hwi (mask);
22842 reg = gen_rtx_REG (SImode, regno);
22845 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
22847 tmp = gen_rtx_USE (VOIDmode, reg);
22852 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
22853 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22854 tmp = gen_frame_mem (BLKmode, tmp);
22855 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
22858 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
22859 insn = emit_insn (tmp);
22861 /* Always build the stack adjustment note for unwind info. */
22862 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
22863 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
22866 /* Build the parallel of the registers recorded as saved for unwind. */
22867 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
22869 regno = ctz_hwi (real_regs);
22870 reg = gen_rtx_REG (SImode, regno);
22872 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
22873 tmp = gen_frame_mem (SImode, tmp);
22874 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
22875 RTX_FRAME_RELATED_P (tmp) = 1;
22883 RTX_FRAME_RELATED_P (par[0]) = 1;
22884 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
22887 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
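/* As an illustrative sketch (not literal GCC dump output): for
   MASK == REAL_REGS == {r4, r5, lr} the insn built above is roughly

     (parallel [(set (mem:BLK (pre_modify (reg sp)
					  (plus (reg sp) (const_int -12))))
		     (unspec:BLK [(reg r4)] UNSPEC_PUSH_MULT))
		(use (reg r5))
		(use (reg lr))])

   with a REG_FRAME_RELATED_EXPR note carrying the SP adjustment and
   the three word-sized stores for the unwinder.  */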
22892 /* Emit code to push or pop registers to or from the stack. F is the
22893 assembly file. MASK is the registers to pop. */
static void
thumb_pop (FILE *f, unsigned long mask)
22898 int lo_mask = mask & 0xFF;
22899 int pushed_words = 0;
22903 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit instead.  */
22907 thumb_exit (f, -1);
22911 fprintf (f, "\tpop\t{");
22913 /* Look at the low registers first. */
22914 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
22918 asm_fprintf (f, "%r", regno);
22920 if ((lo_mask & ~1) != 0)
22927 if (mask & (1 << PC_REGNUM))
22929 /* Catch popping the PC. */
22930 if (TARGET_INTERWORK || TARGET_BACKTRACE
22931 || crtl->calls_eh_return)
	  /* The PC is never popped directly; instead
	     it is popped into r3 and then BX is used.  */
22935 fprintf (f, "}\n");
22937 thumb_exit (f, -1);
22946 asm_fprintf (f, "%r", PC_REGNUM);
22950 fprintf (f, "}\n");
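/* For example (illustrative): thumb_pop (f, 0x0013), i.e. r0, r1 and
   r4, emits

	pop	{r0, r1, r4}

   while a mask containing PC_REGNUM either appends pc to the list or,
   when interworking, backtracing or an EH return demands it, defers to
   thumb_exit as handled above.  */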
22953 /* Generate code to return from a thumb function.
22954 If 'reg_containing_return_addr' is -1, then the return address is
22955 actually on the stack, at the stack pointer. */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
22959 unsigned regs_available_for_popping;
22960 unsigned regs_to_pop;
22962 unsigned available;
22966 int restore_a4 = FALSE;
22968 /* Compute the registers we need to pop. */
22972 if (reg_containing_return_addr == -1)
22974 regs_to_pop |= 1 << LR_REGNUM;
22978 if (TARGET_BACKTRACE)
22980 /* Restore the (ARM) frame pointer and stack pointer. */
22981 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
22987 if (pops_needed == 0)
22989 if (crtl->calls_eh_return)
22990 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
22992 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
22995 /* Otherwise if we are not supporting interworking and we have not created
22996 a backtrace structure and the function was not entered in ARM mode then
22997 just pop the return address straight into the PC. */
22998 else if (!TARGET_INTERWORK
22999 && !TARGET_BACKTRACE
23000 && !is_called_in_ARM_mode (current_function_decl)
23001 && !crtl->calls_eh_return)
23003 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23007 /* Find out how many of the (return) argument registers we can corrupt. */
23008 regs_available_for_popping = 0;
23010 /* If returning via __builtin_eh_return, the bottom three registers
23011 all contain information needed for the return. */
23012 if (crtl->calls_eh_return)
      /* We can deduce the registers used from the function's
	 return value.  This is more reliable than examining
23018 df_regs_ever_live_p () because that will be set if the register is
23019 ever used in the function, not just if the register is used
23020 to hold a return value. */
23022 if (crtl->return_rtx != 0)
23023 mode = GET_MODE (crtl->return_rtx);
23025 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23027 size = GET_MODE_SIZE (mode);
23031 /* In a void function we can use any argument register.
23032 In a function that returns a structure on the stack
23033 we can use the second and third argument registers. */
23034 if (mode == VOIDmode)
23035 regs_available_for_popping =
23036 (1 << ARG_REGISTER (1))
23037 | (1 << ARG_REGISTER (2))
23038 | (1 << ARG_REGISTER (3));
23040 regs_available_for_popping =
23041 (1 << ARG_REGISTER (2))
23042 | (1 << ARG_REGISTER (3));
23044 else if (size <= 4)
23045 regs_available_for_popping =
23046 (1 << ARG_REGISTER (2))
23047 | (1 << ARG_REGISTER (3));
23048 else if (size <= 8)
23049 regs_available_for_popping =
23050 (1 << ARG_REGISTER (3));
23053 /* Match registers to be popped with registers into which we pop them. */
23054 for (available = regs_available_for_popping,
23055 required = regs_to_pop;
23056 required != 0 && available != 0;
23057 available &= ~(available & - available),
23058 required &= ~(required & - required))
23061 /* If we have any popping registers left over, remove them. */
23063 regs_available_for_popping &= ~available;
23065 /* Otherwise if we need another popping register we can use
23066 the fourth argument register. */
23067 else if (pops_needed)
23069 /* If we have not found any free argument registers and
23070 reg a4 contains the return address, we must move it. */
23071 if (regs_available_for_popping == 0
23072 && reg_containing_return_addr == LAST_ARG_REGNUM)
23074 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23075 reg_containing_return_addr = LR_REGNUM;
23077 else if (size > 12)
23079 /* Register a4 is being used to hold part of the return value,
23080 but we have dire need of a free, low register. */
23083 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23086 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23088 /* The fourth argument register is available. */
23089 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23095 /* Pop as many registers as we can. */
23096 thumb_pop (f, regs_available_for_popping);
23098 /* Process the registers we popped. */
23099 if (reg_containing_return_addr == -1)
23101 /* The return address was popped into the lowest numbered register. */
23102 regs_to_pop &= ~(1 << LR_REGNUM);
23104 reg_containing_return_addr =
23105 number_of_first_bit_set (regs_available_for_popping);
      /* Remove this register from the mask of available registers, so that
23108 the return address will not be corrupted by further pops. */
23109 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23112 /* If we popped other registers then handle them here. */
23113 if (regs_available_for_popping)
23117 /* Work out which register currently contains the frame pointer. */
23118 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23120 /* Move it into the correct place. */
23121 asm_fprintf (f, "\tmov\t%r, %r\n",
23122 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23124 /* (Temporarily) remove it from the mask of popped registers. */
23125 regs_available_for_popping &= ~(1 << frame_pointer);
23126 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23128 if (regs_available_for_popping)
23132 /* We popped the stack pointer as well,
23133 find the register that contains it. */
23134 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23136 /* Move it into the stack register. */
23137 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23139 /* At this point we have popped all necessary registers, so
23140 do not worry about restoring regs_available_for_popping
23141 to its correct value:
23143 assert (pops_needed == 0)
23144 assert (regs_available_for_popping == (1 << frame_pointer))
23145 assert (regs_to_pop == (1 << STACK_POINTER)) */
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
23152 regs_available_for_popping |= (1 << frame_pointer);
23156 /* If we still have registers left on the stack, but we no longer have
23157 any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     we used to hold it.  */
23160 if (regs_available_for_popping == 0 && pops_needed > 0)
23162 regs_available_for_popping |= 1 << reg_containing_return_addr;
23164 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23165 reg_containing_return_addr);
23167 reg_containing_return_addr = LR_REGNUM;
23170 /* If we have registers left on the stack then pop some more.
23171 We know that at most we will want to pop FP and SP. */
23172 if (pops_needed > 0)
23177 thumb_pop (f, regs_available_for_popping);
23179 /* We have popped either FP or SP.
23180 Move whichever one it is into the correct register. */
23181 popped_into = number_of_first_bit_set (regs_available_for_popping);
23182 move_to = number_of_first_bit_set (regs_to_pop);
23184 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23186 regs_to_pop &= ~(1 << move_to);
23191 /* If we still have not popped everything then we must have only
23192 had one register available to us and we are now popping the SP. */
23193 if (pops_needed > 0)
23197 thumb_pop (f, regs_available_for_popping);
23199 popped_into = number_of_first_bit_set (regs_available_for_popping);
23201 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /* assert (regs_to_pop == (1 << STACK_POINTER))
	 assert (pops_needed == 1) */
23208 /* If necessary restore the a4 register. */
23211 if (reg_containing_return_addr != LR_REGNUM)
23213 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23214 reg_containing_return_addr = LR_REGNUM;
23217 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23220 if (crtl->calls_eh_return)
23221 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23223 /* Return to caller. */
23224 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23227 /* Scan INSN just before assembler is output for it.
23228 For Thumb-1, we track the status of the condition codes; this
23229 information is used in the cbranchsi4_insn pattern. */
void
thumb1_final_prescan_insn (rtx insn)
23233 if (flag_print_asm_name)
23234 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23235 INSN_ADDRESSES (INSN_UID (insn)));
23236 /* Don't overwrite the previous setter when we get to a cbranch. */
23237 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23239 enum attr_conds conds;
23241 if (cfun->machine->thumb1_cc_insn)
23243 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23244 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23247 conds = get_attr_conds (insn);
23248 if (conds == CONDS_SET)
23250 rtx set = single_set (insn);
23251 cfun->machine->thumb1_cc_insn = insn;
23252 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23253 cfun->machine->thumb1_cc_op1 = const0_rtx;
23254 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23255 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23257 rtx src1 = XEXP (SET_SRC (set), 1);
23258 if (src1 == const0_rtx)
23259 cfun->machine->thumb1_cc_mode = CCmode;
23261 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
	      /* Record the src register operand instead of dest because
		 the cprop_hardreg pass propagates src.  */
23265 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23268 else if (conds != CONDS_NOCOND)
23269 cfun->machine->thumb1_cc_insn = NULL_RTX;
  /* Check if an unexpected far jump is used.  */
23273 if (cfun->machine->lr_save_eliminated
23274 && get_attr_far_jump (insn) == FAR_JUMP_YES)
    internal_error ("Unexpected thumb1 far jump");
static int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23281 unsigned HOST_WIDE_INT mask = 0xff;
23284 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23285 if (val == 0) /* XXX */
23288 for (i = 0; i < 25; i++)
23289 if ((val & (mask << i)) == val)
/* Returns nonzero if the current function contains,
   or might contain, a far jump.  */
static int
thumb_far_jump_used_p (void)
23301 bool far_jump = false;
23302 unsigned int func_size = 0;
23304 /* This test is only important for leaf functions. */
23305 /* assert (!leaf_function_p ()); */
23307 /* If we have already decided that far jumps may be used,
23308 do not bother checking again, and always return true even if
23309 it turns out that they are not being used. Once we have made
23310 the decision that far jumps are present (and that hence the link
23311 register will be pushed onto the stack) we cannot go back on it. */
23312 if (cfun->machine->far_jump_used)
23315 /* If this function is not being called from the prologue/epilogue
23316 generation code then it must be being called from the
23317 INITIAL_ELIMINATION_OFFSET macro. */
23318 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23320 /* In this case we know that we are being asked about the elimination
23321 of the arg pointer register. If that register is not being used,
23322 then there are no arguments on the stack, and we do not have to
23323 worry that a far jump might force the prologue to push the link
23324 register, changing the stack offsets. In this case we can just
23325 return false, since the presence of far jumps in the function will
23326 not affect stack offsets.
23328 If the arg pointer is live (or if it was live, but has now been
23329 eliminated and so set to dead) then we do have to test to see if
	 the function might contain a far jump.  This test can lead to some
	 false positives, since before reload is completed the length of
	 branch instructions is not known, so GCC defaults to returning their
	 longest length, which in turn sets the far jump attribute to true.

	 A false positive will not result in bad code being generated, but it
	 will result in a needless push and pop of the link register.  We
23337 hope that this does not occur too often.
23339 If we need doubleword stack alignment this could affect the other
23340 elimination offsets so we can't risk getting it wrong. */
23341 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
23342 cfun->machine->arg_pointer_live = 1;
23343 else if (!cfun->machine->arg_pointer_live)
23347 /* Check to see if the function contains a branch
23348 insn with the far jump attribute set. */
23349 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23351 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
23355 func_size += get_attr_length (insn);
  /* The far_jump attribute will always be true for thumb1 before the
     shorten_branch pass, so checking it before shorten_branch isn't
     very useful.

     The following heuristic tries to estimate more accurately whether
     a far jump may finally be used.  The heuristic is very conservative
     as there is no chance to roll back the decision not to use a far
     jump.

     The Thumb-1 long branch offset is -2048 to 2046.  The worst case is
     that each 2-byte insn is associated with a 4-byte constant pool.
     Using function size 2048/3 as the threshold is conservative
     enough.  */
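  /* A worked check of that threshold (illustrative): a function of
     func_size code bytes drags along at most 2 * func_size bytes of
     constant pool (4 bytes per 2-byte insn), so a branch may have to
     span up to 3 * func_size bytes.  Requiring 3 * func_size < 2048
     keeps every branch within the -2048..2046 reach.  */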
23371 if ((func_size * 3) >= 2048)
23373 /* Record the fact that we have decided that
23374 the function does use far jumps. */
23375 cfun->machine->far_jump_used = 1;
23383 /* Return nonzero if FUNC must be entered in ARM mode. */
23385 is_called_in_ARM_mode (tree func)
23387 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
23389 /* Ignore the problem about functions whose address is taken. */
23390 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
23394 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
23400 /* Given the stack offsets and register mask in OFFSETS, decide how
23401 many additional registers to push instead of subtracting a constant
23402 from SP. For epilogues the principle is the same except we use pop.
23403 FOR_PROLOGUE indicates which we're generating. */
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
23407 HOST_WIDE_INT amount;
23408 unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instructions.  */
23411 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
23412 /* Then count how many other high registers will need to be pushed. */
23413 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23414 int n_free, reg_base, size;
23416 if (!for_prologue && frame_pointer_needed)
23417 amount = offsets->locals_base - offsets->saved_regs;
23419 amount = offsets->outgoing_args - offsets->saved_regs;
23421 /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
23424 if (!optimize_size && amount != 512)
23427 /* Can't do this if there are high registers to push. */
23428 if (high_regs_pushed != 0)
23431 /* Shouldn't do it in the prologue if no registers would normally
23432 be pushed at all. In the epilogue, also allow it if we'll have
23433 a pop insn for the PC. */
23436 || TARGET_BACKTRACE
23437 || (live_regs_mask & 1 << LR_REGNUM) == 0
23438 || TARGET_INTERWORK
23439 || crtl->args.pretend_args_size != 0))
23442 /* Don't do this if thumb_expand_prologue wants to emit instructions
23443 between the push and the stack frame allocation. */
23445 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
23446 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
23453 size = arm_size_return_regs ();
23454 reg_base = ARM_NUM_INTS (size);
23455 live_regs_mask >>= reg_base;
23458 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
23459 && (for_prologue || call_used_regs[reg_base + n_free]))
23461 live_regs_mask >>= 1;
23467 gcc_assert (amount / 4 * 4 == amount);
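  /* Worked example (illustrative): with amount == 516 and n_free == 2,
     pushing two extra registers shrinks the explicit SP adjustment to
     516 - 8 = 508, which fits a single Thumb SUB, and the code below
     returns (516 - 508) / 4 == 2 extra registers to push.  */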
23469 if (amount >= 512 && (amount - n_free * 4) < 512)
23470 return (amount - 508) / 4;
23471 if (amount <= n_free * 4)
23476 /* The bits which aren't usefully expanded as rtl. */
const char *
thumb1_unexpanded_epilogue (void)
23480 arm_stack_offsets *offsets;
23482 unsigned long live_regs_mask = 0;
23483 int high_regs_pushed = 0;
23485 int had_to_push_lr;
23488 if (cfun->machine->return_used_this_function != 0)
23491 if (IS_NAKED (arm_current_func_type ()))
23494 offsets = arm_get_frame_offsets ();
23495 live_regs_mask = offsets->saved_regs_mask;
23496 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  /* We can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
23500 will be set if the register is ever used in the function, not just if
23501 the register is used to hold a return value. */
23502 size = arm_size_return_regs ();
23504 extra_pop = thumb1_extra_regs_pushed (offsets, false);
23507 unsigned long extra_mask = (1 << extra_pop) - 1;
23508 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
  /* The prologue may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
23513 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
23514 compiles to produce:
23515 push {r4, r5, r6, r7, lr}
     as part of the prologue.  We have to undo that pushing here.  */
23521 if (high_regs_pushed)
23523 unsigned long mask = live_regs_mask & 0xff;
      /* The available low registers depend on the size of the value we are
	 returning.  */
	  /* Oh dear!  We have no low registers into which we can pop
	     high registers!  */
23537 ("no low registers available for popping high registers");
23539 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
23540 if (live_regs_mask & (1 << next_hi_reg))
23543 while (high_regs_pushed)
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
23547 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
23549 if (mask & (1 << regno))
23550 high_regs_pushed--;
23551 if (high_regs_pushed == 0)
23555 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
23557 /* Pop the values into the low register(s). */
23558 thumb_pop (asm_out_file, mask);
23560 /* Move the value(s) into the high registers. */
23561 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
23563 if (mask & (1 << regno))
23565 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
23568 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
23569 if (live_regs_mask & (1 << next_hi_reg))
23574 live_regs_mask &= ~0x0f00;
23577 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
23578 live_regs_mask &= 0xff;
23580 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
23582 /* Pop the return address into the PC. */
23583 if (had_to_push_lr)
23584 live_regs_mask |= 1 << PC_REGNUM;
23586 /* Either no argument registers were pushed or a backtrace
23587 structure was created which includes an adjusted stack
23588 pointer, so just pop everything. */
23589 if (live_regs_mask)
23590 thumb_pop (asm_out_file, live_regs_mask);
	  /* We have either just popped the return address into the
	     PC or it was kept in LR for the entire function.
23594 Note that thumb_pop has already called thumb_exit if the
23595 PC was in the list. */
23596 if (!had_to_push_lr)
23597 thumb_exit (asm_out_file, LR_REGNUM);
23601 /* Pop everything but the return address. */
23602 if (live_regs_mask)
23603 thumb_pop (asm_out_file, live_regs_mask);
23605 if (had_to_push_lr)
23609 /* We have no free low regs, so save one. */
23610 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
23614 /* Get the return address into a temporary register. */
23615 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
23619 /* Move the return address to lr. */
23620 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
23622 /* Restore the low register. */
23623 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
23628 regno = LAST_ARG_REGNUM;
23633 /* Remove the argument registers that were pushed onto the stack. */
23634 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
23635 SP_REGNUM, SP_REGNUM,
23636 crtl->args.pretend_args_size);
23638 thumb_exit (asm_out_file, regno);
23644 /* Functions to save and restore machine-specific function data. */
23645 static struct machine_function *
23646 arm_init_machine_status (void)
23648 struct machine_function *machine;
23649 machine = ggc_alloc_cleared_machine_function ();
23651 #if ARM_FT_UNKNOWN != 0
23652 machine->func_type = ARM_FT_UNKNOWN;
23657 /* Return an RTX indicating where the return address to the
23658 calling function can be found. */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
23665 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
23668 /* Do anything needed before RTL is emitted for each function. */
void
arm_init_expanders (void)
23672 /* Arrange to initialize and mark the machine per-function status. */
23673 init_machine_status = arm_init_machine_status;
23675 /* This is to stop the combine pass optimizing away the alignment
23676 adjustment of va_arg. */
23677 /* ??? It is claimed that this should not be necessary. */
23679 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
/* Like arm_compute_initial_elimination_offset.  Simpler because there
23684 isn't an ABI specified frame pointer for Thumb. Instead, we set it
23685 to point at the base of the local variables after static stack
23686 space for a function has been allocated. */
HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
23691 arm_stack_offsets *offsets;
23693 offsets = arm_get_frame_offsets ();
23697 case ARG_POINTER_REGNUM:
23700 case STACK_POINTER_REGNUM:
23701 return offsets->outgoing_args - offsets->saved_args;
23703 case FRAME_POINTER_REGNUM:
23704 return offsets->soft_frame - offsets->saved_args;
23706 case ARM_HARD_FRAME_POINTER_REGNUM:
23707 return offsets->saved_regs - offsets->saved_args;
23709 case THUMB_HARD_FRAME_POINTER_REGNUM:
23710 return offsets->locals_base - offsets->saved_args;
23713 gcc_unreachable ();
23717 case FRAME_POINTER_REGNUM:
23720 case STACK_POINTER_REGNUM:
23721 return offsets->outgoing_args - offsets->soft_frame;
23723 case ARM_HARD_FRAME_POINTER_REGNUM:
23724 return offsets->saved_regs - offsets->soft_frame;
23726 case THUMB_HARD_FRAME_POINTER_REGNUM:
23727 return offsets->locals_base - offsets->soft_frame;
23730 gcc_unreachable ();
23735 gcc_unreachable ();
23739 /* Generate the function's prologue. */
void
thumb1_expand_prologue (void)
23746 HOST_WIDE_INT amount;
23747 arm_stack_offsets *offsets;
23748 unsigned long func_type;
23750 unsigned long live_regs_mask;
23751 unsigned long l_mask;
23752 unsigned high_regs_pushed = 0;
23754 func_type = arm_current_func_type ();
23756 /* Naked functions don't have prologues. */
23757 if (IS_NAKED (func_type))
23760 if (IS_INTERRUPT (func_type))
23762 error ("interrupt Service Routines cannot be coded in Thumb mode");
23766 if (is_called_in_ARM_mode (current_function_decl))
23767 emit_insn (gen_prologue_thumb1_interwork ());
23769 offsets = arm_get_frame_offsets ();
23770 live_regs_mask = offsets->saved_regs_mask;
23772 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
23773 l_mask = live_regs_mask & 0x40ff;
23774 /* Then count how many other high registers will need to be pushed. */
23775 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23777 if (crtl->args.pretend_args_size)
23779 rtx x = GEN_INT (-crtl->args.pretend_args_size);
23781 if (cfun->machine->uses_anonymous_args)
23783 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
23784 unsigned long mask;
23786 mask = 1ul << (LAST_ARG_REGNUM + 1);
23787 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
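	  /* Worked example (illustrative): pretend_args_size == 8 gives
	     num_pushes == 2, so with LAST_ARG_REGNUM == 3

	       mask = (1 << 4) - (1 << 2) = 0xc

	     i.e. r2 and r3 are pushed to materialize the anonymous
	     arguments on the stack.  */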
23789 insn = thumb1_emit_multi_reg_push (mask, 0);
23793 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
23794 stack_pointer_rtx, x));
23796 RTX_FRAME_RELATED_P (insn) = 1;
23799 if (TARGET_BACKTRACE)
23801 HOST_WIDE_INT offset = 0;
23802 unsigned work_register;
23803 rtx work_reg, x, arm_hfp_rtx;
23805 /* We have been asked to create a stack backtrace structure.
23806 The code looks like this:
23810 0 sub SP, #16 Reserve space for 4 registers.
23811 2 push {R7} Push low registers.
23812 4 add R7, SP, #20 Get the stack pointer before the push.
23813 6 str R7, [SP, #8] Store the stack pointer
23814 (before reserving the space).
23815 8 mov R7, PC Get hold of the start of this code + 12.
23816 10 str R7, [SP, #16] Store it.
23817 12 mov R7, FP Get hold of the current frame pointer.
23818 14 str R7, [SP, #4] Store it.
23819 16 mov R7, LR Get hold of the current return address.
23820 18 str R7, [SP, #12] Store it.
23821 20 add R7, SP, #16 Point at the start of the
23822 backtrace structure.
23823 22 mov FP, R7 Put this value into the frame pointer. */
23825 work_register = thumb_find_work_register (live_regs_mask);
23826 work_reg = gen_rtx_REG (SImode, work_register);
23827 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
23829 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
23830 stack_pointer_rtx, GEN_INT (-16)));
23831 RTX_FRAME_RELATED_P (insn) = 1;
23835 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
23836 RTX_FRAME_RELATED_P (insn) = 1;
23838 offset = bit_count (l_mask) * UNITS_PER_WORD;
23841 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
23842 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
23844 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
23845 x = gen_frame_mem (SImode, x);
23846 emit_move_insn (x, work_reg);
23848 /* Make sure that the instruction fetching the PC is in the right place
23849 to calculate "start of backtrace creation code + 12". */
23850 /* ??? The stores using the common WORK_REG ought to be enough to
23851 prevent the scheduler from doing anything weird. Failing that
23852 we could always move all of the following into an UNSPEC_VOLATILE. */
23855 x = gen_rtx_REG (SImode, PC_REGNUM);
23856 emit_move_insn (work_reg, x);
23858 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
23859 x = gen_frame_mem (SImode, x);
23860 emit_move_insn (x, work_reg);
23862 emit_move_insn (work_reg, arm_hfp_rtx);
23864 x = plus_constant (Pmode, stack_pointer_rtx, offset);
23865 x = gen_frame_mem (SImode, x);
23866 emit_move_insn (x, work_reg);
23870 emit_move_insn (work_reg, arm_hfp_rtx);
23872 x = plus_constant (Pmode, stack_pointer_rtx, offset);
23873 x = gen_frame_mem (SImode, x);
23874 emit_move_insn (x, work_reg);
23876 x = gen_rtx_REG (SImode, PC_REGNUM);
23877 emit_move_insn (work_reg, x);
23879 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
23880 x = gen_frame_mem (SImode, x);
23881 emit_move_insn (x, work_reg);
23884 x = gen_rtx_REG (SImode, LR_REGNUM);
23885 emit_move_insn (work_reg, x);
23887 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
23888 x = gen_frame_mem (SImode, x);
23889 emit_move_insn (x, work_reg);
23891 x = GEN_INT (offset + 12);
23892 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
23894 emit_move_insn (arm_hfp_rtx, work_reg);
23896 /* Optimization: If we are not pushing any low registers but we are going
23897 to push some high registers then delay our first push. This will just
23898 be a push of LR and we can combine it with the push of the first high
23899 register. */
23900 else if ((l_mask & 0xff) != 0
23901 || (high_regs_pushed == 0 && l_mask))
23903 unsigned long mask = l_mask;
23904 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
23905 insn = thumb1_emit_multi_reg_push (mask, mask);
23906 RTX_FRAME_RELATED_P (insn) = 1;
23909 if (high_regs_pushed)
23911 unsigned pushable_regs;
23912 unsigned next_hi_reg;
23913 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
23914 : crtl->args.info.nregs;
23915 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
23917 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
23918 if (live_regs_mask & (1 << next_hi_reg))
23919 break;
23921 /* Here we need to mask out registers used for passing arguments
23922 even if they can be pushed. This is to avoid using them to stash
23923 the high registers; such a stash could clobber live argument values. */
23924 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
23926 if (pushable_regs == 0)
23927 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
23929 while (high_regs_pushed > 0)
23931 unsigned long real_regs_mask = 0;
23933 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
23935 if (pushable_regs & (1 << regno))
23937 emit_move_insn (gen_rtx_REG (SImode, regno),
23938 gen_rtx_REG (SImode, next_hi_reg));
23940 high_regs_pushed --;
23941 real_regs_mask |= (1 << next_hi_reg);
23943 if (high_regs_pushed)
23945 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
23946 next_hi_reg --)
23947 if (live_regs_mask & (1 << next_hi_reg))
23948 break;
23952 pushable_regs &= ~((1 << regno) - 1);
23958 /* If we had to find a work register and we have not yet
23959 saved the LR then add it to the list of regs to push. */
23960 if (l_mask == (1 << LR_REGNUM))
23962 pushable_regs |= l_mask;
23963 real_regs_mask |= l_mask;
23967 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
23968 RTX_FRAME_RELATED_P (insn) = 1;
23972 /* Load the pic register before setting the frame pointer,
23973 so we can use r7 as a temporary work register. */
23974 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23975 arm_load_pic_register (live_regs_mask);
23977 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
23978 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
23979 stack_pointer_rtx);
23981 if (flag_stack_usage_info)
23982 current_function_static_stack_size
23983 = offsets->outgoing_args - offsets->saved_args;
23985 amount = offsets->outgoing_args - offsets->saved_regs;
23986 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
23991 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23992 GEN_INT (- amount)));
23993 RTX_FRAME_RELATED_P (insn) = 1;
23999 /* The stack decrement is too big for an immediate value in a single
24000 insn. In theory we could issue multiple subtracts, but after
24001 three of them it becomes more space efficient to place the full
24002 value in the constant pool and load into a register. (Also the
24003 ARM debugger really likes to see only one stack decrement per
24004 function). So instead we look for a scratch register into which
24005 we can load the decrement, and then we subtract this from the
24006 stack pointer. Unfortunately on the thumb the only available
24007 scratch registers are the argument registers, and we cannot use
24008 these as they may hold arguments to the function. Instead we
24009 attempt to locate a call preserved register which is used by this
24010 function. If we can find one, then we know that it will have
24011 been pushed at the start of the prologue and so we can corrupt
24012 it now. */
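/* A worked example (illustrative; exact sequences depend on the target):
   a Thumb-1 "sub sp, #imm" encodes at most #508, so a 640-byte frame
   cannot be dropped with one immediate. If r4 is live (and hence
   already saved above), we emit roughly
	ldr	r4, .Lc		@ .Lc: .word -640
	add	sp, sp, r4
   and r4 is simply reloaded with the other saved registers later. */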
24013 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24014 if (live_regs_mask & (1 << regno))
24017 gcc_assert (regno <= LAST_LO_REGNUM);
24019 reg = gen_rtx_REG (SImode, regno);
24021 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24023 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24024 stack_pointer_rtx, reg));
24026 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24027 plus_constant (Pmode, stack_pointer_rtx,
24029 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24030 RTX_FRAME_RELATED_P (insn) = 1;
24034 if (frame_pointer_needed)
24035 thumb_set_frame_pointer (offsets);
24037 /* If we are profiling, make sure no instructions are scheduled before
24038 the call to mcount. Similarly if the user has requested no
24039 scheduling in the prolog. Similarly if we want non-call exceptions
24040 using the EABI unwinder, to prevent faulting instructions from being
24041 swapped with a stack adjustment. */
24042 if (crtl->profile || !TARGET_SCHED_PROLOG
24043 || (arm_except_unwind_info (&global_options) == UI_TARGET
24044 && cfun->can_throw_non_call_exceptions))
24045 emit_insn (gen_blockage ());
24047 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24048 if (live_regs_mask & 0xff)
24049 cfun->machine->lr_save_eliminated = 0;
24052 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
24053 POP instruction can be generated. LR should be replaced by PC. All
24054 the checks required are already done by USE_RETURN_INSN (). Hence,
24055 all we really need to check here is whether a single register or
24056 multiple registers are to be returned. */
24058 thumb2_expand_return (bool simple_return)
24061 unsigned long saved_regs_mask;
24062 arm_stack_offsets *offsets;
24064 offsets = arm_get_frame_offsets ();
24065 saved_regs_mask = offsets->saved_regs_mask;
24067 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24068 if (saved_regs_mask & (1 << i))
24069 num_regs++;
24071 if (!simple_return && saved_regs_mask)
24075 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24076 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24077 rtx addr = gen_rtx_MEM (SImode,
24078 gen_rtx_POST_INC (SImode,
24079 stack_pointer_rtx));
24080 set_mem_alias_set (addr, get_frame_alias_set ());
24081 XVECEXP (par, 0, 0) = ret_rtx;
24082 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
24083 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24084 emit_jump_insn (par);
24088 saved_regs_mask &= ~ (1 << LR_REGNUM);
24089 saved_regs_mask |= (1 << PC_REGNUM);
24090 arm_emit_multi_reg_pop (saved_regs_mask);
24095 emit_jump_insn (simple_return_rtx);
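/* For illustration (a sketch, not verified output): when only LR was
   saved, the PARALLEL above typically assembles to a single
	ldr	pc, [sp], #4
   while a larger mask such as {r4-r7, lr} goes through
   arm_emit_multi_reg_pop and becomes "pop {r4-r7, pc}". */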
24100 thumb1_expand_epilogue (void)
24102 HOST_WIDE_INT amount;
24103 arm_stack_offsets *offsets;
24106 /* Naked functions don't have epilogues. */
24107 if (IS_NAKED (arm_current_func_type ()))
24110 offsets = arm_get_frame_offsets ();
24111 amount = offsets->outgoing_args - offsets->saved_regs;
24113 if (frame_pointer_needed)
24115 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24116 amount = offsets->locals_base - offsets->saved_regs;
24118 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24120 gcc_assert (amount >= 0);
24123 emit_insn (gen_blockage ());
24126 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24127 GEN_INT (amount)));
24130 /* r3 is always free in the epilogue. */
24131 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24133 emit_insn (gen_movsi (reg, GEN_INT (amount)));
24134 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24138 /* Emit a USE (stack_pointer_rtx), so that
24139 the stack adjustment will not be deleted. */
24140 emit_insn (gen_force_register_use (stack_pointer_rtx));
24142 if (crtl->profile || !TARGET_SCHED_PROLOG)
24143 emit_insn (gen_blockage ());
24145 /* Emit a clobber for each insn that will be restored in the epilogue,
24146 so that flow2 will get register lifetimes correct. */
24147 for (regno = 0; regno < 13; regno++)
24148 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24149 emit_clobber (gen_rtx_REG (SImode, regno));
24151 if (! df_regs_ever_live_p (LR_REGNUM))
24152 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24155 /* Epilogue code for APCS frame. */
24157 arm_expand_epilogue_apcs_frame (bool really_return)
24159 unsigned long func_type;
24160 unsigned long saved_regs_mask;
24163 int floats_from_frame = 0;
24164 arm_stack_offsets *offsets;
24166 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24167 func_type = arm_current_func_type ();
24169 /* Get frame offsets for ARM. */
24170 offsets = arm_get_frame_offsets ();
24171 saved_regs_mask = offsets->saved_regs_mask;
24173 /* Find the offset of the floating-point save area in the frame. */
24174 floats_from_frame = offsets->saved_args - offsets->frame;
24176 /* Compute how many core registers are saved and how far away the floats are. */
24177 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24178 if (saved_regs_mask & (1 << i))
24180 num_regs++;
24181 floats_from_frame += 4;
24184 if (TARGET_HARD_FLOAT && TARGET_VFP)
24188 /* The offset is from IP_REGNUM. */
24189 int saved_size = arm_get_vfp_saved_size ();
24190 if (saved_size > 0)
24192 floats_from_frame += saved_size;
24193 emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
24194 hard_frame_pointer_rtx,
24195 GEN_INT (-floats_from_frame)));
24198 /* Generate VFP register multi-pop. */
24199 start_reg = FIRST_VFP_REGNUM;
24201 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24202 /* Look for a case where a reg does not need restoring. */
24203 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24204 && (!df_regs_ever_live_p (i + 1)
24205 || call_used_regs[i + 1]))
24207 if (start_reg != i)
24208 arm_emit_vfp_multi_reg_pop (start_reg,
24209 (i - start_reg) / 2,
24210 gen_rtx_REG (SImode,
24215 /* Restore the remaining regs that we have discovered (or possibly
24216 even all of them, if the conditional in the for loop never
24217 triggered). */
24218 if (start_reg != i)
24219 arm_emit_vfp_multi_reg_pop (start_reg,
24220 (i - start_reg) / 2,
24221 gen_rtx_REG (SImode, IP_REGNUM));
24226 /* The frame pointer is guaranteed to be non-double-word aligned, as
24227 it is set to double-word-aligned old_stack_pointer - 4. */
24229 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24231 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24232 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24234 rtx addr = gen_frame_mem (V2SImode,
24235 plus_constant (Pmode, hard_frame_pointer_rtx,
24237 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24238 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24239 gen_rtx_REG (V2SImode, i),
24245 /* saved_regs_mask should contain IP, which holds the old stack pointer
24246 at the time of activation creation. Since SP and IP are adjacent
24247 registers, we can restore the value directly into SP. */
24248 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24249 saved_regs_mask &= ~(1 << IP_REGNUM);
24250 saved_regs_mask |= (1 << SP_REGNUM);
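/* For illustration (register list is a sketch): with SP now standing in
   for IP in the mask, the multi-pop below can assemble to something like
	ldmfd	sp, {fp, sp, pc}
   reloading the caller's stack pointer straight from the slot in which
   IP was saved. */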
24252 /* There are two registers left in saved_regs_mask - LR and PC. We
24253 only need to restore LR (the return address), but to
24254 save time we can load it directly into PC, unless we need a
24255 special function exit sequence, or we are not really returning. */
24256 if (really_return
24257 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24258 && !crtl->calls_eh_return)
24259 /* Delete LR from the register mask, so that LR on
24260 the stack is loaded into the PC in the register mask. */
24261 saved_regs_mask &= ~(1 << LR_REGNUM);
24262 else
24263 saved_regs_mask &= ~(1 << PC_REGNUM);
24265 num_regs = bit_count (saved_regs_mask);
24266 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24268 /* Unwind the stack to just below the saved registers. */
24269 emit_insn (gen_addsi3 (stack_pointer_rtx,
24270 hard_frame_pointer_rtx,
24271 GEN_INT (- 4 * num_regs)));
24274 arm_emit_multi_reg_pop (saved_regs_mask);
24276 if (IS_INTERRUPT (func_type))
24278 /* Interrupt handlers will have pushed the
24279 IP onto the stack, so restore it now. */
24281 rtx addr = gen_rtx_MEM (SImode,
24282 gen_rtx_POST_INC (SImode,
24283 stack_pointer_rtx));
24284 set_mem_alias_set (addr, get_frame_alias_set ());
24285 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24286 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24287 gen_rtx_REG (SImode, IP_REGNUM),
24291 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24294 if (crtl->calls_eh_return)
24295 emit_insn (gen_addsi3 (stack_pointer_rtx,
24297 GEN_INT (ARM_EH_STACKADJ_REGNUM)));
24299 if (IS_STACKALIGN (func_type))
24300 /* Restore the original stack pointer. Before prologue, the stack was
24301 realigned and the original stack pointer saved in r0. For details,
24302 see comment in arm_expand_prologue. */
24303 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
24305 emit_jump_insn (simple_return_rtx);
24308 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24309 function is not a sibcall. */
24311 arm_expand_epilogue (bool really_return)
24313 unsigned long func_type;
24314 unsigned long saved_regs_mask;
24318 arm_stack_offsets *offsets;
24320 func_type = arm_current_func_type ();
24322 /* Naked functions don't have epilogues. Hence, generate return pattern, and
24323 let output_return_instruction take care of instruction emission if any. */
24324 if (IS_NAKED (func_type)
24325 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
24328 emit_jump_insn (simple_return_rtx);
24332 /* If we are throwing an exception, then we really must be doing a
24333 return, so we can't tail-call. */
24334 gcc_assert (!crtl->calls_eh_return || really_return);
24336 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
24338 arm_expand_epilogue_apcs_frame (really_return);
24342 /* Get frame offsets for ARM. */
24343 offsets = arm_get_frame_offsets ();
24344 saved_regs_mask = offsets->saved_regs_mask;
24345 num_regs = bit_count (saved_regs_mask);
24347 if (frame_pointer_needed)
24350 /* Restore stack pointer if necessary. */
24353 /* In ARM mode, frame pointer points to first saved register.
24354 Restore stack pointer to last saved register. */
24355 amount = offsets->frame - offsets->saved_regs;
24357 /* Force out any pending memory operations that reference stacked data
24358 before stack de-allocation occurs. */
24359 emit_insn (gen_blockage ());
24360 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24361 hard_frame_pointer_rtx,
24362 GEN_INT (amount)));
24363 arm_add_cfa_adjust_cfa_note (insn, amount,
24365 hard_frame_pointer_rtx);
24367 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24368 deleted. */
24369 emit_insn (gen_force_register_use (stack_pointer_rtx));
24373 /* In Thumb-2 mode, the frame pointer points to the last saved
24374 register. */
24375 amount = offsets->locals_base - offsets->saved_regs;
24378 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
24379 hard_frame_pointer_rtx,
24380 GEN_INT (amount)));
24381 arm_add_cfa_adjust_cfa_note (insn, amount,
24382 hard_frame_pointer_rtx,
24383 hard_frame_pointer_rtx);
24386 /* Force out any pending memory operations that reference stacked data
24387 before stack de-allocation occurs. */
24388 emit_insn (gen_blockage ());
24389 insn = emit_insn (gen_movsi (stack_pointer_rtx,
24390 hard_frame_pointer_rtx));
24391 arm_add_cfa_adjust_cfa_note (insn, 0,
24393 hard_frame_pointer_rtx);
24394 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24395 deleted. */
24396 emit_insn (gen_force_register_use (stack_pointer_rtx));
24401 /* Pop off outgoing args and local frame to adjust stack pointer to
24402 last saved register. */
24403 amount = offsets->outgoing_args - offsets->saved_regs;
24407 /* Force out any pending memory operations that reference stacked data
24408 before stack de-allocation occurs. */
24409 emit_insn (gen_blockage ());
24410 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
24412 GEN_INT (amount)));
24413 arm_add_cfa_adjust_cfa_note (tmp, amount,
24414 stack_pointer_rtx, stack_pointer_rtx);
24415 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
24416 not deleted. */
24417 emit_insn (gen_force_register_use (stack_pointer_rtx));
24421 if (TARGET_HARD_FLOAT && TARGET_VFP)
24423 /* Generate VFP register multi-pop. */
24424 int end_reg = LAST_VFP_REGNUM + 1;
24426 /* Scan the registers in reverse order. We need to match
24427 any groupings made in the prologue and generate matching
24428 vldm operations. The need to match groups is because,
24429 unlike pop, vldm can only restore consecutive registers. */
24430 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
24431 /* Look for a case where a reg does not need restoring. */
24432 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24433 && (!df_regs_ever_live_p (i + 1)
24434 || call_used_regs[i + 1]))
24436 /* Restore the regs discovered so far (from reg+2 to
24437 end_reg). */
24438 if (end_reg > i + 2)
24439 arm_emit_vfp_multi_reg_pop (i + 2,
24440 (end_reg - (i + 2)) / 2,
24441 stack_pointer_rtx);
24445 /* Restore the remaining regs that we have discovered (or possibly
24446 even all of them, if the conditional in the for loop never
24447 triggered). */
24448 if (end_reg > i + 2)
24449 arm_emit_vfp_multi_reg_pop (i + 2,
24450 (end_reg - (i + 2)) / 2,
24451 stack_pointer_rtx);
24455 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
24456 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24459 rtx addr = gen_rtx_MEM (V2SImode,
24460 gen_rtx_POST_INC (SImode,
24461 stack_pointer_rtx));
24462 set_mem_alias_set (addr, get_frame_alias_set ());
24463 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24464 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24465 gen_rtx_REG (V2SImode, i),
24467 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
24468 stack_pointer_rtx, stack_pointer_rtx);
24471 if (saved_regs_mask)
24474 bool return_in_pc = false;
24476 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
24477 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
24478 && !IS_STACKALIGN (func_type)
24479 && really_return
24480 && crtl->args.pretend_args_size == 0
24481 && saved_regs_mask & (1 << LR_REGNUM)
24482 && !crtl->calls_eh_return)
24484 saved_regs_mask &= ~(1 << LR_REGNUM);
24485 saved_regs_mask |= (1 << PC_REGNUM);
24486 return_in_pc = true;
24489 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
24491 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24492 if (saved_regs_mask & (1 << i))
24494 rtx addr = gen_rtx_MEM (SImode,
24495 gen_rtx_POST_INC (SImode,
24496 stack_pointer_rtx));
24497 set_mem_alias_set (addr, get_frame_alias_set ());
24499 if (i == PC_REGNUM)
24501 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24502 XVECEXP (insn, 0, 0) = ret_rtx;
24503 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
24504 gen_rtx_REG (SImode, i),
24506 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
24507 insn = emit_jump_insn (insn);
24511 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
24513 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24514 gen_rtx_REG (SImode, i),
24516 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
24518 stack_pointer_rtx);
24524 if (current_tune->prefer_ldrd_strd
24525 && !optimize_function_for_size_p (cfun))
24526 {
24527 if (TARGET_THUMB2)
24528 thumb2_emit_ldrd_pop (saved_regs_mask);
24529 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
24530 arm_emit_ldrd_pop (saved_regs_mask);
24531 else
24532 arm_emit_multi_reg_pop (saved_regs_mask);
24533 }
24534 else
24535 arm_emit_multi_reg_pop (saved_regs_mask);
24538 if (return_in_pc)
24542 if (crtl->args.pretend_args_size)
24545 rtx dwarf = NULL_RTX;
24546 rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
24548 GEN_INT (crtl->args.pretend_args_size)));
24550 RTX_FRAME_RELATED_P (tmp) = 1;
24552 if (cfun->machine->uses_anonymous_args)
24554 /* Restore pretend args. See arm_expand_prologue for how the
24555 pretend args are saved on the stack. */
24556 int num_regs = crtl->args.pretend_args_size / 4;
24557 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
24558 for (j = 0, i = 0; j < num_regs; i++)
24559 if (saved_regs_mask & (1 << i))
24561 rtx reg = gen_rtx_REG (SImode, i);
24562 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
24565 REG_NOTES (tmp) = dwarf;
24567 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
24568 stack_pointer_rtx, stack_pointer_rtx);
24571 if (!really_return)
24574 if (crtl->calls_eh_return)
24575 emit_insn (gen_addsi3 (stack_pointer_rtx,
24577 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24579 if (IS_STACKALIGN (func_type))
24580 /* Restore the original stack pointer. Before prologue, the stack was
24581 realigned and the original stack pointer saved in r0. For details,
24582 see comment in arm_expand_prologue. */
24583 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
24585 emit_jump_insn (simple_return_rtx);
24588 /* Implementation of insn prologue_thumb1_interwork. This is the first
24589 "instruction" of a function called in ARM mode. Swap to thumb mode. */
24592 thumb1_output_interwork (void)
24595 FILE *f = asm_out_file;
24597 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
24598 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
24600 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
24602 /* Generate code sequence to switch us into Thumb mode. */
24603 /* The .code 32 directive has already been emitted by
24604 ASM_DECLARE_FUNCTION_NAME. */
24605 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
24606 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
24608 /* Generate a label, so that the debugger will notice the
24609 change in instruction sets. This label is also used by
24610 the assembler to bypass the ARM code when this function
24611 is called from a Thumb encoded function elsewhere in the
24612 same file. Hence the definition of STUB_NAME here must
24613 agree with the definition in gas/config/tc-arm.c. */
24615 #define STUB_NAME ".real_start_of"
24617 fprintf (f, "\t.code\t16\n");
24619 if (arm_dllexport_name_p (name))
24620 name = arm_strip_name_encoding (name);
24622 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
24623 fprintf (f, "\t.thumb_func\n");
24624 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
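/* For a function "foo" the stub emitted above looks roughly like this
   (a sketch assuming an empty user-label prefix; the "foo:" label and
   ".code 32" come from ASM_DECLARE_FUNCTION_NAME):

	foo:	orr	ip, pc, #1
		bx	ip
		.code	16
		.globl	.real_start_offoo
		.thumb_func
	.real_start_offoo:  */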
24629 /* Handle the case of a double word load into a low register from
24630 a computed memory address. The computed address may involve a
24631 register which is overwritten by the load. */
24633 thumb_load_double_from_address (rtx *operands)
24641 gcc_assert (REG_P (operands[0]));
24642 gcc_assert (MEM_P (operands[1]));
24644 /* Get the memory address. */
24645 addr = XEXP (operands[1], 0);
24647 /* Work out how the memory address is computed. */
24648 switch (GET_CODE (addr))
24651 operands[2] = adjust_address (operands[1], SImode, 4);
24653 if (REGNO (operands[0]) == REGNO (addr))
24655 output_asm_insn ("ldr\t%H0, %2", operands);
24656 output_asm_insn ("ldr\t%0, %1", operands);
24660 output_asm_insn ("ldr\t%0, %1", operands);
24661 output_asm_insn ("ldr\t%H0, %2", operands);
24666 /* Compute <address> + 4 for the high order load. */
24667 operands[2] = adjust_address (operands[1], SImode, 4);
24669 output_asm_insn ("ldr\t%0, %1", operands);
24670 output_asm_insn ("ldr\t%H0, %2", operands);
24674 arg1 = XEXP (addr, 0);
24675 arg2 = XEXP (addr, 1);
24677 if (CONSTANT_P (arg1))
24678 base = arg2, offset = arg1;
24679 else
24680 base = arg1, offset = arg2;
24682 gcc_assert (REG_P (base));
24684 /* Catch the case of <address> = <reg> + <reg> */
24685 if (REG_P (offset))
24687 int reg_offset = REGNO (offset);
24688 int reg_base = REGNO (base);
24689 int reg_dest = REGNO (operands[0]);
24691 /* Add the base and offset registers together into the
24692 higher destination register. */
24693 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
24694 reg_dest + 1, reg_base, reg_offset);
24696 /* Load the lower destination register from the address in
24697 the higher destination register. */
24698 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
24699 reg_dest, reg_dest + 1);
24701 /* Load the higher destination register from its own address
24702 plus 4. */
24703 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
24704 reg_dest + 1, reg_dest + 1);
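/* E.g. (illustrative) dest == r0 and address r0 + r2 gives
	add	r1, r0, r2
	ldr	r0, [r1, #0]
	ldr	r1, [r1, #4]
   so the address register is only clobbered by the final load. */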
24708 /* Compute <address> + 4 for the high order load. */
24709 operands[2] = adjust_address (operands[1], SImode, 4);
24711 /* If the computed address is held in the low order register
24712 then load the high order register first, otherwise always
24713 load the low order register first. */
24714 if (REGNO (operands[0]) == REGNO (base))
24716 output_asm_insn ("ldr\t%H0, %2", operands);
24717 output_asm_insn ("ldr\t%0, %1", operands);
24721 output_asm_insn ("ldr\t%0, %1", operands);
24722 output_asm_insn ("ldr\t%H0, %2", operands);
24728 /* With no registers to worry about we can just load the value
24729 directly. */
24730 operands[2] = adjust_address (operands[1], SImode, 4);
24732 output_asm_insn ("ldr\t%H0, %2", operands);
24733 output_asm_insn ("ldr\t%0, %1", operands);
24737 gcc_unreachable ();
24744 thumb_output_move_mem_multiple (int n, rtx *operands)
24751 if (REGNO (operands[4]) > REGNO (operands[5]))
24754 operands[4] = operands[5];
24757 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
24758 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
24762 if (REGNO (operands[4]) > REGNO (operands[5]))
24765 operands[4] = operands[5];
24768 if (REGNO (operands[5]) > REGNO (operands[6]))
24771 operands[5] = operands[6];
24774 if (REGNO (operands[4]) > REGNO (operands[5]))
24777 operands[4] = operands[5];
24781 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
24782 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
24786 gcc_unreachable ();
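/* For illustration: if the scratch registers arrive as r5, r4 for a
   two-word move, the swaps above produce the ascending register lists
   these instructions require:
	ldmia	r1!, {r4, r5}
	stmia	r0!, {r4, r5}  */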
24792 /* Output a call-via instruction for thumb state. */
24794 thumb_call_via_reg (rtx reg)
24796 int regno = REGNO (reg);
24799 gcc_assert (regno < LR_REGNUM);
24801 /* If we are in the normal text section we can use a single instance
24802 per compilation unit. If we are doing function sections, then we need
24803 an entry per section, since we can't rely on reachability. */
24804 if (in_section == text_section)
24806 thumb_call_reg_needed = 1;
24808 if (thumb_call_via_label[regno] == NULL)
24809 thumb_call_via_label[regno] = gen_label_rtx ();
24810 labelp = thumb_call_via_label + regno;
24814 if (cfun->machine->call_via[regno] == NULL)
24815 cfun->machine->call_via[regno] = gen_label_rtx ();
24816 labelp = cfun->machine->call_via + regno;
24819 output_asm_insn ("bl\t%a0", labelp);
24823 /* Routines for generating rtl. */
24825 thumb_expand_movmemqi (rtx *operands)
24827 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
24828 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
24829 HOST_WIDE_INT len = INTVAL (operands[2]);
24830 HOST_WIDE_INT offset = 0;
24834 emit_insn (gen_movmem12b (out, in, out, in));
24840 emit_insn (gen_movmem8b (out, in, out, in));
24846 rtx reg = gen_reg_rtx (SImode);
24847 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
24848 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
24855 rtx reg = gen_reg_rtx (HImode);
24856 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
24857 plus_constant (Pmode, in,
24859 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
24868 rtx reg = gen_reg_rtx (QImode);
24869 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
24870 plus_constant (Pmode, in,
24872 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
24879 thumb_reload_out_hi (rtx *operands)
24881 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
24884 /* Handle reading a half-word from memory during reload. */
24886 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
24888 gcc_unreachable ();
24891 /* Return the length of a function name prefix
24892 that starts with the character 'c'. */
24894 arm_get_strip_length (int c)
24898 ARM_NAME_ENCODING_LENGTHS
24903 /* Return a pointer to a function's name with any
24904 and all prefix encodings stripped from it. */
24906 arm_strip_name_encoding (const char *name)
24910 while ((skip = arm_get_strip_length (* name)))
24916 /* If there is a '*' anywhere in the name's prefix, then
24917 emit the stripped name verbatim, otherwise prepend an
24918 underscore if leading underscores are being used. */
24920 arm_asm_output_labelref (FILE *stream, const char *name)
24925 while ((skip = arm_get_strip_length (* name)))
24927 verbatim |= (*name == '*');
24931 if (verbatim)
24932 fputs (name, stream);
24933 else
24934 asm_fprintf (stream, "%U%s", name);
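/* For example (names are illustrative): a name encoded as "*foo" is
   emitted verbatim as "foo", while a plain "foo" goes through %U and
   may gain the user-label prefix, e.g. "_foo" on targets that use
   one. */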
24937 /* This function is used to emit an EABI tag and its associated value.
24938 We emit the numerical value of the tag in case the assembler does not
24939 support textual tags. (E.g. gas prior to 2.20.) If requested we include
24940 the tag name in a comment so that anyone reading the assembler output
24941 will know which tag is being set.
24943 This function is not static because arm-c.c needs it too. */
24946 arm_emit_eabi_attribute (const char *name, int num, int val)
24948 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
24949 if (flag_verbose_asm || flag_debug_asm)
24950 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
24951 asm_fprintf (asm_out_file, "\n");
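/* Sample output with -fverbose-asm (illustrative values):
	.eabi_attribute 30, 2	@ Tag_ABI_optimization_goals  */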
24955 arm_file_start (void)
24959 if (TARGET_UNIFIED_ASM)
24960 asm_fprintf (asm_out_file, "\t.syntax unified\n");
24964 const char *fpu_name;
24965 if (arm_selected_arch)
24966 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
24967 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
24968 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
24970 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
24972 if (TARGET_SOFT_FLOAT)
24974 fpu_name = "softvfp";
24978 fpu_name = arm_fpu_desc->name;
24979 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
24981 if (TARGET_HARD_FLOAT)
24982 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
24983 if (TARGET_HARD_FLOAT_ABI)
24984 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
24987 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
24989 /* Some of these attributes only apply when the corresponding features
24990 are used. However we don't have any easy way of figuring this out.
24991 Conservatively record the setting that would have been used. */
24993 if (flag_rounding_math)
24994 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
24996 if (!flag_unsafe_math_optimizations)
24998 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
24999 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25001 if (flag_signaling_nans)
25002 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25004 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25005 flag_finite_math_only ? 1 : 3);
25007 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25008 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25009 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25010 flag_short_enums ? 1 : 2);
25012 /* Tag_ABI_optimization_goals. */
25013 if (optimize_size)
25014 val = 4;
25015 else if (optimize >= 2)
25016 val = 2;
25017 else if (optimize)
25018 val = 1;
25019 else
25020 val = 6;
25021 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
25023 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25026 if (arm_fp16_format)
25027 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25028 (int) arm_fp16_format);
25030 if (arm_lang_output_object_attributes_hook)
25031 arm_lang_output_object_attributes_hook();
25034 default_file_start ();
25038 arm_file_end (void)
25042 if (NEED_INDICATE_EXEC_STACK)
25043 /* Add .note.GNU-stack. */
25044 file_end_indicate_exec_stack ();
25046 if (! thumb_call_reg_needed)
25047 return;
25049 switch_to_section (text_section);
25050 asm_fprintf (asm_out_file, "\t.code 16\n");
25051 ASM_OUTPUT_ALIGN (asm_out_file, 1);
25053 for (regno = 0; regno < LR_REGNUM; regno++)
25055 rtx label = thumb_call_via_label[regno];
25059 targetm.asm_out.internal_label (asm_out_file, "L",
25060 CODE_LABEL_NUMBER (label));
25061 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
25067 /* Symbols in the text segment can be accessed without indirecting via the
25068 constant pool; it may take an extra binary operation, but this is still
25069 faster than indirecting via memory. Don't do this when not optimizing,
25070 since we won't be calculating all of the offsets necessary to do this
25071 simplification. */
25074 arm_encode_section_info (tree decl, rtx rtl, int first)
25076 if (optimize > 0 && TREE_CONSTANT (decl))
25077 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
25079 default_encode_section_info (decl, rtl, first);
25081 #endif /* !ARM_PE */
25084 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25086 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25087 && !strcmp (prefix, "L"))
25089 arm_ccfsm_state = 0;
25090 arm_target_insn = NULL;
25092 default_internal_label (stream, prefix, labelno);
25095 /* Output code to add DELTA to the first argument, and then jump
25096 to FUNCTION. Used for C++ multiple inheritance. */
25098 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
25099 HOST_WIDE_INT delta,
25100 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
25103 static int thunk_label = 0;
25106 int mi_delta = delta;
25107 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25109 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25110 ? 1 : 0);
25111 if (mi_delta < 0)
25112 mi_delta = - mi_delta;
25114 final_start_function (emit_barrier (), file, 1);
25118 int labelno = thunk_label++;
25119 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25120 /* Thunks are entered in ARM mode when available. */
25121 if (TARGET_THUMB1_ONLY)
25123 /* push r3 so we can use it as a temporary. */
25124 /* TODO: Omit this save if r3 is not used. */
25125 fputs ("\tpush {r3}\n", file);
25126 fputs ("\tldr\tr3, ", file);
25130 fputs ("\tldr\tr12, ", file);
25132 assemble_name (file, label);
25133 fputc ('\n', file);
25136 /* If we are generating PIC, the ldr instruction below loads
25137 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25138 the address of the add + 8, so we have:
25140 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25141 = target + 1.
25143 Note that we have "+ 1" because some versions of GNU ld
25144 don't set the low bit of the result for R_ARM_REL32
25145 relocations against thumb function symbols.
25146 On ARMv6M this is +4, not +8. */
25147 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25148 assemble_name (file, labelpc);
25149 fputs (":\n", file);
25150 if (TARGET_THUMB1_ONLY)
25152 /* This is 2 insns after the start of the thunk, so we know it
25153 is 4-byte aligned. */
25154 fputs ("\tadd\tr3, pc, r3\n", file);
25155 fputs ("\tmov r12, r3\n", file);
25158 fputs ("\tadd\tr12, pc, r12\n", file);
25160 else if (TARGET_THUMB1_ONLY)
25161 fputs ("\tmov r12, r3\n", file);
25163 if (TARGET_THUMB1_ONLY)
25165 if (mi_delta > 255)
25167 fputs ("\tldr\tr3, ", file);
25168 assemble_name (file, label);
25169 fputs ("+4\n", file);
25170 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
25171 mi_op, this_regno, this_regno);
25173 else if (mi_delta != 0)
25175 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25176 mi_op, this_regno, this_regno,
25182 /* TODO: Use movw/movt for large constants when available. */
25183 while (mi_delta != 0)
25185 if ((mi_delta & (3 << shift)) == 0)
25186 shift += 2;
25187 else
25189 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25190 mi_op, this_regno, this_regno,
25191 mi_delta & (0xff << shift));
25192 mi_delta &= ~(0xff << shift);
25193 shift += 8;
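/* Worked example (assuming this_regno prints as r0): a delta of
   0x1234 splits into the rotatable immediates 0x234 and 0x1000,
   emitting
	add	r0, r0, #564
	add	r0, r0, #4096  */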
25199 if (TARGET_THUMB1_ONLY)
25200 fputs ("\tpop\t{r3}\n", file);
25202 fprintf (file, "\tbx\tr12\n");
25203 ASM_OUTPUT_ALIGN (file, 2);
25204 assemble_name (file, label);
25205 fputs (":\n", file);
25208 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
25209 rtx tem = XEXP (DECL_RTL (function), 0);
25210 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
25211 tem = gen_rtx_MINUS (GET_MODE (tem),
25213 gen_rtx_SYMBOL_REF (Pmode,
25214 ggc_strdup (labelpc)));
25215 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25218 /* Output ".word .LTHUNKn". */
25219 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25221 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25222 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25226 fputs ("\tb\t", file);
25227 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
25228 if (NEED_PLT_RELOC)
25229 fputs ("(PLT)", file);
25230 fputc ('\n', file);
25233 final_end_function ();
25237 arm_emit_vector_const (FILE *file, rtx x)
25240 const char * pattern;
25242 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25244 switch (GET_MODE (x))
25246 case V2SImode: pattern = "%08x"; break;
25247 case V4HImode: pattern = "%04x"; break;
25248 case V8QImode: pattern = "%02x"; break;
25249 default: gcc_unreachable ();
25252 fprintf (file, "0x");
25253 for (i = CONST_VECTOR_NUNITS (x); i--;)
25257 element = CONST_VECTOR_ELT (x, i);
25258 fprintf (file, pattern, INTVAL (element));
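/* For illustration: a V4HImode constant {1, 2, 3, 4} (element 0 first)
   prints the highest element first, giving "0x0004000300020001". */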
25264 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25265 HFmode constant pool entries are actually loaded with ldr. */
25267 arm_emit_fp16_const (rtx c)
25272 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
25273 bits = real_to_target (NULL, &r, HFmode);
25274 if (WORDS_BIG_ENDIAN)
25275 assemble_zeros (2);
25276 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
25277 if (!WORDS_BIG_ENDIAN)
25278 assemble_zeros (2);
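/* E.g. (illustrative) HFmode 1.0 has the half-precision bit pattern
   0x3c00; on a little-endian target the two data bytes are emitted
   first, followed by two bytes of zero padding. */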
25282 arm_output_load_gr (rtx *operands)
25289 if (!MEM_P (operands [1])
25290 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
25291 || !REG_P (reg = XEXP (sum, 0))
25292 || !CONST_INT_P (offset = XEXP (sum, 1))
25293 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
25294 return "wldrw%?\t%0, %1";
25296 /* Fix up an out-of-range load of a GR register. */
25297 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
25298 wcgr = operands[0];
25300 output_asm_insn ("ldr%?\t%0, %1", operands);
25302 operands[0] = wcgr;
25304 output_asm_insn ("tmcr%?\t%0, %1", operands);
25305 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
25310 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25312 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25313 named arg and all anonymous args onto the stack.
25314 XXX I know the prologue shouldn't be pushing registers, but it is faster
25315 that way. */
25318 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
25319 enum machine_mode mode,
25322 int second_time ATTRIBUTE_UNUSED)
25324 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
25327 cfun->machine->uses_anonymous_args = 1;
25328 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
25330 nregs = pcum->aapcs_ncrn;
25331 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
25335 nregs = pcum->nregs;
25337 if (nregs < NUM_ARG_REGS)
25338 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
25341 /* Return nonzero if the CONSUMER instruction (a store) does not need
25342 PRODUCER's value to calculate the address. */
25345 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
25347 rtx value = PATTERN (producer);
25348 rtx addr = PATTERN (consumer);
25350 if (GET_CODE (value) == COND_EXEC)
25351 value = COND_EXEC_CODE (value);
25352 if (GET_CODE (value) == PARALLEL)
25353 value = XVECEXP (value, 0, 0);
25354 value = XEXP (value, 0);
25355 if (GET_CODE (addr) == COND_EXEC)
25356 addr = COND_EXEC_CODE (addr);
25357 if (GET_CODE (addr) == PARALLEL)
25358 addr = XVECEXP (addr, 0, 0);
25359 addr = XEXP (addr, 0);
25361 return !reg_overlap_mentioned_p (value, addr);
25364 /* Return nonzero if the CONSUMER instruction (a store) does need
25365 PRODUCER's value to calculate the address. */
25368 arm_early_store_addr_dep (rtx producer, rtx consumer)
25370 return !arm_no_early_store_addr_dep (producer, consumer);
25373 /* Return nonzero if the CONSUMER instruction (a load) does need
25374 PRODUCER's value to calculate the address. */
25377 arm_early_load_addr_dep (rtx producer, rtx consumer)
25379 rtx value = PATTERN (producer);
25380 rtx addr = PATTERN (consumer);
25382 if (GET_CODE (value) == COND_EXEC)
25383 value = COND_EXEC_CODE (value);
25384 if (GET_CODE (value) == PARALLEL)
25385 value = XVECEXP (value, 0, 0);
25386 value = XEXP (value, 0);
25387 if (GET_CODE (addr) == COND_EXEC)
25388 addr = COND_EXEC_CODE (addr);
25389 if (GET_CODE (addr) == PARALLEL)
25391 if (GET_CODE (XVECEXP (addr, 0, 0)) == RETURN)
25392 addr = XVECEXP (addr, 0, 1);
25394 addr = XVECEXP (addr, 0, 0);
25396 addr = XEXP (addr, 1);
25398 return reg_overlap_mentioned_p (value, addr);
25401 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
25402 have an early register shift value or amount dependency on the
25403 result of PRODUCER. */
25406 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
25408 rtx value = PATTERN (producer);
25409 rtx op = PATTERN (consumer);
25412 if (GET_CODE (value) == COND_EXEC)
25413 value = COND_EXEC_CODE (value);
25414 if (GET_CODE (value) == PARALLEL)
25415 value = XVECEXP (value, 0, 0);
25416 value = XEXP (value, 0);
25417 if (GET_CODE (op) == COND_EXEC)
25418 op = COND_EXEC_CODE (op);
25419 if (GET_CODE (op) == PARALLEL)
25420 op = XVECEXP (op, 0, 0);
25423 early_op = XEXP (op, 0);
25424 /* This is either an actual independent shift, or a shift applied to
25425 the first operand of another operation. We want the whole shift
25426 operation. */
25427 if (REG_P (early_op))
25428 early_op = op;
25430 return !reg_overlap_mentioned_p (value, early_op);
25433 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
25434 have an early register shift value dependency on the result of
25435 PRODUCER. */
25438 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
25440 rtx value = PATTERN (producer);
25441 rtx op = PATTERN (consumer);
25444 if (GET_CODE (value) == COND_EXEC)
25445 value = COND_EXEC_CODE (value);
25446 if (GET_CODE (value) == PARALLEL)
25447 value = XVECEXP (value, 0, 0);
25448 value = XEXP (value, 0);
25449 if (GET_CODE (op) == COND_EXEC)
25450 op = COND_EXEC_CODE (op);
25451 if (GET_CODE (op) == PARALLEL)
25452 op = XVECEXP (op, 0, 0);
25455 early_op = XEXP (op, 0);
25457 /* This is either an actual independent shift, or a shift applied to
25458 the first operand of another operation. We want the value being
25459 shifted, in either case. */
25460 if (!REG_P (early_op))
25461 early_op = XEXP (early_op, 0);
25463 return !reg_overlap_mentioned_p (value, early_op);
25466 /* Return nonzero if the CONSUMER (a mul or mac op) does not
25467 have an early register mult dependency on the result of
25468 PRODUCER. */
25471 arm_no_early_mul_dep (rtx producer, rtx consumer)
25473 rtx value = PATTERN (producer);
25474 rtx op = PATTERN (consumer);
25476 if (GET_CODE (value) == COND_EXEC)
25477 value = COND_EXEC_CODE (value);
25478 if (GET_CODE (value) == PARALLEL)
25479 value = XVECEXP (value, 0, 0);
25480 value = XEXP (value, 0);
25481 if (GET_CODE (op) == COND_EXEC)
25482 op = COND_EXEC_CODE (op);
25483 if (GET_CODE (op) == PARALLEL)
25484 op = XVECEXP (op, 0, 0);
25487 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
25489 if (GET_CODE (XEXP (op, 0)) == MULT)
25490 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
25492 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
25498 /* We can't rely on the caller doing the proper promotion when
25499 using APCS or ATPCS. */
25502 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
25504 return !TARGET_AAPCS_BASED;
25507 static enum machine_mode
25508 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
25509 enum machine_mode mode,
25510 int *punsignedp ATTRIBUTE_UNUSED,
25511 const_tree fntype ATTRIBUTE_UNUSED,
25512 int for_return ATTRIBUTE_UNUSED)
25514 if (GET_MODE_CLASS (mode) == MODE_INT
25515 && GET_MODE_SIZE (mode) < 4)
25516 return SImode;
25518 return mode;
25521 /* AAPCS based ABIs use short enums by default. */
25524 arm_default_short_enums (void)
25526 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
25530 /* AAPCS requires that anonymous bitfields affect structure alignment. */
25533 arm_align_anon_bitfield (void)
25535 return TARGET_AAPCS_BASED;
25539 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
25542 arm_cxx_guard_type (void)
25544 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
25547 /* Return non-zero iff the consumer (a multiply-accumulate or a
25548 multiple-subtract instruction) has an accumulator dependency on the
25549 result of the producer and no other dependency on that result. It
25550 does not check if the producer is a multiply-accumulate instruction. */
25552 arm_mac_accumulator_is_result (rtx producer, rtx consumer)
25557 producer = PATTERN (producer);
25558 consumer = PATTERN (consumer);
25560 if (GET_CODE (producer) == COND_EXEC)
25561 producer = COND_EXEC_CODE (producer);
25562 if (GET_CODE (consumer) == COND_EXEC)
25563 consumer = COND_EXEC_CODE (consumer);
25565 if (GET_CODE (producer) != SET)
25568 result = XEXP (producer, 0);
25570 if (GET_CODE (consumer) != SET)
25573 /* Check that the consumer is of the form
25574 (set (...) (plus (mult ...) (...)))
25575 or
25576 (set (...) (minus (...) (mult ...))). */
25577 if (GET_CODE (XEXP (consumer, 1)) == PLUS)
25579 if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT)
25582 op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0);
25583 op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1);
25584 acc = XEXP (XEXP (consumer, 1), 1);
25586 else if (GET_CODE (XEXP (consumer, 1)) == MINUS)
25588 if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT)
25591 op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0);
25592 op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1);
25593 acc = XEXP (XEXP (consumer, 1), 0);
25598 return (reg_overlap_mentioned_p (result, acc)
25599 && !reg_overlap_mentioned_p (result, op0)
25600 && !reg_overlap_mentioned_p (result, op1));
25603 /* Return non-zero if the consumer (a multiply-accumulate instruction)
25604 has an accumulator dependency on the result of the producer (a
25605 multiplication instruction) and no other dependency on that result. */
25607 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
25609 rtx mul = PATTERN (producer);
25610 rtx mac = PATTERN (consumer);
25612 rtx mac_op0, mac_op1, mac_acc;
25614 if (GET_CODE (mul) == COND_EXEC)
25615 mul = COND_EXEC_CODE (mul);
25616 if (GET_CODE (mac) == COND_EXEC)
25617 mac = COND_EXEC_CODE (mac);
25619 /* Check that mul is of the form (set (...) (mult ...))
25620 and mla is of the form (set (...) (plus (mult ...) (...))). */
25621 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
25622 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
25623 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
25626 mul_result = XEXP (mul, 0);
25627 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
25628 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
25629 mac_acc = XEXP (XEXP (mac, 1), 1);
25631 return (reg_overlap_mentioned_p (mul_result, mac_acc)
25632 && !reg_overlap_mentioned_p (mul_result, mac_op0)
25633 && !reg_overlap_mentioned_p (mul_result, mac_op1));
25637 /* The EABI says test the least significant bit of a guard variable. */
25640 arm_cxx_guard_mask_bit (void)
25642 return TARGET_AAPCS_BASED;
25646 /* The EABI specifies that all array cookies are 8 bytes long. */
25649 arm_get_cookie_size (tree type)
25653 if (!TARGET_AAPCS_BASED)
25654 return default_cxx_get_cookie_size (type);
25656 size = build_int_cst (sizetype, 8);
25661 /* The EABI says that array cookies should also contain the element size. */
25664 arm_cookie_has_size (void)
25666 return TARGET_AAPCS_BASED;
25670 /* The EABI says constructors and destructors should return a pointer to
25671 the object constructed/destroyed. */
25674 arm_cxx_cdtor_returns_this (void)
25676 return TARGET_AAPCS_BASED;
25679 /* The EABI says that an inline function may never be the key
25680 method. */
25683 arm_cxx_key_method_may_be_inline (void)
25685 return !TARGET_AAPCS_BASED;
25689 arm_cxx_determine_class_data_visibility (tree decl)
25691 if (!TARGET_AAPCS_BASED
25692 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
25695 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
25696 is exported. However, on systems without dynamic vague linkage,
25697 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
25698 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
25699 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
25701 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
25702 DECL_VISIBILITY_SPECIFIED (decl) = 1;
25706 arm_cxx_class_data_always_comdat (void)
25708 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
25709 vague linkage if the class has no key function. */
25710 return !TARGET_AAPCS_BASED;
25714 /* The EABI says __aeabi_atexit should be used to register static
25718 arm_cxx_use_aeabi_atexit (void)
25720 return TARGET_AAPCS_BASED;
25725 arm_set_return_address (rtx source, rtx scratch)
25727 arm_stack_offsets *offsets;
25728 HOST_WIDE_INT delta;
25730 unsigned long saved_regs;
25732 offsets = arm_get_frame_offsets ();
25733 saved_regs = offsets->saved_regs_mask;
25735 if ((saved_regs & (1 << LR_REGNUM)) == 0)
25736 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
25739 if (frame_pointer_needed)
25740 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
25743 /* LR will be the first saved register. */
25744 delta = offsets->outgoing_args - (offsets->frame + 4);
25749 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
25750 GEN_INT (delta & ~4095)));
25755 addr = stack_pointer_rtx;
25757 addr = plus_constant (Pmode, addr, delta);
25759 emit_move_insn (gen_frame_mem (Pmode, addr), source);
25765 thumb_set_return_address (rtx source, rtx scratch)
25767 arm_stack_offsets *offsets;
25768 HOST_WIDE_INT delta;
25769 HOST_WIDE_INT limit;
25772 unsigned long mask;
25776 offsets = arm_get_frame_offsets ();
25777 mask = offsets->saved_regs_mask;
25778 if (mask & (1 << LR_REGNUM))
25781 /* Find the saved regs. */
25782 if (frame_pointer_needed)
25784 delta = offsets->soft_frame - offsets->saved_args;
25785 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
25791 delta = offsets->outgoing_args - offsets->saved_args;
25794 /* Allow for the stack frame. */
25795 if (TARGET_THUMB1 && TARGET_BACKTRACE)
25796 delta -= 16;
25797 /* The link register is always the first saved register. */
25798 delta -= 4;
25800 /* Construct the address. */
25801 addr = gen_rtx_REG (SImode, reg);
25804 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
25805 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
25809 addr = plus_constant (Pmode, addr, delta);
25811 emit_move_insn (gen_frame_mem (Pmode, addr), source);
25814 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
25817 /* Implements target hook vector_mode_supported_p. */
25819 arm_vector_mode_supported_p (enum machine_mode mode)
25821 /* Neon also supports V2SImode, etc. listed in the clause below. */
25822 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
25823 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
25826 if ((TARGET_NEON || TARGET_IWMMXT)
25827 && ((mode == V2SImode)
25828 || (mode == V4HImode)
25829 || (mode == V8QImode)))
25832 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
25833 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
25834 || mode == V2HAmode))
25840 /* Implements target hook array_mode_supported_p. */
25843 arm_array_mode_supported_p (enum machine_mode mode,
25844 unsigned HOST_WIDE_INT nelems)
25847 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
25848 && (nelems >= 2 && nelems <= 4))
25854 /* Use the option -mvectorize-with-neon-double to override the use of quadword
25855 registers when autovectorizing for Neon, at least until multiple vector
25856 widths are supported properly by the middle-end. */
25858 static enum machine_mode
25859 arm_preferred_simd_mode (enum machine_mode mode)
25865 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
25867 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
25869 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
25871 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
25873 if (!TARGET_NEON_VECTORIZE_DOUBLE)
25880 if (TARGET_REALLY_IWMMXT)
25896 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
25898 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
25899 using r0-r4 for function arguments, r7 for the stack frame and don't have
25900 enough left over to do doubleword arithmetic. For Thumb-2 all the
25901 potentially problematic instructions accept high registers so this is not
25902 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
25903 that require many low registers. */
25905 arm_class_likely_spilled_p (reg_class_t rclass)
25907 if ((TARGET_THUMB1 && rclass == LO_REGS)
25908 || rclass == CC_REG)
25914 /* Implements target hook small_register_classes_for_mode_p. */
25916 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
25918 return TARGET_THUMB1;
25921 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
25922 ARM insns and therefore guarantee that the shift count is modulo 256.
25923 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
25924 guarantee no particular behavior for out-of-range counts. */
25926 static unsigned HOST_WIDE_INT
25927 arm_shift_truncation_mask (enum machine_mode mode)
25929 return mode == SImode ? 255 : 0;
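/* Sketch of the effect: for SImode the middle-end may drop an explicit
   masking of the count, e.g. rewrite x << (n & 255) as x << n, since
   the hardware only consults the low byte of the count anyway. */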
25933 /* Map internal gcc register numbers to DWARF2 register numbers. */
25936 arm_dbx_register_number (unsigned int regno)
25941 if (IS_VFP_REGNUM (regno))
25943 /* See comment in arm_dwarf_register_span. */
25944 if (VFP_REGNO_OK_FOR_SINGLE (regno))
25945 return 64 + regno - FIRST_VFP_REGNUM;
25947 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
25950 if (IS_IWMMXT_GR_REGNUM (regno))
25951 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
25953 if (IS_IWMMXT_REGNUM (regno))
25954 return 112 + regno - FIRST_IWMMXT_REGNUM;
25956 gcc_unreachable ();
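/* Illustrative mappings: s5 -> 69 (64 + 5), d16 -> 272 (256 + 32/2),
   wCGR0 -> 104 and wR0 -> 112, matching the ranges above. */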
25959 /* Dwarf models VFPv3 registers as 32 64-bit registers.
25960 GCC models them as 64 32-bit registers, so we need to describe this to
25961 the DWARF generation code. Other registers can use the default. */
25963 arm_dwarf_register_span (rtx rtl)
25970 regno = REGNO (rtl);
25971 if (!IS_VFP_REGNUM (regno))
25974 /* XXX FIXME: The EABI defines two VFP register ranges:
25975 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
25976 256-287: D0-D31
25977 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
25978 corresponding D register. Until GDB supports this, we shall use the
25979 legacy encodings. We also use these encodings for D0-D15 for
25980 compatibility with older debuggers. */
25981 if (VFP_REGNO_OK_FOR_SINGLE (regno))
25984 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
25985 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
25986 for (i = 0; i < nregs; i++)
25987 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, regno + i);
25992 #if ARM_UNWIND_INFO
25993 /* Emit unwind directives for a store-multiple instruction or stack pointer
25994 push during alignment.
25995 These should only ever be generated by the function prologue code, so
25996 expect them to have a particular form. */
25999 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26002 HOST_WIDE_INT offset;
26003 HOST_WIDE_INT nregs;
26009 e = XVECEXP (p, 0, 0);
26010 if (GET_CODE (e) != SET)
26013 /* First insn will adjust the stack pointer. */
26014 if (GET_CODE (e) != SET
26015 || !REG_P (XEXP (e, 0))
26016 || REGNO (XEXP (e, 0)) != SP_REGNUM
26017 || GET_CODE (XEXP (e, 1)) != PLUS)
26020 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
26021 nregs = XVECLEN (p, 0) - 1;
26023 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
26026 /* The function prologue may also push pc, but does not annotate it,
26027 as it is never restored. We turn this into a stack pointer adjustment. */
26028 if (nregs * 4 == offset - 4)
26030 fprintf (asm_out_file, "\t.pad #4\n");
26034 fprintf (asm_out_file, "\t.save {");
26036 else if (IS_VFP_REGNUM (reg))
26039 fprintf (asm_out_file, "\t.vsave {");
26042 /* Unknown register type. */
26045 /* If the stack increment doesn't match the size of the saved registers,
26046 something has gone horribly wrong. */
26047 if (offset != nregs * reg_size)
26052 /* The remaining insns will describe the stores. */
26053 for (i = 1; i <= nregs; i++)
26055 /* Expect (set (mem <addr>) (reg)).
26056 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26057 e = XVECEXP (p, 0, i);
26058 if (GET_CODE (e) != SET
26059 || !MEM_P (XEXP (e, 0))
26060 || !REG_P (XEXP (e, 1)))
26063 reg = REGNO (XEXP (e, 1));
26068 fprintf (asm_out_file, ", ");
26069 /* We can't use %r for vfp because we need to use the
26070 double precision register names. */
26071 if (IS_VFP_REGNUM (reg))
26072 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26074 asm_fprintf (asm_out_file, "%r", reg);
26076 #ifdef ENABLE_CHECKING
26077 /* Check that the addresses are consecutive. */
26078 e = XEXP (XEXP (e, 0), 0);
26079 if (GET_CODE (e) == PLUS)
26081 offset += reg_size;
26082 if (!REG_P (XEXP (e, 0))
26083 || REGNO (XEXP (e, 0)) != SP_REGNUM
26084 || !CONST_INT_P (XEXP (e, 1))
26085 || offset != INTVAL (XEXP (e, 1)))
26090 || REGNO (e) != SP_REGNUM)
26094 fprintf (asm_out_file, "}\n");
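/* For example, a prologue "push {r4, r5, lr}" is annotated as
	.save {r4, r5, lr}
   while a pushed pc, which is never restored, is annotated as an extra
   ".pad #4" stack adjustment instead.  */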
26097 /* Emit unwind directives for a SET. */
26100 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26108 switch (GET_CODE (e0))
26111 /* Pushing a single register. */
26112 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26113 || !REG_P (XEXP (XEXP (e0, 0), 0))
26114 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26117 asm_fprintf (asm_out_file, "\t.save ");
26118 if (IS_VFP_REGNUM (REGNO (e1)))
26119 asm_fprintf(asm_out_file, "{d%d}\n",
26120 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26122 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26126 if (REGNO (e0) == SP_REGNUM)
26128 /* A stack increment. */
26129 if (GET_CODE (e1) != PLUS
26130 || !REG_P (XEXP (e1, 0))
26131 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26132 || !CONST_INT_P (XEXP (e1, 1)))
26135 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26136 -INTVAL (XEXP (e1, 1)));
26138 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26140 HOST_WIDE_INT offset;
26142 if (GET_CODE (e1) == PLUS)
26144 if (!REG_P (XEXP (e1, 0))
26145 || !CONST_INT_P (XEXP (e1, 1)))
26147 reg = REGNO (XEXP (e1, 0));
26148 offset = INTVAL (XEXP (e1, 1));
26149 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26150 HARD_FRAME_POINTER_REGNUM, reg,
26153 else if (REG_P (e1))
26156 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26157 HARD_FRAME_POINTER_REGNUM, reg);
26162 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26164 /* Move from sp to reg. */
26165 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26167 else if (GET_CODE (e1) == PLUS
26168 && REG_P (XEXP (e1, 0))
26169 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26170 && CONST_INT_P (XEXP (e1, 1)))
26172 /* Set reg to offset from sp. */
26173 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26174 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
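/* For example:
	(set (mem (pre_dec sp)) (reg r4))	=> .save {r4}
	(set sp (plus sp (const_int -16)))	=> .pad #16
	(set fp (plus sp (const_int 8)))	=> .setfp fp, sp, #8
	(set r7 (reg sp))			=> .movsp r7  */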
26186 /* Emit unwind directives for the given insn. */
26189 arm_unwind_emit (FILE * asm_out_file, rtx insn)
26192 bool handled_one = false;
26194 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26197 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26198 && (TREE_NOTHROW (current_function_decl)
26199 || crtl->all_throwers_are_sibcalls))
26202 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26205 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26207 pat = XEXP (note, 0);
26208 switch (REG_NOTE_KIND (note))
26210 case REG_FRAME_RELATED_EXPR:
26213 case REG_CFA_REGISTER:
26216 pat = PATTERN (insn);
26217 if (GET_CODE (pat) == PARALLEL)
26218 pat = XVECEXP (pat, 0, 0);
26221 /* Only emitted for IS_STACKALIGN re-alignment. */
26226 src = SET_SRC (pat);
26227 dest = SET_DEST (pat);
26229 gcc_assert (src == stack_pointer_rtx);
26230 reg = REGNO (dest);
26231 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26234 handled_one = true;
26237 /* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
26238 to get correct dwarf information for shrink-wrap.  We should not
26239 emit unwind information for it because these are used either for
26240 pretend arguments or notes to adjust sp and restore registers from
26241 stack.  */
26242 case REG_CFA_ADJUST_CFA:
26243 case REG_CFA_RESTORE:
26246 case REG_CFA_DEF_CFA:
26247 case REG_CFA_EXPRESSION:
26248 case REG_CFA_OFFSET:
26249 /* ??? Only handling here what we actually emit. */
26250 gcc_unreachable ();
26258 pat = PATTERN (insn);
26261 switch (GET_CODE (pat))
26264 arm_unwind_emit_set (asm_out_file, pat);
26268 /* Store multiple. */
26269 arm_unwind_emit_sequence (asm_out_file, pat);
26278 /* Output a reference from a function exception table to the type_info
26279 object X. The EABI specifies that the symbol should be relocated by
26280 an R_ARM_TARGET2 relocation. */
26283 arm_output_ttype (rtx x)
26285 fputs ("\t.word\t", asm_out_file);
26286 output_addr_const (asm_out_file, x);
26287 /* Use special relocations for symbol references. */
26288 if (!CONST_INT_P (x))
26289 fputs ("(TARGET2)", asm_out_file);
26290 fputc ('\n', asm_out_file);
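/* For example, a catch clause for a C++ "int" emits
	.word	_ZTIi(TARGET2)
   and the platform chooses how R_ARM_TARGET2 is interpreted (e.g.
   GOT-relative on GNU/Linux) at link time.  */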
26295 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26298 arm_asm_emit_except_personality (rtx personality)
26300 fputs ("\t.personality\t", asm_out_file);
26301 output_addr_const (asm_out_file, personality);
26302 fputc ('\n', asm_out_file);
26305 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26308 arm_asm_init_sections (void)
26310 exception_section = get_unnamed_section (0, output_section_asm_op,
26313 #endif /* ARM_UNWIND_INFO */
26315 /* Output unwind directives for the start/end of a function. */
26318 arm_output_fn_unwind (FILE * f, bool prologue)
26320 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26324 fputs ("\t.fnstart\n", f);
26327 /* If this function will never be unwound, then mark it as such.
26328 The same condition is used in arm_unwind_emit to suppress
26329 the frame annotations. */
26330 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26331 && (TREE_NOTHROW (current_function_decl)
26332 || crtl->all_throwers_are_sibcalls))
26333 fputs("\t.cantunwind\n", f);
26335 fputs ("\t.fnend\n", f);
26340 arm_emit_tls_decoration (FILE *fp, rtx x)
26342 enum tls_reloc reloc;
26345 val = XVECEXP (x, 0, 0);
26346 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26348 output_addr_const (fp, val);
26353 fputs ("(tlsgd)", fp);
26356 fputs ("(tlsldm)", fp);
26359 fputs ("(tlsldo)", fp);
26362 fputs ("(gottpoff)", fp);
26365 fputs ("(tpoff)", fp);
26368 fputs ("(tlsdesc)", fp);
26371 gcc_unreachable ();
26380 fputs (" + (. - ", fp);
26381 output_addr_const (fp, XVECEXP (x, 0, 2));
26382 /* For DESCSEQ the 3rd operand encodes thumbness, and is added.  */
26383 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26384 output_addr_const (fp, XVECEXP (x, 0, 3));
26394 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26397 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26399 gcc_assert (size == 4);
26400 fputs ("\t.word\t", file);
26401 output_addr_const (file, x);
26402 fputs ("(tlsldo)", file);
26405 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26408 arm_output_addr_const_extra (FILE *fp, rtx x)
26410 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26411 return arm_emit_tls_decoration (fp, x);
26412 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26415 int labelno = INTVAL (XVECEXP (x, 0, 0));
26417 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26418 assemble_name_raw (fp, label);
26422 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26424 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26428 output_addr_const (fp, XVECEXP (x, 0, 0));
26432 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
26434 output_addr_const (fp, XVECEXP (x, 0, 0));
26438 output_addr_const (fp, XVECEXP (x, 0, 1));
26442 else if (GET_CODE (x) == CONST_VECTOR)
26443 return arm_emit_vector_const (fp, x);
26448 /* Output assembly for a shift instruction.
26449 SET_FLAGS determines how the instruction modifies the condition codes.
26450 0 - Do not set condition codes.
26451 1 - Set condition codes.
26452 2 - Use smallest instruction. */
26454 arm_output_shift(rtx * operands, int set_flags)
26457 static const char flag_chars[3] = {'?', '.', '!'};
26462 c = flag_chars[set_flags];
26463 if (TARGET_UNIFIED_ASM)
26465 shift = shift_op(operands[3], &val);
26469 operands[2] = GEN_INT(val);
26470 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
26473 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
26476 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
26477 output_asm_insn (pattern, operands);
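/* For example, under unified assembly a shift by a constant is printed
   from a template of the form "lsl%<f>\t%0, %1, %2", where <f> is one
   of the '?', '.' and '!' flag characters above, selecting the plain,
   flag-setting ("lsls") or smallest encoding respectively.  */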
26481 /* Output assembly for a WMMX immediate shift instruction. */
26483 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
26485 int shift = INTVAL (operands[2]);
26487 enum machine_mode opmode = GET_MODE (operands[0]);
26489 gcc_assert (shift >= 0);
26491 /* Handle the case where the shift value in the register versions is
26492 > 63 (for D qualifier), 31 (for W qualifier) or 15 (for H qualifier).  */
26493 if (((opmode == V4HImode) && (shift > 15))
26494 || ((opmode == V2SImode) && (shift > 31))
26495 || ((opmode == DImode) && (shift > 63)))
26499 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26500 output_asm_insn (templ, operands);
26501 if (opmode == DImode)
26503 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
26504 output_asm_insn (templ, operands);
26509 /* The destination register will contain all zeros. */
26510 sprintf (templ, "wzero\t%%0");
26511 output_asm_insn (templ, operands);
26516 if ((opmode == DImode) && (shift > 32))
26518 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26519 output_asm_insn (templ, operands);
26520 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
26521 output_asm_insn (templ, operands);
26525 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
26526 output_asm_insn (templ, operands);
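/* For example, "wsrad" of a DImode value by the in-range amount 40 is
   emitted as
	wsrad	wR0, wR1, #32
	wsrad	wR0, wR0, #8
   while fully out-of-range amounts either saturate to #32 shifts (for
   wsra/wror) or zero the destination with "wzero".  */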
26531 /* Output assembly for a WMMX tinsr instruction. */
26533 arm_output_iwmmxt_tinsr (rtx *operands)
26535 int mask = INTVAL (operands[3]);
26538 int units = mode_nunits[GET_MODE (operands[0])];
26539 gcc_assert ((mask & (mask - 1)) == 0);
26540 for (i = 0; i < units; ++i)
26542 if ((mask & 0x01) == 1)
26548 gcc_assert (i < units);
26550 switch (GET_MODE (operands[0]))
26553 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
26556 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
26559 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
26562 gcc_unreachable ();
26565 output_asm_insn (templ, operands);
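/* For example, inserting into lane 2 of a V4HImode vector (operand 3
   being the mask 1 << 2) emits "tinsrh wR0, r1, #2".  */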
26570 /* Output a Thumb-1 casesi dispatch sequence. */
26572 thumb1_output_casesi (rtx *operands)
26574 rtx diff_vec = PATTERN (next_active_insn (operands[0]));
26576 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26578 switch (GET_MODE(diff_vec))
26581 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26582 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
26584 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26585 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
26587 return "bl\t%___gnu_thumb1_case_si";
26589 gcc_unreachable ();
26593 /* Output a Thumb-2 casesi instruction. */
26595 thumb2_output_casesi (rtx *operands)
26597 rtx diff_vec = PATTERN (next_active_insn (operands[2]));
26599 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26601 output_asm_insn ("cmp\t%0, %1", operands);
26602 output_asm_insn ("bhi\t%l3", operands);
26603 switch (GET_MODE(diff_vec))
26606 return "tbb\t[%|pc, %0]";
26608 return "tbh\t[%|pc, %0, lsl #1]";
26612 output_asm_insn ("adr\t%4, %l2", operands);
26613 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
26614 output_asm_insn ("add\t%4, %4, %5", operands);
26619 output_asm_insn ("adr\t%4, %l2", operands);
26620 return "ldr\t%|pc, [%4, %0, lsl #2]";
26623 gcc_unreachable ();
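/* For example, a QImode dispatch table yields
	cmp	r0, r1		@ compare index against bounds
	bhi	.Ldefault
	tbb	[pc, r0]
   with the HImode variant using "tbh [pc, r0, lsl #1]" instead.  */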
26627 /* Most ARM cores are single issue, but some newer ones can dual issue.
26628 The scheduler descriptions rely on this being correct. */
26630 arm_issue_rate (void)
26655 /* A table and a function to perform ARM-specific name mangling for
26656 NEON vector types in order to conform to the AAPCS (see "Procedure
26657 Call Standard for the ARM Architecture", Appendix A). To qualify
26658 for emission with the mangled names defined in that document, a
26659 vector type must not only be of the correct mode but also be
26660 composed of NEON vector element types (e.g. __builtin_neon_qi). */
26663 enum machine_mode mode;
26664 const char *element_type_name;
26665 const char *aapcs_name;
26666 } arm_mangle_map_entry;
26668 static arm_mangle_map_entry arm_mangle_map[] = {
26669 /* 64-bit containerized types. */
26670 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
26671 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
26672 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
26673 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
26674 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
26675 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
26676 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
26677 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
26678 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
26679 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
26680 /* 128-bit containerized types. */
26681 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
26682 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
26683 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
26684 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
26685 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
26686 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
26687 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
26688 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
26689 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
26690 { VOIDmode, NULL, NULL }
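/* For example, a parameter of type int8x8_t (V8QImode with element type
   __builtin_neon_qi) mangles as "15__simd64_int8_t", so a function
   "void f (int8x8_t)" becomes "_Z1f15__simd64_int8_t".  */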
26694 arm_mangle_type (const_tree type)
26696 arm_mangle_map_entry *pos = arm_mangle_map;
26698 /* The ARM ABI documents (10th October 2008) say that "__va_list"
26699 has to be mangled as if it is in the "std" namespace.  */
26700 if (TARGET_AAPCS_BASED
26701 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
26702 return "St9__va_list";
26704 /* Half-precision float. */
26705 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
26708 if (TREE_CODE (type) != VECTOR_TYPE)
26711 /* Check the mode of the vector type, and the name of the vector
26712 element type, against the table. */
26713 while (pos->mode != VOIDmode)
26715 tree elt_type = TREE_TYPE (type);
26717 if (pos->mode == TYPE_MODE (type)
26718 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
26719 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
26720 pos->element_type_name))
26721 return pos->aapcs_name;
26726 /* Use the default mangling for unrecognized (possibly user-defined)
26727 vector types.  */
26731 /* Order of allocation of core registers for Thumb: this allocation is
26732 written over the corresponding initial entries of the array
26733 initialized with REG_ALLOC_ORDER. We allocate all low registers
26734 first. Saving and restoring a low register is usually cheaper than
26735 using a call-clobbered high register. */
26737 static const int thumb_core_reg_alloc_order[] =
26739 3, 2, 1, 0, 4, 5, 6, 7,
26740 14, 12, 8, 9, 10, 11
26743 /* Adjust register allocation order when compiling for Thumb. */
26746 arm_order_regs_for_local_alloc (void)
26748 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
26749 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
26751 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
26752 sizeof (thumb_core_reg_alloc_order));
26755 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
26758 arm_frame_pointer_required (void)
26760 return (cfun->has_nonlocal_label
26761 || SUBTARGET_FRAME_POINTER_REQUIRED
26762 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
26765 /* Thumb-1 is the only variant that cannot support conditional execution,
26766 so return true if the target is not Thumb-1.  */
26768 arm_have_conditional_execution (void)
26770 return !TARGET_THUMB1;
26774 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
26776 enum machine_mode in_mode, out_mode;
26779 if (TREE_CODE (type_out) != VECTOR_TYPE
26780 || TREE_CODE (type_in) != VECTOR_TYPE
26781 || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
26784 out_mode = TYPE_MODE (TREE_TYPE (type_out));
26785 out_n = TYPE_VECTOR_SUBPARTS (type_out);
26786 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26787 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26789 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
26790 decl of the vectorized builtin for the appropriate vector mode.
26791 NULL_TREE is returned if no such builtin is available. */
26792 #undef ARM_CHECK_BUILTIN_MODE
26793 #define ARM_CHECK_BUILTIN_MODE(C) \
26794 (out_mode == SFmode && out_n == C \
26795 && in_mode == SFmode && in_n == C)
26797 #undef ARM_FIND_VRINT_VARIANT
26798 #define ARM_FIND_VRINT_VARIANT(N) \
26799 (ARM_CHECK_BUILTIN_MODE (2) \
26800 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
26801 : (ARM_CHECK_BUILTIN_MODE (4) \
26802 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
26805 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
26807 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
26810 case BUILT_IN_FLOORF:
26811 return ARM_FIND_VRINT_VARIANT (vrintm);
26812 case BUILT_IN_CEILF:
26813 return ARM_FIND_VRINT_VARIANT (vrintp);
26814 case BUILT_IN_TRUNCF:
26815 return ARM_FIND_VRINT_VARIANT (vrintz);
26816 case BUILT_IN_ROUNDF:
26817 return ARM_FIND_VRINT_VARIANT (vrinta);
26824 #undef ARM_CHECK_BUILTIN_MODE
26825 #undef ARM_FIND_VRINT_VARIANT
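/* For example, with ARMv8 VFP/NEON and -funsafe-math-optimizations, a
   vectorized call to floorf on a V4SF vector resolves to the
   ARM_BUILTIN_NEON_vrintmv4sf builtin, i.e. the VRINTM instruction.  */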
26827 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
26828 static HOST_WIDE_INT
26829 arm_vector_alignment (const_tree type)
26831 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
26833 if (TARGET_AAPCS_BASED)
26834 align = MIN (align, 64);
26839 static unsigned int
26840 arm_autovectorize_vector_sizes (void)
26842 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
26846 arm_vector_alignment_reachable (const_tree type, bool is_packed)
26848 /* Vectors which aren't in packed structures will not be less aligned than
26849 the natural alignment of their element type, so this is safe. */
26850 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
26853 return default_builtin_vector_alignment_reachable (type, is_packed);
26857 arm_builtin_support_vector_misalignment (enum machine_mode mode,
26858 const_tree type, int misalignment,
26861 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
26863 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
26868 /* If the misalignment is unknown, we should be able to handle the access
26869 so long as it is not to a member of a packed data structure. */
26870 if (misalignment == -1)
26873 /* Return true if the misalignment is a multiple of the natural alignment
26874 of the vector's element type. This is probably always going to be
26875 true in practice, since we've already established that this isn't a
26876 packed access.  */
26877 return ((misalignment % align) == 0);
26880 return default_builtin_support_vector_misalignment (mode, type, misalignment,
26885 arm_conditional_register_usage (void)
26889 if (TARGET_THUMB1 && optimize_size)
26891 /* When optimizing for size on Thumb-1, it's better not
26892 to use the HI regs, because of the overhead of
26893 stacking them.  */
26894 for (regno = FIRST_HI_REGNUM;
26895 regno <= LAST_HI_REGNUM; ++regno)
26896 fixed_regs[regno] = call_used_regs[regno] = 1;
26899 /* The link register can be clobbered by any branch insn,
26900 but we have no way to track that at present, so mark
26901 it as unavailable. */
26903 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
26905 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
26907 /* VFPv3 registers are disabled when earlier VFP
26908 versions are selected due to the definition of
26909 LAST_VFP_REGNUM. */
26910 for (regno = FIRST_VFP_REGNUM;
26911 regno <= LAST_VFP_REGNUM; ++ regno)
26913 fixed_regs[regno] = 0;
26914 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
26915 || regno >= FIRST_VFP_REGNUM + 32;
26919 if (TARGET_REALLY_IWMMXT)
26921 regno = FIRST_IWMMXT_GR_REGNUM;
26922 /* The 2002/10/09 revision of the XScale ABI has wCG0
26923 and wCG1 as call-preserved registers. The 2002/11/21
26924 revision changed this so that all wCG registers are
26925 scratch registers. */
26926 for (regno = FIRST_IWMMXT_GR_REGNUM;
26927 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
26928 fixed_regs[regno] = 0;
26929 /* The XScale ABI has wR0 - wR9 as scratch registers,
26930 the rest as call-preserved registers. */
26931 for (regno = FIRST_IWMMXT_REGNUM;
26932 regno <= LAST_IWMMXT_REGNUM; ++ regno)
26934 fixed_regs[regno] = 0;
26935 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
26939 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
26941 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
26942 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
26944 else if (TARGET_APCS_STACK)
26946 fixed_regs[10] = 1;
26947 call_used_regs[10] = 1;
26949 /* -mcaller-super-interworking reserves r11 for calls to
26950 _interwork_r11_call_via_rN(). Making the register global
26951 is an easy way of ensuring that it remains valid for all
26952 calls.  */
26953 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
26954 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
26956 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
26957 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
26958 if (TARGET_CALLER_INTERWORKING)
26959 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
26961 SUBTARGET_CONDITIONAL_REGISTER_USAGE
26965 arm_preferred_rename_class (reg_class_t rclass)
26967 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
26968 using GENERAL_REGS.  During the register rename pass we therefore
26969 prefer LO_REGS, which can reduce code size.  */
26970 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
26976 /* Compute the attribute "length" of insn "*push_multi".
26977 So this function MUST be kept in sync with that insn pattern. */
26979 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
26981 int i, regno, hi_reg;
26982 int num_saves = XVECLEN (parallel_op, 0);
26992 regno = REGNO (first_op);
26993 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
26994 for (i = 1; i < num_saves && !hi_reg; i++)
26996 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
26997 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27005 /* Compute the number of instructions emitted by output_move_double. */
27007 arm_count_output_move_double_insns (rtx *operands)
27011 /* output_move_double may modify the operands array, so call it
27012 here on a copy of the array. */
27013 ops[0] = operands[0];
27014 ops[1] = operands[1];
27015 output_move_double (ops, false, &count);
27020 vfp3_const_double_for_fract_bits (rtx operand)
27022 REAL_VALUE_TYPE r0;
27024 if (!CONST_DOUBLE_P (operand))
27027 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27028 if (exact_real_inverse (DFmode, &r0))
27030 if (exact_real_truncate (DFmode, &r0))
27032 HOST_WIDE_INT value = real_to_integer (&r0);
27033 value = value & 0xffffffff;
27034 if ((value != 0) && ( (value & (value - 1)) == 0))
27035 return int_log2 (value);
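/* For example, the constant 2^-16 has the exact inverse 65536, a power
   of two, so 16 is returned; such constants allow a multiplication to
   be folded into a fixed-point conversion with 16 fraction bits.  */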
27041 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27044 arm_pre_atomic_barrier (enum memmodel model)
27046 if (need_atomic_barrier_p (model, true))
27047 emit_insn (gen_memory_barrier ());
27051 arm_post_atomic_barrier (enum memmodel model)
27053 if (need_atomic_barrier_p (model, false))
27054 emit_insn (gen_memory_barrier ());
27057 /* Emit the load-exclusive and store-exclusive instructions.
27058 Use acquire and release versions if necessary. */
27061 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
27063 rtx (*gen) (rtx, rtx);
27069 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27070 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27071 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27072 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27074 gcc_unreachable ();
27081 case QImode: gen = gen_arm_load_exclusiveqi; break;
27082 case HImode: gen = gen_arm_load_exclusivehi; break;
27083 case SImode: gen = gen_arm_load_exclusivesi; break;
27084 case DImode: gen = gen_arm_load_exclusivedi; break;
27086 gcc_unreachable ();
27090 emit_insn (gen (rval, mem));
27094 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
27097 rtx (*gen) (rtx, rtx, rtx);
27103 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27104 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27105 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27106 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27108 gcc_unreachable ();
27115 case QImode: gen = gen_arm_store_exclusiveqi; break;
27116 case HImode: gen = gen_arm_store_exclusivehi; break;
27117 case SImode: gen = gen_arm_store_exclusivesi; break;
27118 case DImode: gen = gen_arm_store_exclusivedi; break;
27120 gcc_unreachable ();
27124 emit_insn (gen (bval, rval, mem));
27127 /* Mark the previous jump instruction as unlikely. */
27130 emit_unlikely_jump (rtx insn)
27132 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
27134 insn = emit_jump_insn (insn);
27135 add_reg_note (insn, REG_BR_PROB, very_unlikely);
27138 /* Expand a compare and swap pattern. */
27141 arm_expand_compare_and_swap (rtx operands[])
27143 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27144 enum machine_mode mode;
27145 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27147 bval = operands[0];
27148 rval = operands[1];
27150 oldval = operands[3];
27151 newval = operands[4];
27152 is_weak = operands[5];
27153 mod_s = operands[6];
27154 mod_f = operands[7];
27155 mode = GET_MODE (mem);
27157 /* Normally the succ memory model must be stronger than fail, but in the
27158 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27159 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27161 if (TARGET_HAVE_LDACQ
27162 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
27163 && INTVAL (mod_s) == MEMMODEL_RELEASE)
27164 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27170 /* For narrow modes, we're going to perform the comparison in SImode,
27171 so do the zero-extension now. */
27172 rval = gen_reg_rtx (SImode);
27173 oldval = convert_modes (SImode, mode, oldval, true);
27177 /* Force the value into a register if needed. We waited until after
27178 the zero-extension above to do this properly. */
27179 if (!arm_add_operand (oldval, SImode))
27180 oldval = force_reg (SImode, oldval);
27184 if (!cmpdi_operand (oldval, mode))
27185 oldval = force_reg (mode, oldval);
27189 gcc_unreachable ();
27194 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27195 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27196 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27197 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27199 gcc_unreachable ();
27202 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27204 if (mode == QImode || mode == HImode)
27205 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27207 /* In all cases, we arrange for success to be signaled by Z set.
27208 This arrangement allows for the boolean result to be used directly
27209 in a subsequent branch, post optimization. */
27210 x = gen_rtx_REG (CCmode, CC_REGNUM);
27211 x = gen_rtx_EQ (SImode, x, const0_rtx);
27212 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
27215 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27216 another memory store between the load-exclusive and store-exclusive can
27217 reset the monitor from Exclusive to Open state. This means we must wait
27218 until after reload to split the pattern, lest we get a register spill in
27219 the middle of the atomic sequence. */
27222 arm_split_compare_and_swap (rtx operands[])
27224 rtx rval, mem, oldval, newval, scratch;
27225 enum machine_mode mode;
27226 enum memmodel mod_s, mod_f;
27228 rtx label1, label2, x, cond;
27230 rval = operands[0];
27232 oldval = operands[2];
27233 newval = operands[3];
27234 is_weak = (operands[4] != const0_rtx);
27235 mod_s = (enum memmodel) INTVAL (operands[5]);
27236 mod_f = (enum memmodel) INTVAL (operands[6]);
27237 scratch = operands[7];
27238 mode = GET_MODE (mem);
27240 bool use_acquire = TARGET_HAVE_LDACQ
27241 && !(mod_s == MEMMODEL_RELAXED
27242 || mod_s == MEMMODEL_CONSUME
27243 || mod_s == MEMMODEL_RELEASE);
27245 bool use_release = TARGET_HAVE_LDACQ
27246 && !(mod_s == MEMMODEL_RELAXED
27247 || mod_s == MEMMODEL_CONSUME
27248 || mod_s == MEMMODEL_ACQUIRE);
27250 /* Checks whether a barrier is needed and emits one accordingly. */
27251 if (!(use_acquire || use_release))
27252 arm_pre_atomic_barrier (mod_s);
27257 label1 = gen_label_rtx ();
27258 emit_label (label1);
27260 label2 = gen_label_rtx ();
27262 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27264 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27265 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27266 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27267 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27268 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27270 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27272 /* Weak or strong, we want EQ to be true for success, so that we
27273 match the flags that we got from the compare above. */
27274 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27275 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27276 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
27280 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27281 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27282 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27283 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27286 if (mod_f != MEMMODEL_RELAXED)
27287 emit_label (label2);
27289 /* Checks whether a barrier is needed and emits one accordingly. */
27290 if (!(use_acquire || use_release))
27291 arm_post_atomic_barrier (mod_s);
27293 if (mod_f == MEMMODEL_RELAXED)
27294 emit_label (label2);
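/* A sketch of what the split emits for a strong SImode compare-and-swap
   (register numbers are illustrative only):
	1:	ldrex	r0, [r1]	@ rval = *mem
		cmp	r0, r2		@ rval == oldval?
		bne	2f		@ no: fail
		strex	r3, r4, [r1]	@ try *mem = newval
		cmp	r3, #0
		bne	1b		@ strong: retry a failed store
	2:
   A weak compare-and-swap omits the backward branch, and ldaex/stlex
   replace ldrex/strex when acquire/release semantics are required.  */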
27298 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27299 rtx value, rtx model_rtx, rtx cond)
27301 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
27302 enum machine_mode mode = GET_MODE (mem);
27303 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
27306 bool use_acquire = TARGET_HAVE_LDACQ
27307 && !(model == MEMMODEL_RELAXED
27308 || model == MEMMODEL_CONSUME
27309 || model == MEMMODEL_RELEASE);
27311 bool use_release = TARGET_HAVE_LDACQ
27312 && !(model == MEMMODEL_RELAXED
27313 || model == MEMMODEL_CONSUME
27314 || model == MEMMODEL_ACQUIRE);
27316 /* Checks whether a barrier is needed and emits one accordingly. */
27317 if (!(use_acquire || use_release))
27318 arm_pre_atomic_barrier (model);
27320 label = gen_label_rtx ();
27321 emit_label (label);
27324 new_out = gen_lowpart (wmode, new_out);
27326 old_out = gen_lowpart (wmode, old_out);
27329 value = simplify_gen_subreg (wmode, value, mode, 0);
27331 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27340 x = gen_rtx_AND (wmode, old_out, value);
27341 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27342 x = gen_rtx_NOT (wmode, new_out);
27343 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27347 if (CONST_INT_P (value))
27349 value = GEN_INT (-INTVAL (value));
27355 if (mode == DImode)
27357 /* DImode plus/minus need to clobber flags. */
27358 /* The adddi3 and subdi3 patterns are incorrectly written so that
27359 they require matching operands, even when we could easily support
27360 three operands. Thankfully, this can be fixed up post-splitting,
27361 as the individual add+adc patterns do accept three operands and
27362 post-reload cprop can make these moves go away. */
27363 emit_move_insn (new_out, old_out);
27365 x = gen_adddi3 (new_out, new_out, value);
27367 x = gen_subdi3 (new_out, new_out, value);
27374 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27375 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27379 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27382 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27383 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27385 /* Checks whether a barrier is needed and emits one accordingly. */
27386 if (!(use_acquire || use_release))
27387 arm_post_atomic_barrier (model);
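/* A sketch of the loop emitted here for an SImode atomic add
   (illustrative registers):
	1:	ldrex	r0, [r2]	@ old_out = *mem
		add	r1, r0, r3	@ new_out = old_out + value
		strex	ip, r1, [r2]	@ try *mem = new_out
		cmp	ip, #0
		bne	1b		@ retry until the store succeeds
   NOT is handled above as an AND followed by a NOT (atomic NAND), and
   DImode operations use the doubleword exclusive patterns.  */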
27390 #define MAX_VECT_LEN 16
27392 struct expand_vec_perm_d
27394 rtx target, op0, op1;
27395 unsigned char perm[MAX_VECT_LEN];
27396 enum machine_mode vmode;
27397 unsigned char nelt;
27402 /* Generate a variable permutation. */
27405 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27407 enum machine_mode vmode = GET_MODE (target);
27408 bool one_vector_p = rtx_equal_p (op0, op1);
27410 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27411 gcc_checking_assert (GET_MODE (op0) == vmode);
27412 gcc_checking_assert (GET_MODE (op1) == vmode);
27413 gcc_checking_assert (GET_MODE (sel) == vmode);
27414 gcc_checking_assert (TARGET_NEON);
27418 if (vmode == V8QImode)
27419 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27421 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27427 if (vmode == V8QImode)
27429 pair = gen_reg_rtx (V16QImode);
27430 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27431 pair = gen_lowpart (TImode, pair);
27432 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27436 pair = gen_reg_rtx (OImode);
27437 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27438 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
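/* For example, for V8QImode with distinct operands the two D registers
   are first combined into a quad register, and a two-register table
   lookup such as "vtbl.8 d0, {d2, d3}, d4" is emitted.  */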
27444 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27446 enum machine_mode vmode = GET_MODE (target);
27447 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27448 bool one_vector_p = rtx_equal_p (op0, op1);
27449 rtx rmask[MAX_VECT_LEN], mask;
27451 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27452 numbering of elements for big-endian, we must reverse the order. */
27453 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27455 /* The VTBL instruction does not use a modulo index, so we must take care
27456 of that ourselves. */
27457 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27458 for (i = 0; i < nelt; ++i)
27460 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27461 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27463 arm_expand_vec_perm_1 (target, op0, op1, sel);
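/* For example, with two V8QI operands the mask is 15, so a selector
   element of 19 is reduced to 3 before the table lookup, giving the
   modulo behavior that VEC_PERM_EXPR requires.  */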
27466 /* Generate or test for an insn that supports a constant permutation. */
27468 /* Recognize patterns for the VUZP insns. */
27471 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27473 unsigned int i, odd, mask, nelt = d->nelt;
27474 rtx out0, out1, in0, in1, x;
27475 rtx (*gen)(rtx, rtx, rtx, rtx);
27477 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27480 /* Note that these are little-endian tests. Adjust for big-endian later. */
27481 if (d->perm[0] == 0)
27483 else if (d->perm[0] == 1)
27487 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27489 for (i = 0; i < nelt; i++)
27491 unsigned elt = (i * 2 + odd) & mask;
27492 if (d->perm[i] != elt)
27502 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
27503 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
27504 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
27505 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
27506 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
27507 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
27508 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
27509 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
27511 gcc_unreachable ();
27516 if (BYTES_BIG_ENDIAN)
27518 x = in0, in0 = in1, in1 = x;
27523 out1 = gen_reg_rtx (d->vmode);
27525 x = out0, out0 = out1, out1 = x;
27527 emit_insn (gen (out0, in0, in1, out1));
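/* For example, for two V4SI operands the selector {0, 2, 4, 6}
   (odd == 0) extracts the even-indexed elements of the concatenation,
   which is exactly the first output of VUZP.  */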
27531 /* Recognize patterns for the VZIP insns. */
27534 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
27536 unsigned int i, high, mask, nelt = d->nelt;
27537 rtx out0, out1, in0, in1, x;
27538 rtx (*gen)(rtx, rtx, rtx, rtx);
27540 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27543 /* Note that these are little-endian tests. Adjust for big-endian later. */
27545 if (d->perm[0] == high)
27547 else if (d->perm[0] == 0)
27551 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27553 for (i = 0; i < nelt / 2; i++)
27555 unsigned elt = (i + high) & mask;
27556 if (d->perm[i * 2] != elt)
27558 elt = (elt + nelt) & mask;
27559 if (d->perm[i * 2 + 1] != elt)
27569 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
27570 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
27571 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
27572 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
27573 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
27574 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
27575 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
27576 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
27578 gcc_unreachable ();
27583 if (BYTES_BIG_ENDIAN)
27585 x = in0, in0 = in1, in1 = x;
27590 out1 = gen_reg_rtx (d->vmode);
27592 x = out0, out0 = out1, out1 = x;
27594 emit_insn (gen (out0, in0, in1, out1));
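/* For example, for two V4SI operands the selector {0, 4, 1, 5}
   (high == 0) interleaves the low halves of the two inputs, which is
   the first output of VZIP.  */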
27598 /* Recognize patterns for the VREV insns. */
27601 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
27603 unsigned int i, j, diff, nelt = d->nelt;
27604 rtx (*gen)(rtx, rtx, rtx);
27606 if (!d->one_vector_p)
27615 case V16QImode: gen = gen_neon_vrev64v16qi; break;
27616 case V8QImode: gen = gen_neon_vrev64v8qi; break;
27624 case V16QImode: gen = gen_neon_vrev32v16qi; break;
27625 case V8QImode: gen = gen_neon_vrev32v8qi; break;
27626 case V8HImode: gen = gen_neon_vrev64v8hi; break;
27627 case V4HImode: gen = gen_neon_vrev64v4hi; break;
27635 case V16QImode: gen = gen_neon_vrev16v16qi; break;
27636 case V8QImode: gen = gen_neon_vrev16v8qi; break;
27637 case V8HImode: gen = gen_neon_vrev32v8hi; break;
27638 case V4HImode: gen = gen_neon_vrev32v4hi; break;
27639 case V4SImode: gen = gen_neon_vrev64v4si; break;
27640 case V2SImode: gen = gen_neon_vrev64v2si; break;
27641 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
27642 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
27651 for (i = 0; i < nelt ; i += diff + 1)
27652 for (j = 0; j <= diff; j += 1)
27654 /* This is guaranteed to be true as the value of diff
27655 is 7, 3 or 1, and we should have enough elements in the
27656 queue to generate this. Getting a vector mask with a
27657 value of diff other than these values implies that
27658 something is wrong by the time we get here. */
27659 gcc_assert (i + j < nelt);
27660 if (d->perm[i + j] != i + diff - j)
27668 /* ??? The third operand is an artifact of the builtin infrastructure
27669 and is ignored by the actual instruction. */
27670 emit_insn (gen (d->target, d->op0, const0_rtx));
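/* For example, the one-operand V8QI selector {3, 2, 1, 0, 7, 6, 5, 4}
   (diff == 3) reverses the bytes within each 32-bit group and is
   matched to VREV32.8.  */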
27674 /* Recognize patterns for the VTRN insns. */
27677 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
27679 unsigned int i, odd, mask, nelt = d->nelt;
27680 rtx out0, out1, in0, in1, x;
27681 rtx (*gen)(rtx, rtx, rtx, rtx);
27683 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27686 /* Note that these are little-endian tests. Adjust for big-endian later. */
27687 if (d->perm[0] == 0)
27689 else if (d->perm[0] == 1)
27693 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27695 for (i = 0; i < nelt; i += 2)
27697 if (d->perm[i] != i + odd)
27699 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
27709 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
27710 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
27711 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
27712 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
27713 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
27714 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
27715 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
27716 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
27718 gcc_unreachable ();
27723 if (BYTES_BIG_ENDIAN)
27725 x = in0, in0 = in1, in1 = x;
27730 out1 = gen_reg_rtx (d->vmode);
27732 x = out0, out0 = out1, out1 = x;
27734 emit_insn (gen (out0, in0, in1, out1));
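/* For example, for two V4SI operands the selector {0, 4, 2, 6}
   (odd == 0) pairs up the matching even lanes of the two inputs, which
   is the first output of VTRN.  */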
27738 /* Recognize patterns for the VEXT insns. */
27741 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
27743 unsigned int i, nelt = d->nelt;
27744 rtx (*gen) (rtx, rtx, rtx, rtx);
27747 unsigned int location;
27749 unsigned int next = d->perm[0] + 1;
27751 /* TODO: Handle GCC's numbering of elements for big-endian. */
27752 if (BYTES_BIG_ENDIAN)
27755 /* Check if the extracted indexes are increasing by one. */
27756 for (i = 1; i < nelt; next++, i++)
27758 /* If we hit the most significant element of the 2nd vector in
27759 the previous iteration, no need to test further. */
27760 if (next == 2 * nelt)
27763 /* If we are operating on only one vector: it could be a
27764 rotation. If there are only two elements of size < 64, let
27765 arm_evpc_neon_vrev catch it. */
27766 if (d->one_vector_p && (next == nelt))
27768 if ((nelt == 2) && (d->vmode != V2DImode))
27774 if (d->perm[i] != next)
27778 location = d->perm[0];
27782 case V16QImode: gen = gen_neon_vextv16qi; break;
27783 case V8QImode: gen = gen_neon_vextv8qi; break;
27784 case V4HImode: gen = gen_neon_vextv4hi; break;
27785 case V8HImode: gen = gen_neon_vextv8hi; break;
27786 case V2SImode: gen = gen_neon_vextv2si; break;
27787 case V4SImode: gen = gen_neon_vextv4si; break;
27788 case V2SFmode: gen = gen_neon_vextv2sf; break;
27789 case V4SFmode: gen = gen_neon_vextv4sf; break;
27790 case V2DImode: gen = gen_neon_vextv2di; break;
27799 offset = GEN_INT (location);
27800 emit_insn (gen (d->target, d->op0, d->op1, offset));
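/* For example, the two-operand V8QI selector {3, 4, 5, 6, 7, 8, 9, 10}
   takes consecutive elements starting at index 3 and is emitted as
   "vext.8 d0, d1, d2, #3".  */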
27804 /* The NEON VTBL instruction is a fully variable permutation that's even
27805 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
27806 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
27807 can do slightly better by expanding this as a constant where we don't
27808 have to apply a mask. */
27811 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
27813 rtx rperm[MAX_VECT_LEN], sel;
27814 enum machine_mode vmode = d->vmode;
27815 unsigned int i, nelt = d->nelt;
27817 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27818 numbering of elements for big-endian, we must reverse the order. */
27819 if (BYTES_BIG_ENDIAN)
27825 /* Generic code will try constant permutation twice. Once with the
27826 original mode and again with the elements lowered to QImode.
27827 So wait and don't do the selector expansion ourselves. */
27828 if (vmode != V8QImode && vmode != V16QImode)
27831 for (i = 0; i < nelt; ++i)
27832 rperm[i] = GEN_INT (d->perm[i]);
27833 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
27834 sel = force_reg (vmode, sel);
27836 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
27841 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
27843 /* Check if the input mask matches vext before reordering the
27846 if (arm_evpc_neon_vext (d))
27849 /* The pattern matching functions above are written to look for a small
27850 number to begin the sequence (0, 1, N/2). If we begin with an index
27851 from the second operand, we can swap the operands. */
27852 if (d->perm[0] >= d->nelt)
27854 unsigned i, nelt = d->nelt;
27857 for (i = 0; i < nelt; ++i)
27858 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
27867 if (arm_evpc_neon_vuzp (d))
27869 if (arm_evpc_neon_vzip (d))
27871 if (arm_evpc_neon_vrev (d))
27873 if (arm_evpc_neon_vtrn (d))
27875 return arm_evpc_neon_vtbl (d);
27880 /* Expand a vec_perm_const pattern. */
27883 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
27885 struct expand_vec_perm_d d;
27886 int i, nelt, which;
27892 d.vmode = GET_MODE (target);
27893 gcc_assert (VECTOR_MODE_P (d.vmode));
27894 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
27895 d.testing_p = false;
27897 for (i = which = 0; i < nelt; ++i)
27899 rtx e = XVECEXP (sel, 0, i);
27900 int ei = INTVAL (e) & (2 * nelt - 1);
27901 which |= (ei < nelt ? 1 : 2);
27911 d.one_vector_p = false;
27912 if (!rtx_equal_p (op0, op1))
27915 /* The elements of PERM do not suggest that only the first operand
27916 is used, but both operands are identical. Allow easier matching
27917 of the permutation by folding the permutation into the single
27921 for (i = 0; i < nelt; ++i)
27922 d.perm[i] &= nelt - 1;
27924 d.one_vector_p = true;
27929 d.one_vector_p = true;
27933 return arm_expand_vec_perm_const_1 (&d);
27936 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
27939 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
27940 const unsigned char *sel)
27942 struct expand_vec_perm_d d;
27943 unsigned int i, nelt, which;
27947 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
27948 d.testing_p = true;
27949 memcpy (d.perm, sel, nelt);
27951 /* Categorize the set of elements in the selector. */
27952 for (i = which = 0; i < nelt; ++i)
27954 unsigned char e = d.perm[i];
27955 gcc_assert (e < 2 * nelt);
27956 which |= (e < nelt ? 1 : 2);
27959 /* For all elements from second vector, fold the elements to first. */
27961 for (i = 0; i < nelt; ++i)
27964 /* Check whether the mask can be applied to the vector type. */
27965 d.one_vector_p = (which != 3);
27967 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
27968 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
27969 if (!d.one_vector_p)
27970 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
27973 ret = arm_expand_vec_perm_const_1 (&d);
27980 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
27982 /* If we are soft float, and either have ldrd or the access is no
27983 wider than a word, then all auto increment forms are ok.  */
27984 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
27989 /* Post-increment and pre-decrement are supported for all
27990 instruction forms except for vector forms.  */
27993 if (VECTOR_MODE_P (mode))
27995 if (code != ARM_PRE_DEC)
28005 /* Without LDRD, when the mode size is greater than
28006 word size there is no point in auto-incrementing,
28007 because ldm and stm will not have these forms.  */
28008 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28011 /* Vector and floating point modes do not support
28012 these auto increment forms. */
28013 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28026 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
28027 on ARM, since we know that shifts by negative amounts are no-ops.
28028 Additionally, the default expansion code is not available or suitable
28029 for post-reload insn splits (this can occur when the register allocator
28030 chooses not to do a shift in NEON).
28032 This function is used in both initial expand and post-reload splits, and
28033 handles all kinds of 64-bit shifts.
28035 Input requirements:
28036 - It is safe for the input and output to be the same register, but
28037 early-clobber rules apply for the shift amount and scratch registers.
28038 - Shift by register requires both scratch registers. In all other cases
28039 the scratch registers may be NULL.
28040 - Ashiftrt by a register also clobbers the CC register. */
28042 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28043 rtx amount, rtx scratch1, rtx scratch2)
28045 rtx out_high = gen_highpart (SImode, out);
28046 rtx out_low = gen_lowpart (SImode, out);
28047 rtx in_high = gen_highpart (SImode, in);
28048 rtx in_low = gen_lowpart (SImode, in);
28051 in = the register pair containing the input value.
28052 out = the destination register pair.
28053 up = the high- or low-part of each pair.
28054 down = the opposite part to "up".
28055 In a shift, we can consider bits to shift from "up"-stream to
28056 "down"-stream, so in a left-shift "up" is the low-part and "down"
28057 is the high-part of each register pair. */
28059 rtx out_up = code == ASHIFT ? out_low : out_high;
28060 rtx out_down = code == ASHIFT ? out_high : out_low;
28061 rtx in_up = code == ASHIFT ? in_low : in_high;
28062 rtx in_down = code == ASHIFT ? in_high : in_low;
28064 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28066 && (REG_P (out) || GET_CODE (out) == SUBREG)
28067 && GET_MODE (out) == DImode);
28069 && (REG_P (in) || GET_CODE (in) == SUBREG)
28070 && GET_MODE (in) == DImode);
28072 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28073 && GET_MODE (amount) == SImode)
28074 || CONST_INT_P (amount)));
28075 gcc_assert (scratch1 == NULL
28076 || (GET_CODE (scratch1) == SCRATCH)
28077 || (GET_MODE (scratch1) == SImode
28078 && REG_P (scratch1)));
28079 gcc_assert (scratch2 == NULL
28080 || (GET_CODE (scratch2) == SCRATCH)
28081 || (GET_MODE (scratch2) == SImode
28082 && REG_P (scratch2)));
28083 gcc_assert (!REG_P (out) || !REG_P (amount)
28084 || !HARD_REGISTER_P (out)
28085 || (REGNO (out) != REGNO (amount)
28086 && REGNO (out) + 1 != REGNO (amount)));
28088 /* Macros to make following code more readable. */
28089 #define SUB_32(DEST,SRC) \
28090 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28091 #define RSB_32(DEST,SRC) \
28092 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28093 #define SUB_S_32(DEST,SRC) \
28094 gen_addsi3_compare0 ((DEST), (SRC), \
28096 #define SET(DEST,SRC) \
28097 gen_rtx_SET (SImode, (DEST), (SRC))
28098 #define SHIFT(CODE,SRC,AMOUNT) \
28099 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28100 #define LSHIFT(CODE,SRC,AMOUNT) \
28101 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28102 SImode, (SRC), (AMOUNT))
28103 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28104 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28105 SImode, (SRC), (AMOUNT))
28107 gen_rtx_IOR (SImode, (A), (B))
28108 #define BRANCH(COND,LABEL) \
28109 gen_arm_cond_branch ((LABEL), \
28110 gen_rtx_ ## COND (CCmode, cc_reg, \
28114 /* Shifts by register and shifts by constant are handled separately. */
28115 if (CONST_INT_P (amount))
28117 /* We have a shift-by-constant. */
28119 /* First, handle out-of-range shift amounts.
28120 In both cases we try to match the result an ARM instruction in a
28121 shift-by-register would give. This helps reduce execution
28122 differences between optimization levels, but it won't stop other
28123 parts of the compiler doing different things. This is "undefined
28124 behaviour", in any case.  */
28125 if (INTVAL (amount) <= 0)
28126 emit_insn (gen_movdi (out, in));
28127 else if (INTVAL (amount) >= 64)
28129 if (code == ASHIFTRT)
28131 rtx const31_rtx = GEN_INT (31);
28132 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28133 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28136 emit_insn (gen_movdi (out, const0_rtx));
28139 /* Now handle valid shifts. */
28140 else if (INTVAL (amount) < 32)
28142 /* Shifts by a constant less than 32. */
28143 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28145 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28146 emit_insn (SET (out_down,
28147 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28149 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28153 /* Shifts by a constant greater than 31. */
28154 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28156 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28157 if (code == ASHIFTRT)
28158 emit_insn (gen_ashrsi3 (out_up, in_up,
28161 emit_insn (SET (out_up, const0_rtx));
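/* For example, an ASHIFT of a DImode value by 40 becomes
	lsl	rHI, rLO, #8	@ out "down" = in "up" << (40 - 32)
	mov	rLO, #0
   while an ASHIFTRT by 40 instead pairs "asr rLO, rHI, #8" with
   "asr rHI, rHI, #31" to replicate the sign bit.  */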
28166 /* We have a shift-by-register. */
28167 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28169 /* This alternative requires the scratch registers. */
28170 gcc_assert (scratch1 && REG_P (scratch1));
28171 gcc_assert (scratch2 && REG_P (scratch2));
28173 /* We will need the values "amount-32" and "32-amount" later.
28174 Swapping them around now allows the later code to be more general. */
28178 emit_insn (SUB_32 (scratch1, amount));
28179 emit_insn (RSB_32 (scratch2, amount));
28182 emit_insn (RSB_32 (scratch1, amount));
28183 /* Also set CC = amount > 32. */
28184 emit_insn (SUB_S_32 (scratch2, amount));
28187 emit_insn (RSB_32 (scratch1, amount));
28188 emit_insn (SUB_32 (scratch2, amount));
28191 gcc_unreachable ();
28194 /* Emit code like this:
28197 out_down = in_down << amount;
28198 out_down = (in_up << (amount - 32)) | out_down;
28199 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28200 out_up = in_up << amount;
28203 out_down = in_down >> amount;
28204 out_down = (in_up << (32 - amount)) | out_down;
28206 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28207 out_up = in_up << amount;
28210 out_down = in_down >> amount;
28211 out_down = (in_up << (32 - amount)) | out_down;
28213 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28214 out_up = in_up << amount;
28216 The ARM and Thumb2 variants are the same but implemented slightly
28217 differently. If this were only called during expand we could just
28218 use the Thumb2 case and let combine do the right thing, but this
28219 can also be called from post-reload splitters. */
28221 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28223 if (!TARGET_THUMB2)
28225 /* Emit code for ARM mode. */
28226 emit_insn (SET (out_down,
28227 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28228 if (code == ASHIFTRT)
28230 rtx done_label = gen_label_rtx ();
28231 emit_jump_insn (BRANCH (LT, done_label));
28232 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28234 emit_label (done_label);
28237 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28242 /* Emit code for Thumb2 mode.
28243 Thumb2 can't do shift and or in one insn. */
28244 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28245 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28247 if (code == ASHIFTRT)
28249 rtx done_label = gen_label_rtx ();
28250 emit_jump_insn (BRANCH (LT, done_label));
28251 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28252 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28253 emit_label (done_label);
28257 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28258 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28262 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28277 /* Return true if this is a valid comparison operation, and force
28278 the operands into a form that is valid.  */
28280 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28282 enum rtx_code code = GET_CODE (*comparison);
28284 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28285 ? GET_MODE (*op2) : GET_MODE (*op1);
28287 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28289 if (code == UNEQ || code == LTGT)
28292 code_int = (int)code;
28293 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28294 PUT_CODE (*comparison, (enum rtx_code)code_int);
28299 if (!arm_add_operand (*op1, mode))
28300 *op1 = force_reg (mode, *op1);
28301 if (!arm_add_operand (*op2, mode))
28302 *op2 = force_reg (mode, *op2);
28306 if (!cmpdi_operand (*op1, mode))
28307 *op1 = force_reg (mode, *op1);
28308 if (!cmpdi_operand (*op2, mode))
28309 *op2 = force_reg (mode, *op2);
28314 if (!arm_float_compare_operand (*op1, mode))
28315 *op1 = force_reg (mode, *op1);
28316 if (!arm_float_compare_operand (*op2, mode))
28317 *op2 = force_reg (mode, *op2);
28327 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
28329 static unsigned HOST_WIDE_INT
28330 arm_asan_shadow_offset (void)
28332 return (unsigned HOST_WIDE_INT) 1 << 29;
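/* For example, AddressSanitizer will then compute shadow addresses as
   (addr >> 3) + 0x20000000 on this target.  */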
28335 #include "gt-arm.h"