/* Output routines for GCC for ARM.
   Copyright (C) 1991-2013 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "obstack.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "reload.h"
#include "function.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "recog.h"
#include "ggc.h"
#include "except.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "debug.h"
#include "langhooks.h"
#include "df.h"
#include "intl.h"
#include "libfuncs.h"
#include "params.h"
#include "opts.h"

/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);

/* Forward function declarations.  */
static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
                             HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
                               rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
                                       unsigned HOST_WIDE_INT val,
                                       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
                                         unsigned HOST_WIDE_INT val,
                                         struct four_ints *return_sequence,
                                         int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
                                                    enum machine_mode, int *,
                                                    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (enum machine_mode);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
static bool arm_legitimate_constant_p (enum machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree arm_builtin_decl (unsigned, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
                                  tree, bool);
static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
                             const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
                                      const_tree, bool);
static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
                                      const_tree);
static rtx aapcs_libcall_value (enum machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
                                   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
#endif
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool arm_array_mode_supported_p (enum machine_mode,
                                        unsigned HOST_WIDE_INT);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
                                                     const_tree type,
                                                     int misalign,
                                                     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                             const unsigned char *sel);

static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                           tree vectype,
                                           int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
                                   enum vect_cost_for_stmt kind,
                                   struct _stmt_vec_info *stmt_info,
                                   int misalign,
                                   enum vect_cost_model_location where);

static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
                                         bool op0_preserve_value);

/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL,           0, 0, false, false, false, NULL, false }
};

/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif

#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4095 + 1 + 4088 = 8184 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
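
/* Worked example (editorial note, not part of the original sources):
   with TARGET_MAX_ANCHOR_OFFSET = 4095 and TARGET_MIN_ANCHOR_OFFSET =
   -4088, an anchor covers 4095 - (-4088) + 1 = 8184 bytes in total,
   and 8184 = 8 * 1023, which is the "divisible by eight" property the
   comment above relies on.  */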

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

struct gcc_target targetm = TARGET_INITIALIZER;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE *asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus.  */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply.  */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support.  */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support.  */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4.  */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5.  */
#define FL_THUMB      (1 << 6)        /* Thumb aware.  */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary.  */
#define FL_STRONG     (1 << 8)        /* StrongARM.  */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5.  */
#define FL_XSCALE     (1 << 10)       /* XScale.  */
/* spare              (1 << 11) */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_THUMB_DIV  (1 << 18)       /* Hardware divide (Thumb mode).  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */
#define FL_ARM_DIV    (1 << 23)       /* Hardware divide (ARM mode).  */
#define FL_ARCH8      (1 << 24)       /* Architecture 8.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */
#define FL_IWMMXT2    (1 << 30)       /* "Intel Wireless MMX2 technology".  */

/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE       (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
                       | FL_CO_PROC)

#define FL_FOR_ARCH2    FL_NOTM
#define FL_FOR_ARCH3    (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M   (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4    (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T   (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5    (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T   (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E   (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE  (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
#define FL_FOR_ARCH6    (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J   FL_FOR_ARCH6
#define FL_FOR_ARCH6K   (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z   FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK  FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2  (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M   (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7    ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A   (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R   (FL_FOR_ARCH7A | FL_THUMB_DIV)
#define FL_FOR_ARCH7M   (FL_FOR_ARCH7 | FL_THUMB_DIV)
#define FL_FOR_ARCH7EM  (FL_FOR_ARCH7M | FL_ARCH7EM)
#define FL_FOR_ARCH8A   (FL_FOR_ARCH7 | FL_ARCH6K | FL_ARCH8 | FL_THUMB_DIV \
                         | FL_ARM_DIV | FL_NOTM)
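
/* Illustrative expansion (editorial note, not in the original sources):
   by the definitions above, FL_FOR_ARCH5TE unfolds to
     FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_ARCH5 | FL_ARCH5E
            | FL_THUMB
   i.e. each FL_FOR_ARCHxx mask is the previous architecture's mask plus
   the newly introduced capability bits.  */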

/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;
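
/* Editorial note: the mask is consumed with simple bit tests; for
   instance, arm_option_override later in this file derives the arm.md
   predicates with assignments of the form
       arm_arch4 = (insn_flags & FL_ARCH4) != 0;  */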

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instructions.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */

/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))
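
/* Editorial note: assuming the usual register numbering for this port
   (Thumb hard frame pointer = r7, sp = r13, pc = r15, and a PIC register
   outside r0-r7), THUMB2_WORK_REGS evaluates to 0x7f, i.e. the low
   registers r0-r6; the sp/pc bits are already outside the 0xff mask.  */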

/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  enum base_architecture base_arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};

#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, \
  l1_size, \
  l1_line_size

/* arm generic vectorizer costs.  */
struct cpu_vec_costs arm_default_vec_cost = {
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  1,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_unalign_store_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,                                 /* Sched adjust cost.  */
  3,                                    /* Constant limit.  */
  5,                                    /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                 /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                                /* Prefer LDRD/STRD.  */
  {true, true},                         /* Prefer non short circuit.  */
  &arm_default_vec_cost,                /* Vectorizer costs.  */
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,                                 /* Sched adjust cost.  */
  1,                                    /* Constant limit.  */
  5,                                    /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                 /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                                /* Prefer LDRD/STRD.  */
  {true, true},                         /* Prefer non short circuit.  */
  &arm_default_vec_cost,                /* Vectorizer costs.  */
};

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */
const struct tune_params arm_strongarm_tune =
{
  arm_fastmul_rtx_costs,
  NULL,                                 /* Sched adjust cost.  */
  1,                                    /* Constant limit.  */
  3,                                    /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                 /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                                /* Prefer LDRD/STRD.  */
  {true, true},                         /* Prefer non short circuit.  */
  &arm_default_vec_cost,                /* Vectorizer costs.  */
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  2,                                    /* Constant limit.  */
  3,                                    /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                 /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                                /* Prefer LDRD/STRD.  */
  {true, true},                         /* Prefer non short circuit.  */
  &arm_default_vec_cost,                /* Vectorizer costs.  */
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,                                 /* Sched adjust cost.  */
  1,                                    /* Constant limit.  */
  5,                                    /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                 /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                                /* Prefer LDRD/STRD.  */
  {true, true},                         /* Prefer non short circuit.  */
  &arm_default_vec_cost,                /* Vectorizer costs.  */
};

const struct tune_params arm_v6t2_tune =
{
  arm_9e_rtx_costs,
  NULL,                                 /* Sched adjust cost.  */
  1,                                    /* Constant limit.  */
  5,                                    /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                                /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                                /* Prefer LDRD/STRD.  */
  {true, true},                         /* Prefer non short circuit.  */
  &arm_default_vec_cost,                /* Vectorizer costs.  */
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  arm_9e_rtx_costs,
  NULL,                                 /* Sched adjust cost.  */
  1,                                    /* Constant limit.  */
  5,                                    /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                                /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                                /* Prefer LDRD/STRD.  */
  {true, true},                         /* Prefer non short circuit.  */
  &arm_default_vec_cost,                /* Vectorizer costs.  */
};

const struct tune_params arm_cortex_a15_tune =
{
  arm_9e_rtx_costs,
  NULL,                                 /* Sched adjust cost.  */
  1,                                    /* Constant limit.  */
  5,                                    /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                                /* Prefer constant pool.  */
  arm_default_branch_cost,
  true,                                 /* Prefer LDRD/STRD.  */
  {true, true},                         /* Prefer non short circuit.  */
  &arm_default_vec_cost,                /* Vectorizer costs.  */
};

/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */
const struct tune_params arm_cortex_a5_tune =
{
  arm_9e_rtx_costs,
  NULL,                                 /* Sched adjust cost.  */
  1,                                    /* Constant limit.  */
  1,                                    /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                                /* Prefer constant pool.  */
  arm_cortex_a5_branch_cost,
  false,                                /* Prefer LDRD/STRD.  */
  {false, false},                       /* Prefer non short circuit.  */
  &arm_default_vec_cost,                /* Vectorizer costs.  */
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  cortex_a9_sched_adjust_cost,
  1,                                    /* Constant limit.  */
  5,                                    /* Max cond insns.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  false,                                /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                                /* Prefer LDRD/STRD.  */
  {true, true},                         /* Prefer non short circuit.  */
  &arm_default_vec_cost,                /* Vectorizer costs.  */
};

/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1 and cortex-m0plus.  */
const struct tune_params arm_v6m_tune =
{
  arm_9e_rtx_costs,
  NULL,                                 /* Sched adjust cost.  */
  1,                                    /* Constant limit.  */
  5,                                    /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                                /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                                /* Prefer LDRD/STRD.  */
  {false, false},                       /* Prefer non short circuit.  */
  &arm_default_vec_cost,                /* Vectorizer costs.  */
};

const struct tune_params arm_fa726te_tune =
{
  arm_9e_rtx_costs,
  fa726te_sched_adjust_cost,
  1,                                    /* Constant limit.  */
  5,                                    /* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                 /* Prefer constant pool.  */
  arm_default_branch_cost,
  false,                                /* Prefer LDRD/STRD.  */
  {true, true},                         /* Prefer non short circuit.  */
  &arm_default_vec_cost,                /* Vectorizer costs.  */
};

/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
   FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */

#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH
  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};

/* These are populated as command-line arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */
char arm_arch_name[] = "__ARM_ARCH_0UNK__";

/* Available values for -mfpu=.  */
static const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
  { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
#include "arm-fpus.def"
#undef ARM_FPU
};

/* Supported TLS relocations.  */
enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ   /* GNU scheme */
};

/* The maximum number of insns to be used when loading a constant.  */
static inline int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}

/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
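
/* Editorial note: because each iteration clears exactly one set bit, the
   loop runs once per set bit rather than once per bit position.  E.g. for
   value = 0x29 (binary 101001): 0x29 -> 0x28 -> 0x20 -> 0, three
   iterations, so the function returns 3.  */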

typedef struct
{
  enum machine_mode mode;
  const char *name;
} arm_fixed_mode_set;

/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
                             const char *funcname, const char *modename,
                             int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
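
/* Editorial note: as an example of the naming scheme, the call
     arm_set_fixed_optab_libfunc (add_optab, SQmode, "add", "sq", 3);
   registers "__gnu_addsq3" as the addition libfunc for SQmode, matching
   the "__gnu_"-prefixed fixed-point helpers referenced below.  */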

static void
arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
                            enum machine_mode from, const char *funcname,
                            const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
           maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
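
/* Editorial note: for example, a conversion from SQmode ("sq") to DQmode
   ("dq") satisfies all three conditions above (both fixed-point, both
   signed, both fract), so
     arm_set_fixed_conv_libfunc (fract_optab, DQmode, SQmode,
                                 "fract", "dq", "sq");
   registers "__gnu_fractsqdq2", with the trailing "2" mirroring the
   fixed-bit.h suffix convention.  */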

/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (2 * UNITS_PER_WORD);

  /* There are no special library functions unless we are using the
     general AAPCS.  */
  if (arm_abi != ARM_ABI_AAPCS)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
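
  /* Editorial note: concretely, under the AEABI __aeabi_idivmod returns
     the quotient in r0 and the remainder in r1 (and __aeabi_uldivmod the
     quotient in {r0, r1}, remainder in {r2, r3}), so using these routines
     through sdiv_optab/udiv_optab simply ignores the remainder half.  */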

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_f2h_ieee"
                         : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_h2f_ieee"
                         : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }

  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
        { QQmode, "qq" },  { UQQmode, "uqq" },
        { HQmode, "hq" },  { UHQmode, "uhq" },
        { SQmode, "sq" },  { USQmode, "usq" },
        { DQmode, "dq" },  { UDQmode, "udq" },
        { TQmode, "tq" },  { UTQmode, "utq" },
        { HAmode, "ha" },  { UHAmode, "uha" },
        { SAmode, "sa" },  { USAmode, "usa" },
        { DAmode, "da" },  { UDAmode, "uda" },
        { TAmode, "ta" },  { UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
        { QQmode, "qq" },  { UQQmode, "uqq" },
        { HQmode, "hq" },  { UHQmode, "uhq" },
        { SQmode, "sq" },  { USQmode, "usq" },
        { DQmode, "dq" },  { UDQmode, "udq" },
        { TQmode, "tq" },  { UTQmode, "utq" },
        { HAmode, "ha" },  { UHAmode, "uha" },
        { SAmode, "sa" },  { USAmode, "usa" },
        { DAmode, "da" },  { UDAmode, "uda" },
        { TAmode, "ta" },  { UTAmode, "uta" },
        { QImode, "qi" },  { HImode, "hi" },
        { SImode, "si" },  { DImode, "di" },
        { SFmode, "sf" },  { DFmode, "df" }
      };
    unsigned int i, j;

    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
        arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
                                     "add", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
                                     "ssadd", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
                                     "usadd", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
                                     "sub", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
                                     "sssub", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
                                     "ussub", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
                                     "mul", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
                                     "ssmul", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
                                     "usmul", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
                                     "div", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
                                     "udiv", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
                                     "ssdiv", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
                                     "usdiv", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
                                     "neg", fixed_arith_modes[i].name, 2);
        arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
                                     "ssneg", fixed_arith_modes[i].name, 2);
        arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
                                     "usneg", fixed_arith_modes[i].name, 2);
        arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
                                     "ashl", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
                                     "ashr", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
                                     "lshr", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
                                     "ssashl", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
                                     "usashl", fixed_arith_modes[i].name, 3);
        arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
                                     "cmp", fixed_arith_modes[i].name, 2);
      }

    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
        {
          if (i == j
              || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
                  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
            continue;

          arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "fract",
                                      fixed_conv_modes[i].name,
                                      fixed_conv_modes[j].name);
          arm_set_fixed_conv_libfunc (satfract_optab,
                                      fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "satfract",
                                      fixed_conv_modes[i].name,
                                      fixed_conv_modes[j].name);
          arm_set_fixed_conv_libfunc (fractuns_optab,
                                      fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "fractuns",
                                      fixed_conv_modes[i].name,
                                      fixed_conv_modes[j].name);
          arm_set_fixed_conv_libfunc (satfractuns_optab,
                                      fixed_conv_modes[i].mode,
                                      fixed_conv_modes[j].mode, "satfractuns",
                                      fixed_conv_modes[i].name,
                                      fixed_conv_modes[j].name);
        }
  }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}

/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
         void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
                             TYPE_DECL,
                             get_identifier ("__va_list"),
                             va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
                         FIELD_DECL,
                         get_identifier ("__ap"),
                         ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}

/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     __va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
                       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                          gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}

/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    arm_selected_cpu = &all_cores[(int) arm_cpu_option];

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
        {
          /* Check for conflict between -mcpu and -march.  */
          if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
            {
              warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
                       arm_selected_cpu->name, arm_selected_arch->name);
              /* -march wins for code generation.
                 -mcpu wins for default tuning.  */
              if (!arm_selected_tune)
                arm_selected_tune = arm_selected_cpu;

              arm_selected_cpu = arm_selected_arch;
            }
          else
            /* -mcpu wins.  */
            arm_selected_arch = NULL;
        }
      else
        /* Pick a CPU based on the architecture.  */
        arm_selected_cpu = arm_selected_arch;
    }

  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors *sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
        {
#ifdef SUBTARGET_CPU_DEFAULT
          /* Use the subtarget default CPU if none was specified by
             configure.  */
          arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
          /* Default to ARM6.  */
          if (!arm_selected_cpu->name)
            arm_selected_cpu = &all_cores[arm6];
        }

      sel = arm_selected_cpu;
1673 insn_flags = sel->flags;
1675 /* Now check to see if the user has specified some command line
switch that requires certain abilities from the CPU. */
1679 if (TARGET_INTERWORK || TARGET_THUMB)
1681 sought |= (FL_THUMB | FL_MODE32);
1683 /* There are no ARM processors that support both APCS-26 and
1684 interworking. Therefore we force FL_MODE26 to be removed
1685 from insn_flags here (if it was set), so that the search
1686 below will always be able to find a compatible processor. */
1687 insn_flags &= ~FL_MODE26;
1690 if (sought != 0 && ((sought & insn_flags) != sought))
1692 /* Try to locate a CPU type that supports all of the abilities
1693 of the default CPU, plus the extra abilities requested by
1695 for (sel = all_cores; sel->name != NULL; sel++)
if ((sel->flags & (sought | insn_flags)) == (sought | insn_flags))
1699 if (sel->name == NULL)
1701 unsigned current_bit_count = 0;
1702 const struct processors * best_fit = NULL;
1704 /* Ideally we would like to issue an error message here
1705 saying that it was not possible to find a CPU compatible
1706 with the default CPU, but which also supports the command
1707 line options specified by the programmer, and so they
1708 ought to use the -mcpu=<name> command line option to
1709 override the default CPU type.
1711 If we cannot find a cpu that has both the
1712 characteristics of the default cpu and the given
1713 command line options we scan the array again looking
1714 for a best match. */
1715 for (sel = all_cores; sel->name != NULL; sel++)
1716 if ((sel->flags & sought) == sought)
1720 count = bit_count (sel->flags & insn_flags);
if (count >= current_bit_count)
  {
    best_fit = sel;
    current_bit_count = count;
  }

gcc_assert (best_fit);
sel = best_fit;

arm_selected_cpu = sel;
1737 gcc_assert (arm_selected_cpu);
/* The selected CPU may be an architecture, so look up tuning by core ID. */
1739 if (!arm_selected_tune)
1740 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1742 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1743 insn_flags = arm_selected_cpu->flags;
1744 arm_base_arch = arm_selected_cpu->base_arch;
1746 arm_tune = arm_selected_tune->core;
1747 tune_flags = arm_selected_tune->flags;
1748 current_tune = arm_selected_tune->tune;
1750 /* Make sure that the processor choice does not conflict with any of the
1751 other command line choices. */
1752 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1753 error ("target CPU does not support ARM mode");
1755 /* BPABI targets use linker tricks to allow interworking on cores
1756 without thumb support. */
1757 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
warning (0, "target CPU does not support interworking");
1760 target_flags &= ~MASK_INTERWORK;
1763 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1765 warning (0, "target CPU does not support THUMB instructions");
1766 target_flags &= ~MASK_THUMB;
1769 if (TARGET_APCS_FRAME && TARGET_THUMB)
1771 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1772 target_flags &= ~MASK_APCS_FRAME;
1775 /* Callee super interworking implies thumb interworking. Adding
1776 this to the flags here simplifies the logic elsewhere. */
1777 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1778 target_flags |= MASK_INTERWORK;
1780 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1781 from here where no function is being compiled currently. */
1782 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1783 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1785 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1786 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1788 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1790 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1791 target_flags |= MASK_APCS_FRAME;
1794 if (TARGET_POKE_FUNCTION_NAME)
1795 target_flags |= MASK_APCS_FRAME;
1797 if (TARGET_APCS_REENT && flag_pic)
1798 error ("-fpic and -mapcs-reent are incompatible");
1800 if (TARGET_APCS_REENT)
1801 warning (0, "APCS reentrant code not supported. Ignored");
1803 /* If this target is normally configured to use APCS frames, warn if they
1804 are turned off and debugging is turned on. */
1806 && write_symbols != NO_DEBUG
1807 && !TARGET_APCS_FRAME
1808 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1809 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1811 if (TARGET_APCS_FLOAT)
1812 warning (0, "passing floating point arguments in fp regs not yet supported");
1814 if (TARGET_LITTLE_WORDS)
1815 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
1816 "will be removed in a future release");
1818 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1819 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1820 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
arm_arch4t = arm_arch4 && ((insn_flags & FL_THUMB) != 0);
1822 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1823 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1824 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1825 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1826 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1827 arm_arch6m = arm_arch6 && !arm_arch_notm;
1828 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1829 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1830 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
1831 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1832 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1834 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1835 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1836 thumb_code = TARGET_ARM == 0;
1837 thumb1_code = TARGET_THUMB1 != 0;
1838 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1839 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1840 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1841 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
1842 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1843 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1844 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1846 /* If we are not using the default (ARM mode) section anchor offset
1847 ranges, then set the correct ranges now. */
1850 /* Thumb-1 LDR instructions cannot have negative offsets.
1851 Permissible positive offset ranges are 5-bit (for byte loads),
1852 6-bit (for halfword loads), or 7-bit (for word loads).
1853 Empirical results suggest a 7-bit anchor range gives the best
1854 overall code size. */
1855 targetm.min_anchor_offset = 0;
1856 targetm.max_anchor_offset = 127;
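/* (To spell the ranges out: Thumb-1 LDRB uses its 5-bit immediate field
   directly, giving byte offsets 0..31; LDRH scales it by 2, 0..62; LDR
   scales it by 4, 0..124; hence the 5-, 6- and 7-bit byte-offset ranges
   cited above.) */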
1858 else if (TARGET_THUMB2)
1860 /* The minimum is set such that the total size of the block
1861 for a particular anchor is 248 + 1 + 4095 bytes, which is
1862 divisible by eight, ensuring natural spacing of anchors. */
1863 targetm.min_anchor_offset = -248;
1864 targetm.max_anchor_offset = 4095;
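/* That is, 248 + 1 + 4095 = 4344 = 8 * 543. */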
1867 /* V5 code we generate is completely interworking capable, so we turn off
1868 TARGET_INTERWORK here to avoid many tests later on. */
1870 /* XXX However, we must pass the right pre-processor defines to CPP
1871 or GLD can get confused. This is a hack. */
1872 if (TARGET_INTERWORK)
1873 arm_cpp_interwork = 1;
1876 target_flags &= ~MASK_INTERWORK;
1878 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1879 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1881 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1882 error ("iwmmxt abi requires an iwmmxt capable cpu");
1884 if (!global_options_set.x_arm_fpu_index)
1886 const char *target_fpu_name;
#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      target_fpu_name = "vfp";
#endif
1895 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1900 arm_fpu_desc = &all_fpus[arm_fpu_index];
1902 switch (arm_fpu_desc->model)
1904 case ARM_FP_MODEL_VFP:
1905 arm_fpu_attr = FPU_VFP;
1912 if (TARGET_AAPCS_BASED)
1914 if (TARGET_CALLER_INTERWORKING)
1915 error ("AAPCS does not support -mcaller-super-interworking");
1917 if (TARGET_CALLEE_INTERWORKING)
1918 error ("AAPCS does not support -mcallee-super-interworking");
1921 /* iWMMXt and NEON are incompatible. */
1922 if (TARGET_IWMMXT && TARGET_NEON)
1923 error ("iWMMXt and NEON are incompatible");
1925 /* iWMMXt unsupported under Thumb mode. */
1926 if (TARGET_THUMB && TARGET_IWMMXT)
1927 error ("iWMMXt unsupported under Thumb mode");
1929 /* __fp16 support currently assumes the core has ldrh. */
1930 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1931 sorry ("__fp16 and no ldrh");
1933 /* If soft-float is specified then don't use FPU. */
1934 if (TARGET_SOFT_FLOAT)
1935 arm_fpu_attr = FPU_NONE;
1937 if (TARGET_AAPCS_BASED)
1939 if (arm_abi == ARM_ABI_IWMMXT)
1940 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1941 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1942 && TARGET_HARD_FLOAT
1944 arm_pcs_default = ARM_PCS_AAPCS_VFP;
else
  arm_pcs_default = ARM_PCS_AAPCS;
1950 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1951 sorry ("-mfloat-abi=hard and VFP");
1953 if (arm_abi == ARM_ABI_APCS)
1954 arm_pcs_default = ARM_PCS_APCS;
else
  arm_pcs_default = ARM_PCS_ATPCS;
1959 /* For arm2/3 there is no need to do any scheduling if we are doing
1960 software floating-point. */
1961 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
1962 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1964 /* Use the cp15 method if it is available. */
1965 if (target_thread_pointer == TP_AUTO)
1967 if (arm_arch6k && !TARGET_THUMB1)
1968 target_thread_pointer = TP_CP15;
else
  target_thread_pointer = TP_SOFT;
1973 if (TARGET_HARD_TP && TARGET_THUMB1)
error ("cannot use -mtp=cp15 with 16-bit Thumb");
1976 /* Override the default structure alignment for AAPCS ABI. */
1977 if (!global_options_set.x_arm_structure_size_boundary)
1979 if (TARGET_AAPCS_BASED)
1980 arm_structure_size_boundary = 8;
1984 if (arm_structure_size_boundary != 8
1985 && arm_structure_size_boundary != 32
1986 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
1988 if (ARM_DOUBLEWORD_ALIGN)
warning (0,
         "structure size boundary can only be set to 8, 32 or 64");
else
  warning (0, "structure size boundary can only be set to 8 or 32");
1993 arm_structure_size_boundary
1994 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
1998 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2000 error ("RTP PIC is incompatible with Thumb");
2004 /* If stack checking is disabled, we can use r10 as the PIC register,
2005 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2006 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2008 if (TARGET_VXWORKS_RTP)
2009 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2010 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2013 if (flag_pic && TARGET_VXWORKS_RTP)
2014 arm_pic_register = 9;
2016 if (arm_pic_register_string != NULL)
2018 int pic_register = decode_reg_name (arm_pic_register_string);
2021 warning (0, "-mpic-register= is useless without -fpic");
2023 /* Prevent the user from choosing an obviously stupid PIC register. */
2024 else if (pic_register < 0 || call_used_regs[pic_register]
2025 || pic_register == HARD_FRAME_POINTER_REGNUM
2026 || pic_register == STACK_POINTER_REGNUM
2027 || pic_register >= PC_REGNUM
2028 || (TARGET_VXWORKS_RTP
2029 && (unsigned int) pic_register != arm_pic_register))
2030 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2032 arm_pic_register = pic_register;
2035 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2036 if (fix_cm3_ldrd == 2)
2038 if (arm_selected_cpu->core == cortexm3)
2044 /* Enable -munaligned-access by default for
2045 - all ARMv6 architecture-based processors
2046 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
- ARMv8 architecture-based processors.
2049 Disable -munaligned-access by default for
2050 - all pre-ARMv6 architecture-based processors
2051 - ARMv6-M architecture-based processors. */
2053 if (unaligned_access == 2)
2055 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2056 unaligned_access = 1;
else
  unaligned_access = 0;
2060 else if (unaligned_access == 1
2061 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2063 warning (0, "target CPU does not support unaligned accesses");
2064 unaligned_access = 0;
2067 if (TARGET_THUMB1 && flag_schedule_insns)
2069 /* Don't warn since it's on by default in -O2. */
2070 flag_schedule_insns = 0;
2075 /* If optimizing for size, bump the number of instructions that we
2076 are prepared to conditionally execute (even on a StrongARM). */
2077 max_insns_skipped = 6;
2080 max_insns_skipped = current_tune->max_insns_skipped;
2082 /* Hot/Cold partitioning is not currently supported, since we can't
2083 handle literal pool placement in that case. */
2084 if (flag_reorder_blocks_and_partition)
2086 inform (input_location,
2087 "-freorder-blocks-and-partition not supported on this architecture");
2088 flag_reorder_blocks_and_partition = 0;
2089 flag_reorder_blocks = 1;
2093 /* Hoisting PIC address calculations more aggressively provides a small,
2094 but measurable, size reduction for PIC code. Therefore, we decrease
2095 the bar for unrestricted expression hoisting to the cost of PIC address
2096 calculation, which is 2 instructions. */
2097 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2098 global_options.x_param_values,
2099 global_options_set.x_param_values);
2101 /* ARM EABI defaults to strict volatile bitfields. */
2102 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
&& abi_version_at_least (2))
2104 flag_strict_volatile_bitfields = 1;
/* Enable software prefetching at -O3 for CPUs that have a prefetcher, when
   we have deemed it beneficial (signified by setting num_prefetch_slots
   to 1 or more). */
2108 if (flag_prefetch_loop_arrays < 0
2111 && current_tune->num_prefetch_slots > 0)
2112 flag_prefetch_loop_arrays = 1;
/* Set up parameters to be used in the prefetching algorithm. Do not override the
2115 defaults unless we are tuning for a core we have researched values for. */
2116 if (current_tune->num_prefetch_slots > 0)
2117 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2118 current_tune->num_prefetch_slots,
2119 global_options.x_param_values,
2120 global_options_set.x_param_values);
2121 if (current_tune->l1_cache_line_size >= 0)
2122 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2123 current_tune->l1_cache_line_size,
2124 global_options.x_param_values,
2125 global_options_set.x_param_values);
2126 if (current_tune->l1_cache_size >= 0)
2127 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2128 current_tune->l1_cache_size,
2129 global_options.x_param_values,
2130 global_options_set.x_param_values);
2132 /* Use the alternative scheduling-pressure algorithm by default. */
2133 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2134 global_options.x_param_values,
2135 global_options_set.x_param_values);
2137 /* Register global variables with the garbage collector. */
2138 arm_add_gc_roots ();
2142 arm_add_gc_roots (void)
gcc_obstack_init (&minipool_obstack);
2145 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2148 /* A table of known ARM exception types.
2149 For use with the interrupt function attribute. */
2153 const char *const arg;
2154 const unsigned long return_value;
2158 static const isr_attribute_arg isr_attribute_args [] =
2160 { "IRQ", ARM_FT_ISR },
2161 { "irq", ARM_FT_ISR },
2162 { "FIQ", ARM_FT_FIQ },
2163 { "fiq", ARM_FT_FIQ },
2164 { "ABORT", ARM_FT_ISR },
2165 { "abort", ARM_FT_ISR },
2168 { "UNDEF", ARM_FT_EXCEPTION },
2169 { "undef", ARM_FT_EXCEPTION },
2170 { "SWI", ARM_FT_EXCEPTION },
2171 { "swi", ARM_FT_EXCEPTION },
2172 { NULL, ARM_FT_NORMAL }
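/* Example use of the corresponding attribute, a sketch:

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   With no argument the type defaults to IRQ (see arm_isr_value below). */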
2175 /* Returns the (interrupt) function type of the current
2176 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2178 static unsigned long
2179 arm_isr_value (tree argument)
2181 const isr_attribute_arg * ptr;
2185 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2187 /* No argument - default to IRQ. */
if (argument == NULL_TREE)
  return ARM_FT_ISR;
2191 /* Get the value of the argument. */
2192 if (TREE_VALUE (argument) == NULL_TREE
2193 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2194 return ARM_FT_UNKNOWN;
2196 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2198 /* Check it against the list of known arguments. */
2199 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2200 if (streq (arg, ptr->arg))
2201 return ptr->return_value;
2203 /* An unrecognized interrupt type. */
2204 return ARM_FT_UNKNOWN;
2207 /* Computes the type of the current function. */
2209 static unsigned long
2210 arm_compute_func_type (void)
2212 unsigned long type = ARM_FT_UNKNOWN;
2216 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2218 /* Decide if the current function is volatile. Such functions
2219 never return, and many memory cycles can be saved by not storing
2220 register values that will never be needed again. This optimization
2221 was added to speed up context switching in a kernel application. */
2223 && (TREE_NOTHROW (current_function_decl)
2224 || !(flag_unwind_tables
2226 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2227 && TREE_THIS_VOLATILE (current_function_decl))
2228 type |= ARM_FT_VOLATILE;
2230 if (cfun->static_chain_decl != NULL)
2231 type |= ARM_FT_NESTED;
2233 attr = DECL_ATTRIBUTES (current_function_decl);
2235 a = lookup_attribute ("naked", attr);
if (a != NULL_TREE)
  type |= ARM_FT_NAKED;
a = lookup_attribute ("isr", attr);
if (a == NULL_TREE)
  a = lookup_attribute ("interrupt", attr);

if (a == NULL_TREE)
  type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
else
  type |= arm_isr_value (TREE_VALUE (a));
2251 /* Returns the type of the current function. */
2254 arm_current_func_type (void)
2256 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2257 cfun->machine->func_type = arm_compute_func_type ();
2259 return cfun->machine->func_type;
2263 arm_allocate_stack_slots_for_args (void)
2265 /* Naked functions should not allocate stack slots for arguments. */
2266 return !IS_NAKED (arm_current_func_type ());
2270 arm_warn_func_return (tree decl)
2272 /* Naked functions are implemented entirely in assembly, including the
2273 return sequence, so suppress warnings about this. */
2274 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
2278 /* Output assembler code for a block containing the constant parts
2279 of a trampoline, leaving space for the variable parts.
2281 On the ARM, (if r8 is the static chain regnum, and remembering that
referencing pc adds an offset of 8) the trampoline looks like:
   ldr   r8, [pc, #0]
   ldr   pc, [pc, #0]
   .word static chain value
2286 .word function's address
2287 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2290 arm_asm_trampoline_template (FILE *f)
2294 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2295 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2297 else if (TARGET_THUMB2)
/* The Thumb-2 trampoline is similar to the ARM implementation.
   Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
2301 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2302 STATIC_CHAIN_REGNUM, PC_REGNUM);
2303 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2307 ASM_OUTPUT_ALIGN (f, 2);
2308 fprintf (f, "\t.code\t16\n");
2309 fprintf (f, ".Ltrampoline_start:\n");
2310 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2311 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2312 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2313 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2314 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2315 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2317 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2318 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2321 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2324 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2326 rtx fnaddr, mem, a_tramp;
2328 emit_block_move (m_tramp, assemble_trampoline_template (),
2329 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2331 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2332 emit_move_insn (mem, chain_value);
2334 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2335 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2336 emit_move_insn (mem, fnaddr);
2338 a_tramp = XEXP (m_tramp, 0);
2339 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2340 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2341 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
2344 /* Thumb trampolines should be entered in thumb mode, so set
2345 the bottom bit of the address. */
2348 arm_trampoline_adjust_address (rtx addr)
2351 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2352 NULL, 0, OPTAB_LIB_WIDEN);
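/* So, for example, a trampoline placed at 0x8000 would be entered through
   address 0x8001 when compiling for Thumb; the set bottom bit tells
   BX/BLX-style transfers to execute it in Thumb state. (Illustrative
   address only.) */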
2356 /* Return 1 if it is possible to return using a single instruction.
2357 If SIBLING is non-null, this is a test for a return before a sibling
2358 call. SIBLING is the call insn, so we can examine its register usage. */
2361 use_return_insn (int iscond, rtx sibling)
2364 unsigned int func_type;
2365 unsigned long saved_int_regs;
2366 unsigned HOST_WIDE_INT stack_adjust;
2367 arm_stack_offsets *offsets;
2369 /* Never use a return instruction before reload has run. */
2370 if (!reload_completed)
2373 func_type = arm_current_func_type ();
/* Naked, volatile and stack alignment functions need special
   consideration. */
2377 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2380 /* So do interrupt functions that use the frame pointer and Thumb
2381 interrupt functions. */
2382 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2385 offsets = arm_get_frame_offsets ();
2386 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2388 /* As do variadic functions. */
2389 if (crtl->args.pretend_args_size
2390 || cfun->machine->uses_anonymous_args
2391 /* Or if the function calls __builtin_eh_return () */
2392 || crtl->calls_eh_return
2393 /* Or if the function calls alloca */
2394 || cfun->calls_alloca
2395 /* Or if there is a stack adjustment. However, if the stack pointer
2396 is saved on the stack, we can use a pre-incrementing stack load. */
2397 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2398 && stack_adjust == 4)))
2401 saved_int_regs = offsets->saved_regs_mask;
2403 /* Unfortunately, the insn
2405 ldmib sp, {..., sp, ...}
2407 triggers a bug on most SA-110 based devices, such that the stack
2408 pointer won't be correctly restored if the instruction takes a
2409 page fault. We work around this problem by popping r3 along with
2410 the other registers, since that is never slower than executing
2411 another instruction.
2413 We test for !arm_arch5 here, because code for any architecture
less than this could potentially be run on one of the buggy chips. */
2416 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2418 /* Validate that r3 is a call-clobbered register (always true in
2419 the default abi) ... */
2420 if (!call_used_regs[3])
2423 /* ... that it isn't being used for a return value ... */
2424 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2427 /* ... or for a tail-call argument ... */
2430 gcc_assert (CALL_P (sibling));
2432 if (find_regno_fusage (sibling, USE, 3))
2436 /* ... and that there are no call-saved registers in r0-r2
2437 (always true in the default ABI). */
2438 if (saved_int_regs & 0x7)
/* Can't be done if interworking with Thumb, and any registers have been
   stacked. */
if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT (func_type))
2447 /* On StrongARM, conditional returns are expensive if they aren't
2448 taken and multiple registers have been stacked. */
2449 if (iscond && arm_tune_strongarm)
2451 /* Conditional return when just the LR is stored is a simple
conditional-load instruction; that's not expensive.
2453 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2457 && arm_pic_register != INVALID_REGNUM
2458 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2462 /* If there are saved registers but the LR isn't saved, then we need
2463 two instructions for the return. */
2464 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2467 /* Can't be done if any of the VFP regs are pushed,
2468 since this also requires an insn. */
2469 if (TARGET_HARD_FLOAT && TARGET_VFP)
2470 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2471 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2474 if (TARGET_REALLY_IWMMXT)
2475 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2476 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2482 /* Return TRUE if int I is a valid immediate ARM constant. */
2485 const_ok_for_arm (HOST_WIDE_INT i)
2489 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2490 be all zero, or all one. */
2491 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2492 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2493 != ((~(unsigned HOST_WIDE_INT) 0)
2494 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2497 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2499 /* Fast return for 0 and small values. We must do this for zero, since
2500 the code below can't handle that one case. */
2501 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2504 /* Get the number of trailing zeros. */
2505 lowbit = ffs((int) i) - 1;
2507 /* Only even shifts are allowed in ARM mode so round down to the
2508 nearest even number. */
2512 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2517 /* Allow rotated constants in ARM mode. */
2519 && ((i & ~0xc000003f) == 0
2520 || (i & ~0xf000000f) == 0
2521 || (i & ~0xfc000003) == 0))
2528 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2531 if (i == v || i == (v | (v << 8)))
2534 /* Allow repeated pattern 0xXY00XY00. */
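/* Some illustrative values for the rules above (a sketch):

     0x000000ff  valid: plain 8-bit immediate
     0x0000ff00  valid: 0xff shifted into bits 8..15
     0xf000000f  valid: 0xff rotated across the word boundary (ARM)
     0x0000ffff  invalid: 16 significant bits exceed the 8-bit field
     0xff00ff00  invalid for ARM, but a valid Thumb-2 replicated
                 pattern of the 0xXY00XY00 form. */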
2544 /* Return true if I is a valid constant for the operation CODE. */
2546 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2548 if (const_ok_for_arm (i))
2554 /* See if we can use movw. */
2555 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2558 /* Otherwise, try mvn. */
2559 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2562 /* See if we can use addw or subw. */
2564 && ((i & 0xfffff000) == 0
2565 || ((-i) & 0xfffff000) == 0))
2567 /* else fall through. */
2587 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2589 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2595 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2599 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
/* Return true if I is a valid DImode constant for the operation CODE. */
2608 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
2610 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
2611 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
2612 rtx hi = GEN_INT (hi_val);
2613 rtx lo = GEN_INT (lo_val);
2621 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
2628 /* Emit a sequence of insns to handle a large constant.
2629 CODE is the code of the operation required, it can be any of SET, PLUS,
2630 IOR, AND, XOR, MINUS;
2631 MODE is the mode in which the operation is being performed;
2632 VAL is the integer to operate on;
2633 SOURCE is the other operand (a register, or a null-pointer for SET);
2634 SUBTARGETS means it is safe to create scratch registers if that will
2635 either produce a simpler sequence, or we will want to cse the values.
2636 Return value is the number of insns emitted. */
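/* As a worked example (a sketch, for CODE == SET in ARM mode),
   synthesizing 0x12345678 takes four insns, one 8-bit rotated chunk
   at a time:

	mov	r0, #0x78
	orr	r0, r0, #0x5600
	orr	r0, r0, #0x340000
	orr	r0, r0, #0x12000000  */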
2638 /* ??? Tweak this for thumb2. */
2640 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2641 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2645 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2646 cond = COND_EXEC_TEST (PATTERN (insn));
2650 if (subtargets || code == SET
2651 || (REG_P (target) && REG_P (source)
2652 && REGNO (target) != REGNO (source)))
2654 /* After arm_reorg has been called, we can't fix up expensive
2655 constants by pushing them into memory so we must synthesize
2656 them in-line, regardless of the cost. This is only likely to
2657 be more costly on chips that have load delay slots and we are
2658 compiling without running the scheduler (so no splitting
2659 occurred before the final instruction emission).
2661 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2663 if (!after_arm_reorg
2665 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2667 > (arm_constant_limit (optimize_function_for_size_p (cfun))
/* Currently SET is the only monadic value for CODE; all
   the rest are dyadic. */
2674 if (TARGET_USE_MOVT)
2675 arm_emit_movpair (target, GEN_INT (val));
else
  emit_set_insn (target, GEN_INT (val));
2683 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2685 if (TARGET_USE_MOVT)
2686 arm_emit_movpair (temp, GEN_INT (val));
else
  emit_set_insn (temp, GEN_INT (val));
2690 /* For MINUS, the value is subtracted from, since we never
2691 have subtraction of a constant. */
2693 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2695 emit_set_insn (target,
2696 gen_rtx_fmt_ee (code, mode, source, temp));
2702 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
/* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
   ARM/Thumb-2 immediates and add up to VAL.
   The function's return value gives the number of insns required. */
2710 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2711 struct four_ints *return_sequence)
2713 int best_consecutive_zeros = 0;
2717 struct four_ints tmp_sequence;
2719 /* If we aren't targeting ARM, the best place to start is always at
the bottom; otherwise look more closely. */
2723 for (i = 0; i < 32; i += 2)
2725 int consecutive_zeros = 0;
2727 if (!(val & (3 << i)))
2729 while ((i < 32) && !(val & (3 << i)))
2731 consecutive_zeros += 2;
2734 if (consecutive_zeros > best_consecutive_zeros)
2736 best_consecutive_zeros = consecutive_zeros;
2737 best_start = i - consecutive_zeros;
2744 /* So long as it won't require any more insns to do so, it's
2745 desirable to emit a small constant (in bits 0...9) in the last
2746 insn. This way there is more chance that it can be combined with
2747 a later addressing insn to form a pre-indexed load or store
2748 operation. Consider:
2750 *((volatile int *)0xe0000100) = 1;
2751 *((volatile int *)0xe0000110) = 2;
2753 We want this to wind up as:
2757 str rB, [rA, #0x100]
2759 str rB, [rA, #0x110]
2761 rather than having to synthesize both large constants from scratch.
2763 Therefore, we calculate how many insns would be required to emit
2764 the constant starting from `best_start', and also starting from
2765 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2766 yield a shorter sequence, we may as well use zero. */
2767 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
2769 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2771 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2772 if (insns2 <= insns1)
2774 *return_sequence = tmp_sequence;
2782 /* As for optimal_immediate_sequence, but starting at bit-position I. */
2784 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2785 struct four_ints *return_sequence, int i)
2787 int remainder = val & 0xffffffff;
/* Try to find a way of doing the job in either two or three
2793 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2794 location. We start at position I. This may be the MSB, or
optimal_immediate_sequence may have positioned it at the largest block
2796 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2797 wrapping around to the top of the word when we drop off the bottom.
2798 In the worst case this code should produce no more than four insns.
2800 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2801 constants, shifted to any arbitrary location. We should always start
2806 unsigned int b1, b2, b3, b4;
2807 unsigned HOST_WIDE_INT result;
2810 gcc_assert (insns < 4);
2815 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2816 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2819 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2820 /* We can use addw/subw for the last 12 bits. */
2824 /* Use an 8-bit shifted/rotated immediate. */
2828 result = remainder & ((0x0ff << end)
2829 | ((i < end) ? (0xff >> (32 - end))
/* ARM allows rotates by a multiple of two. Thumb-2 allows
2837 arbitrary shifts. */
2838 i -= TARGET_ARM ? 2 : 1;
/* Next, see if we can do a better job with a Thumb-2 replicated
2845 We do it this way around to catch the cases like 0x01F001E0 where
2846 two 8-bit immediates would work, but a replicated constant would
2849 TODO: 16-bit constants that don't clear all the bits, but still win.
2850 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
2853 b1 = (remainder & 0xff000000) >> 24;
2854 b2 = (remainder & 0x00ff0000) >> 16;
2855 b3 = (remainder & 0x0000ff00) >> 8;
2856 b4 = remainder & 0xff;
2860 /* The 8-bit immediate already found clears b1 (and maybe b2),
2861 but must leave b3 and b4 alone. */
2863 /* First try to find a 32-bit replicated constant that clears
2864 almost everything. We can assume that we can't do it in one,
2865 or else we wouldn't be here. */
2866 unsigned int tmp = b1 & b2 & b3 & b4;
2867 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2869 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2870 + (tmp == b3) + (tmp == b4);
2872 && (matching_bytes >= 3
2873 || (matching_bytes == 2
2874 && const_ok_for_op (remainder & ~tmp2, code))))
2876 /* At least 3 of the bytes match, and the fourth has at
2877 least as many bits set, or two of the bytes match
2878 and it will only require one more insn to finish. */
2886 /* Second, try to find a 16-bit replicated constant that can
2887 leave three of the bytes clear. If b2 or b4 is already
2888 zero, then we can. If the 8-bit from above would not
2889 clear b2 anyway, then we still win. */
2890 else if (b1 == b3 && (!b2 || !b4
2891 || (remainder & 0x00ff0000 & ~result)))
2893 result = remainder & 0xff00ff00;
2899 /* The 8-bit immediate already found clears b2 (and maybe b3)
and we don't get here unless b1 is already clear, but it will
2901 leave b4 unchanged. */
2903 /* If we can clear b2 and b4 at once, then we win, since the
2904 8-bits couldn't possibly reach that far. */
2907 result = remainder & 0x00ff00ff;
2913 return_sequence->i[insns++] = result;
2914 remainder &= ~result;
2916 if (code == SET || code == MINUS)
2924 /* Emit an instruction with the indicated PATTERN. If COND is
non-NULL, conditionalize the execution of the instruction on COND being true. */
2929 emit_constant_insn (rtx cond, rtx pattern)
2932 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2933 emit_insn (pattern);
/* As above, but extra parameter GENERATE which, if clear, suppresses RTL generation. */
2940 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2941 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2946 int final_invert = 0;
2948 int set_sign_bit_copies = 0;
2949 int clear_sign_bit_copies = 0;
2950 int clear_zero_bit_copies = 0;
2951 int set_zero_bit_copies = 0;
2952 int insns = 0, neg_insns, inv_insns;
2953 unsigned HOST_WIDE_INT temp1, temp2;
2954 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2955 struct four_ints *immediates;
2956 struct four_ints pos_immediates, neg_immediates, inv_immediates;
2958 /* Find out which operations are safe for a given CODE. Also do a quick
2959 check for degenerate cases; these can occur when DImode operations
2972 if (remainder == 0xffffffff)
2975 emit_constant_insn (cond,
2976 gen_rtx_SET (VOIDmode, target,
2977 GEN_INT (ARM_SIGN_EXTEND (val))));
2983 if (reload_completed && rtx_equal_p (target, source))
2987 emit_constant_insn (cond,
2988 gen_rtx_SET (VOIDmode, target, source));
2997 emit_constant_insn (cond,
2998 gen_rtx_SET (VOIDmode, target, const0_rtx));
3001 if (remainder == 0xffffffff)
3003 if (reload_completed && rtx_equal_p (target, source))
3006 emit_constant_insn (cond,
3007 gen_rtx_SET (VOIDmode, target, source));
3016 if (reload_completed && rtx_equal_p (target, source))
3019 emit_constant_insn (cond,
3020 gen_rtx_SET (VOIDmode, target, source));
3024 if (remainder == 0xffffffff)
3027 emit_constant_insn (cond,
3028 gen_rtx_SET (VOIDmode, target,
3029 gen_rtx_NOT (mode, source)));
3036 /* We treat MINUS as (val - source), since (source - val) is always
3037 passed as (source + (-val)). */
3041 emit_constant_insn (cond,
3042 gen_rtx_SET (VOIDmode, target,
3043 gen_rtx_NEG (mode, source)));
3046 if (const_ok_for_arm (val))
3049 emit_constant_insn (cond,
3050 gen_rtx_SET (VOIDmode, target,
3051 gen_rtx_MINUS (mode, GEN_INT (val),
3062 /* If we can do it in one insn get out quickly. */
3063 if (const_ok_for_op (val, code))
3066 emit_constant_insn (cond,
3067 gen_rtx_SET (VOIDmode, target,
3069 ? gen_rtx_fmt_ee (code, mode, source,
3075 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3077 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3078 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3082 if (mode == SImode && i == 16)
3083 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3085 emit_constant_insn (cond,
3086 gen_zero_extendhisi2
3087 (target, gen_lowpart (HImode, source)));
/* Extz only supports SImode, but we can coerce the operands into that mode. */
3091 emit_constant_insn (cond,
3092 gen_extzv_t2 (gen_lowpart (SImode, target),
3093 gen_lowpart (SImode, source),
3094 GEN_INT (i), const0_rtx));
3100 /* Calculate a few attributes that may be useful for specific
3102 /* Count number of leading zeros. */
3103 for (i = 31; i >= 0; i--)
3105 if ((remainder & (1 << i)) == 0)
3106 clear_sign_bit_copies++;
3111 /* Count number of leading 1's. */
3112 for (i = 31; i >= 0; i--)
3114 if ((remainder & (1 << i)) != 0)
3115 set_sign_bit_copies++;
/* Count number of trailing zeros. */
3121 for (i = 0; i <= 31; i++)
3123 if ((remainder & (1 << i)) == 0)
3124 clear_zero_bit_copies++;
3129 /* Count number of trailing 1's. */
3130 for (i = 0; i <= 31; i++)
3132 if ((remainder & (1 << i)) != 0)
3133 set_zero_bit_copies++;
3141 /* See if we can do this by sign_extending a constant that is known
to be negative. This is a good way of doing it, since the shift
3143 may well merge into a subsequent insn. */
3144 if (set_sign_bit_copies > 1)
3146 if (const_ok_for_arm
3147 (temp1 = ARM_SIGN_EXTEND (remainder
3148 << (set_sign_bit_copies - 1))))
3152 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3153 emit_constant_insn (cond,
3154 gen_rtx_SET (VOIDmode, new_src,
3156 emit_constant_insn (cond,
3157 gen_ashrsi3 (target, new_src,
3158 GEN_INT (set_sign_bit_copies - 1)));
3162 /* For an inverted constant, we will need to set the low bits,
3163 these will be shifted out of harm's way. */
3164 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3165 if (const_ok_for_arm (~temp1))
3169 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3170 emit_constant_insn (cond,
3171 gen_rtx_SET (VOIDmode, new_src,
3173 emit_constant_insn (cond,
3174 gen_ashrsi3 (target, new_src,
3175 GEN_INT (set_sign_bit_copies - 1)));
3181 /* See if we can calculate the value as the difference between two
3182 valid immediates. */
3183 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3185 int topshift = clear_sign_bit_copies & ~1;
3187 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3188 & (0xff000000 >> topshift));
3190 /* If temp1 is zero, then that means the 9 most significant
3191 bits of remainder were 1 and we've caused it to overflow.
3192 When topshift is 0 we don't need to do anything since we
3193 can borrow from 'bit 32'. */
3194 if (temp1 == 0 && topshift != 0)
3195 temp1 = 0x80000000 >> (topshift - 1);
3197 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3199 if (const_ok_for_arm (temp2))
3203 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3204 emit_constant_insn (cond,
3205 gen_rtx_SET (VOIDmode, new_src,
3207 emit_constant_insn (cond,
3208 gen_addsi3 (target, new_src,
3216 /* See if we can generate this by setting the bottom (or the top)
3217 16 bits, and then shifting these into the other half of the
3218 word. We only look for the simplest cases, to do more would cost
3219 too much. Be careful, however, not to generate this when the
3220 alternative would take fewer insns. */
3221 if (val & 0xffff0000)
3223 temp1 = remainder & 0xffff0000;
3224 temp2 = remainder & 0x0000ffff;
3226 /* Overlaps outside this range are best done using other methods. */
3227 for (i = 9; i < 24; i++)
3229 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3230 && !const_ok_for_arm (temp2))
3232 rtx new_src = (subtargets
3233 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3235 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3236 source, subtargets, generate);
3244 gen_rtx_ASHIFT (mode, source,
3251 /* Don't duplicate cases already considered. */
3252 for (i = 17; i < 24; i++)
3254 if (((temp1 | (temp1 >> i)) == remainder)
3255 && !const_ok_for_arm (temp1))
3257 rtx new_src = (subtargets
3258 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3260 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3261 source, subtargets, generate);
3266 gen_rtx_SET (VOIDmode, target,
3269 gen_rtx_LSHIFTRT (mode, source,
3280 /* If we have IOR or XOR, and the constant can be loaded in a
3281 single instruction, and we can find a temporary to put it in,
3282 then this can be done in two instructions instead of 3-4. */
3284 /* TARGET can't be NULL if SUBTARGETS is 0 */
3285 || (reload_completed && !reg_mentioned_p (target, source)))
3287 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3291 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3293 emit_constant_insn (cond,
3294 gen_rtx_SET (VOIDmode, sub,
3296 emit_constant_insn (cond,
3297 gen_rtx_SET (VOIDmode, target,
3298 gen_rtx_fmt_ee (code, mode,
x = y | constant (which is composed of set_sign_bit_copies leading 1s
followed by 0s, e.g. 0xfff00000)
3311 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3313 This can be done in 2 instructions by using shifts with mov or mvn.
3318 mvn r0, r0, lsr #12 */
3319 if (set_sign_bit_copies > 8
3320 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3324 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3325 rtx shift = GEN_INT (set_sign_bit_copies);
3329 gen_rtx_SET (VOIDmode, sub,
3331 gen_rtx_ASHIFT (mode,
3336 gen_rtx_SET (VOIDmode, target,
3338 gen_rtx_LSHIFTRT (mode, sub,
3345 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3347 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
For example, r0 = r0 | 0xfff
3354 if (set_zero_bit_copies > 8
3355 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3359 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3360 rtx shift = GEN_INT (set_zero_bit_copies);
3364 gen_rtx_SET (VOIDmode, sub,
3366 gen_rtx_LSHIFTRT (mode,
3371 gen_rtx_SET (VOIDmode, target,
3373 gen_rtx_ASHIFT (mode, sub,
/* This will never be reached for Thumb-2 because orn is a valid
   instruction. This is for Thumb-1 and the 32-bit ARM case.
3382 x = y | constant (such that ~constant is a valid constant)
3384 x = ~(~y & ~constant).
3386 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3390 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3391 emit_constant_insn (cond,
3392 gen_rtx_SET (VOIDmode, sub,
3393 gen_rtx_NOT (mode, source)));
3396 sub = gen_reg_rtx (mode);
3397 emit_constant_insn (cond,
3398 gen_rtx_SET (VOIDmode, sub,
3399 gen_rtx_AND (mode, source,
3401 emit_constant_insn (cond,
3402 gen_rtx_SET (VOIDmode, target,
3403 gen_rtx_NOT (mode, sub)));
/* See if two shifts will do two or more insns' worth of work. */
3411 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3413 HOST_WIDE_INT shift_mask = ((0xffffffff
3414 << (32 - clear_sign_bit_copies))
3417 if ((remainder | shift_mask) != 0xffffffff)
3421 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3422 insns = arm_gen_constant (AND, mode, cond,
3423 remainder | shift_mask,
3424 new_src, source, subtargets, 1);
3429 rtx targ = subtargets ? NULL_RTX : target;
3430 insns = arm_gen_constant (AND, mode, cond,
3431 remainder | shift_mask,
3432 targ, source, subtargets, 0);
3438 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3439 rtx shift = GEN_INT (clear_sign_bit_copies);
3441 emit_insn (gen_ashlsi3 (new_src, source, shift));
3442 emit_insn (gen_lshrsi3 (target, new_src, shift));
3448 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3450 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3452 if ((remainder | shift_mask) != 0xffffffff)
3456 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3458 insns = arm_gen_constant (AND, mode, cond,
3459 remainder | shift_mask,
3460 new_src, source, subtargets, 1);
3465 rtx targ = subtargets ? NULL_RTX : target;
3467 insns = arm_gen_constant (AND, mode, cond,
3468 remainder | shift_mask,
3469 targ, source, subtargets, 0);
3475 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3476 rtx shift = GEN_INT (clear_zero_bit_copies);
3478 emit_insn (gen_lshrsi3 (new_src, source, shift));
3479 emit_insn (gen_ashlsi3 (target, new_src, shift));
3491 /* Calculate what the instruction sequences would be if we generated it
3492 normally, negated, or inverted. */
3494 /* AND cannot be split into multiple insns, so invert and use BIC. */
3497 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3500 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3505 if (can_invert || final_invert)
3506 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3511 immediates = &pos_immediates;
3513 /* Is the negated immediate sequence more efficient? */
3514 if (neg_insns < insns && neg_insns <= inv_insns)
3517 immediates = &neg_immediates;
3522 /* Is the inverted immediate sequence more efficient?
3523 We must allow for an extra NOT instruction for XOR operations, although
3524 there is some chance that the final 'mvn' will get optimized later. */
3525 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3528 immediates = &inv_immediates;
3536 /* Now output the chosen sequence as instructions. */
3539 for (i = 0; i < insns; i++)
3541 rtx new_src, temp1_rtx;
3543 temp1 = immediates->i[i];
3545 if (code == SET || code == MINUS)
3546 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3547 else if ((final_invert || i < (insns - 1)) && subtargets)
3548 new_src = gen_reg_rtx (mode);
3554 else if (can_negate)
3557 temp1 = trunc_int_for_mode (temp1, mode);
3558 temp1_rtx = GEN_INT (temp1);
3562 else if (code == MINUS)
3563 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3565 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3567 emit_constant_insn (cond,
3568 gen_rtx_SET (VOIDmode, new_src,
3574 can_negate = can_invert;
3578 else if (code == MINUS)
3586 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3587 gen_rtx_NOT (mode, source)));
3594 /* Canonicalize a comparison so that we are more likely to recognize it.
3595 This can be done for a few constant compares, where we can make the
3596 immediate value easier to load. */
3599 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
3600 bool op0_preserve_value)
3602 enum machine_mode mode;
3603 unsigned HOST_WIDE_INT i, maxval;
3605 mode = GET_MODE (*op0);
3606 if (mode == VOIDmode)
3607 mode = GET_MODE (*op1);
maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE (mode) - 1)) - 1;
3611 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3612 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3613 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3614 for GTU/LEU in Thumb mode. */
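/* For example, with a constant operand (GT x, 5) becomes (GE x, 6)
   and (LE x, 5) becomes (LT x, 6); MAXVAL guards the adjustment so
   we never form a constant outside the mode's range. */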
3619 if (*code == GT || *code == LE
3620 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
/* Missing comparison. First try to use an available comparison. */
3624 if (CONST_INT_P (*op1))
3632 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3634 *op1 = GEN_INT (i + 1);
3635 *code = *code == GT ? GE : LT;
3641 if (i != ~((unsigned HOST_WIDE_INT) 0)
3642 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3644 *op1 = GEN_INT (i + 1);
3645 *code = *code == GTU ? GEU : LTU;
3654 /* If that did not work, reverse the condition. */
3655 if (!op0_preserve_value)
3660 *code = (int)swap_condition ((enum rtx_code)*code);
3666 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3667 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3668 to facilitate possible combining with a cmp into 'ands'. */
3670 && GET_CODE (*op0) == ZERO_EXTEND
3671 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3672 && GET_MODE (XEXP (*op0, 0)) == QImode
3673 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3674 && subreg_lowpart_p (XEXP (*op0, 0))
3675 && *op1 == const0_rtx)
3676 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3679 /* Comparisons smaller than DImode. Only adjust comparisons against
3680 an out-of-range constant. */
3681 if (!CONST_INT_P (*op1)
3682 || const_ok_for_arm (INTVAL (*op1))
3683 || const_ok_for_arm (- INTVAL (*op1)))
3697 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3699 *op1 = GEN_INT (i + 1);
3700 *code = *code == GT ? GE : LT;
3708 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3710 *op1 = GEN_INT (i - 1);
3711 *code = *code == GE ? GT : LE;
3718 if (i != ~((unsigned HOST_WIDE_INT) 0)
3719 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3721 *op1 = GEN_INT (i + 1);
3722 *code = *code == GTU ? GEU : LTU;
3730 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3732 *op1 = GEN_INT (i - 1);
3733 *code = *code == GEU ? GTU : LEU;
3744 /* Define how to find the value returned by a function. */
arm_function_value (const_tree type, const_tree func,
3748 bool outgoing ATTRIBUTE_UNUSED)
3750 enum machine_mode mode;
3751 int unsignedp ATTRIBUTE_UNUSED;
3752 rtx r ATTRIBUTE_UNUSED;
3754 mode = TYPE_MODE (type);
3756 if (TARGET_AAPCS_BASED)
3757 return aapcs_allocate_return_reg (mode, type, func);
3759 /* Promote integer types. */
3760 if (INTEGRAL_TYPE_P (type))
3761 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
/* Promote small structs returned in a register to full-word size
3764 for big-endian AAPCS. */
3765 if (arm_return_in_msb (type))
3767 HOST_WIDE_INT size = int_size_in_bytes (type);
3768 if (size % UNITS_PER_WORD != 0)
3770 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3771 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
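/* E.g. a 6-byte aggregate is widened to 8 bytes (DImode) so that,
   returned in the most significant bits, its padding falls at the
   low-order end (a sketch of the effect, not an exhaustive rule). */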
3775 return arm_libcall_value_1 (mode);
3779 libcall_eq (const void *p1, const void *p2)
3781 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3785 libcall_hash (const void *p1)
3787 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3791 add_libcall (htab_t htab, rtx libcall)
3793 *htab_find_slot (htab, libcall, INSERT) = libcall;
3797 arm_libcall_uses_aapcs_base (const_rtx libcall)
3799 static bool init_done = false;
3800 static htab_t libcall_htab;
3806 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3808 add_libcall (libcall_htab,
3809 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3810 add_libcall (libcall_htab,
3811 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3812 add_libcall (libcall_htab,
3813 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3814 add_libcall (libcall_htab,
3815 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3817 add_libcall (libcall_htab,
3818 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3819 add_libcall (libcall_htab,
3820 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3821 add_libcall (libcall_htab,
3822 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3823 add_libcall (libcall_htab,
3824 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3826 add_libcall (libcall_htab,
3827 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3828 add_libcall (libcall_htab,
3829 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3830 add_libcall (libcall_htab,
3831 convert_optab_libfunc (sfix_optab, SImode, DFmode));
3832 add_libcall (libcall_htab,
3833 convert_optab_libfunc (ufix_optab, SImode, DFmode));
3834 add_libcall (libcall_htab,
3835 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3836 add_libcall (libcall_htab,
3837 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3838 add_libcall (libcall_htab,
3839 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3840 add_libcall (libcall_htab,
3841 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3843 /* Values from double-precision helper functions are returned in core
3844 registers if the selected core only supports single-precision
3845 arithmetic, even if we are using the hard-float ABI. The same is
3846 true for single-precision helpers, but we will never be using the
3847 hard-float ABI on a CPU which doesn't support single-precision
3848 operations in hardware. */
3849 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3850 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3851 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3852 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3853 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3854 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3855 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3856 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3857 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3858 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3859 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3860 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3862 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3866 return libcall && htab_find (libcall_htab, libcall) != NULL;
3870 arm_libcall_value_1 (enum machine_mode mode)
3872 if (TARGET_AAPCS_BASED)
3873 return aapcs_libcall_value (mode);
3874 else if (TARGET_IWMMXT_ABI
3875 && arm_vector_mode_supported_p (mode))
3876 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
else
  return gen_rtx_REG (mode, ARG_REGISTER (1));
3881 /* Define how to find the value returned by a library function
3882 assuming the value has mode MODE. */
3885 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3887 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3888 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3890 /* The following libcalls return their result in integer registers,
3891 even though they return a floating point value. */
3892 if (arm_libcall_uses_aapcs_base (libcall))
return gen_rtx_REG (mode, ARG_REGISTER (1));
3897 return arm_libcall_value_1 (mode);
3900 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
3903 arm_function_value_regno_p (const unsigned int regno)
3905 if (regno == ARG_REGISTER (1)
3907 && TARGET_AAPCS_BASED
3909 && TARGET_HARD_FLOAT
3910 && regno == FIRST_VFP_REGNUM)
3911 || (TARGET_IWMMXT_ABI
3912 && regno == FIRST_IWMMXT_REGNUM))
3918 /* Determine the amount of memory needed to store the possible return
3919 registers of an untyped call. */
3921 arm_apply_result_size (void)
3927 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
3929 if (TARGET_IWMMXT_ABI)
3936 /* Decide whether TYPE should be returned in memory (true)
or in a register (false). FNTYPE is the type of the function making the call. */
3940 arm_return_in_memory (const_tree type, const_tree fntype)
3944 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3946 if (TARGET_AAPCS_BASED)
/* Simple, non-aggregate types (i.e. not including vectors and
3949 complex) are always returned in a register (or registers).
3950 We don't care about which register here, so we can short-cut
3951 some of the detail. */
3952 if (!AGGREGATE_TYPE_P (type)
3953 && TREE_CODE (type) != VECTOR_TYPE
&& TREE_CODE (type) != COMPLEX_TYPE)
  return false;
3957 /* Any return value that is no larger than one word can be
if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
  return false;
3962 /* Check any available co-processors to see if they accept the
3963 type as a register candidate (VFP, for example, can return
3964 some aggregates in consecutive registers). These aren't
3965 available if the call is variadic. */
if (aapcs_select_return_coproc (type, fntype) >= 0)
  return false;
3969 /* Vector values should be returned using ARM registers, not
3970 memory (unless they're over 16 bytes, which will break since
3971 we only have four call-clobbered registers to play with). */
3972 if (TREE_CODE (type) == VECTOR_TYPE)
3973 return (size < 0 || size > (4 * UNITS_PER_WORD));
3975 /* The rest go in memory. */
3979 if (TREE_CODE (type) == VECTOR_TYPE)
3980 return (size < 0 || size > (4 * UNITS_PER_WORD));
3982 if (!AGGREGATE_TYPE_P (type)
3983     && (TREE_CODE (type) != VECTOR_TYPE))
3984 /* All simple types are returned in registers. */
3987 if (arm_abi != ARM_ABI_APCS)
3989 /* ATPCS and later return aggregate types in memory only if they are
3990 larger than a word (or are variable size). */
3991 return (size < 0 || size > UNITS_PER_WORD);
3994 /* For the arm-wince targets we choose to be compatible with Microsoft's
3995 ARM and Thumb compilers, which always return aggregates in memory. */
3997 /* All structures/unions bigger than one word are returned in memory.
3998 Also catch the case where int_size_in_bytes returns -1. In this case
3999 the aggregate is either huge or of variable size, and in either case
4000 we will want to return it via memory and not in a register. */
4001 if (size < 0 || size > UNITS_PER_WORD)
4004 if (TREE_CODE (type) == RECORD_TYPE)
4008 /* For a struct the APCS says that we only return in a register
4009 if the type is 'integer like' and every addressable element
4010 has an offset of zero. For practical purposes this means
4011 that the structure can have at most one non bit-field element
4012 and that this element must be the first one in the structure. */
4014 /* Find the first field, ignoring non FIELD_DECL things which will
4015 have been created by C++. */
4016 for (field = TYPE_FIELDS (type);
4017 field && TREE_CODE (field) != FIELD_DECL;
4018 field = DECL_CHAIN (field))
4022 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4024 /* Check that the first field is valid for returning in a register. */
4026 /* ... Floats are not allowed. */
4027 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4030 /* ... Aggregates that are not themselves valid for returning in
4031 a register are not allowed. */
4032 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4035 /* Now check the remaining fields, if any. Only bitfields are allowed,
4036 since they are not addressable. */
4037 for (field = DECL_CHAIN (field);
4039 field = DECL_CHAIN (field))
4041 if (TREE_CODE (field) != FIELD_DECL)
4044 if (!DECL_BIT_FIELD_TYPE (field))
4051 if (TREE_CODE (type) == UNION_TYPE)
4055 /* Unions can be returned in registers if every element is
4056 integral, or can be returned in an integer register. */
4057 for (field = TYPE_FIELDS (type);
4059 field = DECL_CHAIN (field))
4061 if (TREE_CODE (field) != FIELD_DECL)
4064 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4067 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4073 #endif /* not ARM_WINCE */
4075 /* Return all other types in memory. */
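/* Editor's sketch: hypothetical types illustrating the APCS struct
   rules above (assumes a 4-byte UNITS_PER_WORD; none of these names
   appear elsewhere in this file).  */
struct ex_ok1 { int i; };	    /* One non-bit-field member: r0.  */
struct ex_ok2 { unsigned u : 12;
		unsigned v : 4; };  /* Trailing members are bit-fields: r0.  */
struct ex_mem1 { float f; };	    /* First field is a float: memory.  */
struct ex_mem2 { short a, b; };	    /* Word-sized, but two addressable
				       fields: memory.  */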
4079 const struct pcs_attribute_arg
4083 } pcs_attribute_args[] =
4085 {"aapcs", ARM_PCS_AAPCS},
4086 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4088 /* We could recognize these, but changes would be needed elsewhere
4089 * to implement them. */
4090 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4091 {"atpcs", ARM_PCS_ATPCS},
4092 {"apcs", ARM_PCS_APCS},
4094 {NULL, ARM_PCS_UNKNOWN}
4098 arm_pcs_from_attribute (tree attr)
4100 const struct pcs_attribute_arg *ptr;
4103 /* Get the value of the argument. */
4104 if (TREE_VALUE (attr) == NULL_TREE
4105 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4106 return ARM_PCS_UNKNOWN;
4108 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4110 /* Check it against the list of known arguments. */
4111 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4112 if (streq (arg, ptr->arg))
4115 /* An unrecognized PCS variant. */
4116 return ARM_PCS_UNKNOWN;
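/* Editor's example of the attribute whose argument is parsed above
   (hypothetical declaration):

     double ex_dist (double, double) __attribute__ ((pcs ("aapcs-vfp")));

   Calls to ex_dist then follow the VFP variant of the AAPCS, so the
   arguments and result travel in VFP registers rather than core
   registers.  */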
4119 /* Get the PCS variant to use for this call. TYPE is the function's type
4120 specification, DECL is the specific declaration. DECL may be null if
4121 the call could be indirect or if this is a library call. */
4123 arm_get_pcs_model (const_tree type, const_tree decl)
4125 bool user_convention = false;
4126 enum arm_pcs user_pcs = arm_pcs_default;
4131 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4134 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4135 user_convention = true;
4138 if (TARGET_AAPCS_BASED)
4140 /* Detect varargs functions. These always use the base rules
4141 (no argument is ever a candidate for a co-processor register). */
4143 bool base_rules = stdarg_p (type);
4145 if (user_convention)
4147 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4148 sorry ("non-AAPCS derived PCS variant");
4149 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4150 error ("variadic functions must use the base AAPCS variant");
4154 return ARM_PCS_AAPCS;
4155 else if (user_convention)
4157 else if (decl && flag_unit_at_a_time)
4159 /* Local functions never leak outside this compilation unit,
4160 so we are free to use whatever conventions are appropriate. */
4162 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4163 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4165 return ARM_PCS_AAPCS_LOCAL;
4168 else if (user_convention && user_pcs != arm_pcs_default)
4169 sorry ("PCS variant");
4171 /* For everything else we use the target's default. */
4172 return arm_pcs_default;
4177 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum,
4178 const_tree fntype ATTRIBUTE_UNUSED,
4179 rtx libcall ATTRIBUTE_UNUSED,
4180 const_tree fndecl ATTRIBUTE_UNUSED)
4182 /* Record the unallocated VFP registers. */
4183 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4184 pcum->aapcs_vfp_reg_alloc = 0;
4187 /* Walk down the type tree of TYPE counting consecutive base elements.
4188 If *MODEP is VOIDmode, then set it to the first valid floating point
4189 type. If a non-floating point type is found, or if a floating point
4190 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4191 otherwise return the count in the sub-tree. */
4193 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4195 enum machine_mode mode;
4198 switch (TREE_CODE (type))
4201 mode = TYPE_MODE (type);
4202 if (mode != DFmode && mode != SFmode)
4205 if (*modep == VOIDmode)
4214 mode = TYPE_MODE (TREE_TYPE (type));
4215 if (mode != DFmode && mode != SFmode)
4218 if (*modep == VOIDmode)
4227 /* Use V2SImode and V4SImode as representatives of all 64-bit
4228 and 128-bit vector types, whether or not those modes are
4229 supported with the present options. */
4230 size = int_size_in_bytes (type);
4243 if (*modep == VOIDmode)
4246 /* Vector modes are considered to be opaque: two vectors are
4247 equivalent for the purposes of being homogeneous aggregates
4248 if they are the same size. */
4257 tree index = TYPE_DOMAIN (type);
4259 /* Can't handle incomplete types. */
4260 if (!COMPLETE_TYPE_P (type))
4263 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4266 || !TYPE_MAX_VALUE (index)
4267 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4268 || !TYPE_MIN_VALUE (index)
4269 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4273 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4274 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4276 /* There must be no padding. */
4277 if (!host_integerp (TYPE_SIZE (type), 1)
4278 || (tree_low_cst (TYPE_SIZE (type), 1)
4279 != count * GET_MODE_BITSIZE (*modep)))
4291 /* Can't handle incomplete types. */
4292 if (!COMPLETE_TYPE_P (type))
4295 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4297 if (TREE_CODE (field) != FIELD_DECL)
4300 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4306 /* There must be no padding. */
4307 if (!host_integerp (TYPE_SIZE (type), 1)
4308 || (tree_low_cst (TYPE_SIZE (type), 1)
4309 != count * GET_MODE_BITSIZE (*modep)))
4316 case QUAL_UNION_TYPE:
4318 /* These aren't very interesting except in a degenerate case. */
4323 /* Can't handle incomplete types. */
4324 if (!COMPLETE_TYPE_P (type))
4327 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4329 if (TREE_CODE (field) != FIELD_DECL)
4332 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4335 count = count > sub_count ? count : sub_count;
4338 /* There must be no padding. */
4339 if (!host_integerp (TYPE_SIZE (type), 1)
4340 || (tree_low_cst (TYPE_SIZE (type), 1)
4341 != count * GET_MODE_BITSIZE (*modep)))
4354 /* Return true if PCS_VARIANT should use VFP registers. */
4356 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4358 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4360 static bool seen_thumb1_vfp = false;
4362 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4364 sorry ("Thumb-1 hard-float VFP ABI");
4365 /* sorry() is not immediately fatal, so only display this once. */
4366 seen_thumb1_vfp = true;
4372 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4375 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
4376         && (TARGET_VFP_DOUBLE || !is_double));
4379 /* Return true if an argument whose type is TYPE, or mode is MODE, is
4380 suitable for passing or returning in VFP registers for the PCS
4381 variant selected. If it is, then *BASE_MODE is updated to contain
4382 a machine mode describing each element of the argument's type and
4383 *COUNT to hold the number of such elements. */
4385 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4386 enum machine_mode mode, const_tree type,
4387 enum machine_mode *base_mode, int *count)
4389 enum machine_mode new_mode = VOIDmode;
4391 /* If we have the type information, prefer that to working things
4392 out from the mode. */
4395 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4397 if (ag_count > 0 && ag_count <= 4)
4402 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
4403 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4404 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4409 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4412 new_mode = (mode == DCmode ? DFmode : SFmode);
4418 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4421 *base_mode = new_mode;
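/* Editor's examples of what the candidate test above accepts under
   the VFP PCS variant (hypothetical types):

     struct hfa_d2 { double a, b; };       base mode DFmode, count 2
     struct hfa_s4 { float x, y, z, w; };  base mode SFmode, count 4
     struct no_mix { float f; int i; };    rejected: non-FP member
     struct too_big { double d[5]; };      rejected: count > 4  */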
4426 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4427 enum machine_mode mode, const_tree type)
4429 int count ATTRIBUTE_UNUSED;
4430 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4432 if (!use_vfp_abi (pcs_variant, false))
4434 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4439 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4442 if (!use_vfp_abi (pcum->pcs_variant, false))
4445 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4446 &pcum->aapcs_vfp_rmode,
4447 &pcum->aapcs_vfp_rcount);
4451 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4452 const_tree type ATTRIBUTE_UNUSED)
4454 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4455 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4458 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4459 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4461 pcum->aapcs_vfp_reg_alloc = mask << regno;
4462 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4465 int rcount = pcum->aapcs_vfp_rcount;
4467 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4471 /* Avoid using unsupported vector modes. */
4472 if (rmode == V2SImode)
4474 else if (rmode == V4SImode)
4481 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4482 for (i = 0; i < rcount; i++)
4484 rtx tmp = gen_rtx_REG (rmode,
4485 FIRST_VFP_REGNUM + regno + i * rshift);
4486 tmp = gen_rtx_EXPR_LIST
4488 GEN_INT (i * GET_MODE_SIZE (rmode)));
4489 XVECEXP (par, 0, i) = tmp;
4492 pcum->aapcs_reg = par;
4495 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
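/* Editor's sketch of the free-register scan above in isolation: find
   the lowest suitably aligned block of SHIFT * COUNT consecutive free
   S registers in FREE_MASK, stepping by SHIFT as the loop above does
   so that D-register values stay even-aligned.  Illustrative only;
   assumes NUM_VFP_ARG_REGS == 16.  */
static int ATTRIBUTE_UNUSED
example_scan_vfp_block (unsigned free_mask, int shift, int count)
{
  unsigned mask = (1u << (shift * count)) - 1;
  int regno;

  for (regno = 0; regno < 16; regno += shift)
    if (((free_mask >> regno) & mask) == mask)
      return regno;		/* First S register of the block.  */
  return -1;			/* No block free: goes elsewhere.  */
}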
4502 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
4503 enum machine_mode mode,
4504 const_tree type ATTRIBUTE_UNUSED)
4506 if (!use_vfp_abi (pcs_variant, false))
4509 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4512 enum machine_mode ag_mode;
4517 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4522 if (ag_mode == V2SImode)
4524 else if (ag_mode == V4SImode)
4530 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
4531 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4532 for (i = 0; i < count; i++)
4534 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4535 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4536 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4537 XVECEXP (par, 0, i) = tmp;
4543 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4547 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
4548 enum machine_mode mode ATTRIBUTE_UNUSED,
4549 const_tree type ATTRIBUTE_UNUSED)
4551 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4552 pcum->aapcs_vfp_reg_alloc = 0;
4556 #define AAPCS_CP(X) \
4558 aapcs_ ## X ## _cum_init, \
4559 aapcs_ ## X ## _is_call_candidate, \
4560 aapcs_ ## X ## _allocate, \
4561 aapcs_ ## X ## _is_return_candidate, \
4562 aapcs_ ## X ## _allocate_return_reg, \
4563 aapcs_ ## X ## _advance \
4566 /* Table of co-processors that can be used to pass arguments in
4567 registers. Ideally no argument should be a candidate for more than
4568 one co-processor table entry, but the table is processed in order
4569 and stops after the first match. If that entry then fails to put
4570 the argument into a co-processor register, the argument will go on the stack. */
4574 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4575 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4577 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4578 BLKmode) is a candidate for this co-processor's registers; this
4579 function should ignore any position-dependent state in
4580 CUMULATIVE_ARGS and only use call-type dependent information. */
4581 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4583 /* Return true if the argument does get a co-processor register; it
4584 should set aapcs_reg to an RTX of the register allocated as is
4585 required for a return from FUNCTION_ARG. */
4586 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4588 /* Return true if a result of mode MODE (or type TYPE if MODE is
4589 BLKmode) can be returned in this co-processor's registers. */
4590 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4592 /* Allocate and return an RTX element to hold the return type of a
4593 call. This routine must not fail and will only be called if
4594 is_return_candidate returned true with the same parameters. */
4595 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4597 /* Finish processing this argument and prepare to start processing the next one. */
4599 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4600 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
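/* Editor's note: each table entry comes from the AAPCS_CP macro
   above; AAPCS_CP(vfp), for instance, expands to

     { aapcs_vfp_cum_init, aapcs_vfp_is_call_candidate,
       aapcs_vfp_allocate, aapcs_vfp_is_return_candidate,
       aapcs_vfp_allocate_return_reg, aapcs_vfp_advance }

   so supporting a new co-processor means providing these six hooks.  */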
4608 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4613 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4614 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4621 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4623 /* We aren't passed a decl, so we can't check that a call is local.
4624 However, it isn't clear that that would be a win anyway, since it
4625 might limit some tail-calling opportunities. */
4626 enum arm_pcs pcs_variant;
4630 const_tree fndecl = NULL_TREE;
4632 if (TREE_CODE (fntype) == FUNCTION_DECL)
4635 fntype = TREE_TYPE (fntype);
4638 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4641 pcs_variant = arm_pcs_default;
4643 if (pcs_variant != ARM_PCS_AAPCS)
4647 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4648 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4657 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4660 /* We aren't passed a decl, so we can't check that a call is local.
4661 However, it isn't clear that that would be a win anyway, since it
4662 might limit some tail-calling opportunities. */
4663 enum arm_pcs pcs_variant;
4664 int unsignedp ATTRIBUTE_UNUSED;
4668 const_tree fndecl = NULL_TREE;
4670 if (TREE_CODE (fntype) == FUNCTION_DECL)
4673 fntype = TREE_TYPE (fntype);
4676 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4679 pcs_variant = arm_pcs_default;
4681 /* Promote integer types. */
4682 if (type && INTEGRAL_TYPE_P (type))
4683 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4685 if (pcs_variant != ARM_PCS_AAPCS)
4689 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4690 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4692 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4696 /* Promotes small structs returned in a register to full-word size
4697 for big-endian AAPCS. */
4698 if (type && arm_return_in_msb (type))
4700 HOST_WIDE_INT size = int_size_in_bytes (type);
4701 if (size % UNITS_PER_WORD != 0)
4703 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4704 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4708 return gen_rtx_REG (mode, R0_REGNUM);
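/* Editor's example of the MSB promotion above: on big-endian AAPCS a
   3-byte struct is padded up to one 4-byte word, so it comes back as
   a full SImode value in r0 with the struct's bytes at the most
   significant end.  */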
4712 aapcs_libcall_value (enum machine_mode mode)
4714 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4715 && GET_MODE_SIZE (mode) <= 4)
4718 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4721 /* Lay out a function argument using the AAPCS rules. The rule
4722 numbers referred to here are those in the AAPCS. */
4724 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4725 const_tree type, bool named)
4730 /* We only need to do this once per argument. */
4731 if (pcum->aapcs_arg_processed)
4734 pcum->aapcs_arg_processed = true;
4736 /* Special case: if named is false then we are handling an incoming
4737 anonymous argument which is on the stack. */
4741 /* Is this a potential co-processor register candidate? */
4742 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4744 int slot = aapcs_select_call_coproc (pcum, mode, type);
4745 pcum->aapcs_cprc_slot = slot;
4747 /* We don't have to apply any of the rules from part B of the
4748 preparation phase, these are handled elsewhere in the
4753 /* A co-processor register candidate goes either in its own
4754 class of registers or on the stack. */
4755 if (!pcum->aapcs_cprc_failed[slot])
4757 /* C1.cp - Try to allocate the argument to co-processor registers. */
4759 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4762 /* C2.cp - Put the argument on the stack and note that we
4763 can't assign any more candidates in this slot. We also
4764 need to note that we have allocated stack space, so that
4765 we won't later try to split a non-cprc candidate between
4766 core registers and the stack. */
4767 pcum->aapcs_cprc_failed[slot] = true;
4768 pcum->can_split = false;
4771 /* We didn't get a register, so this argument goes on the stack. */
4773 gcc_assert (pcum->can_split == false);
4778 /* C3 - For double-word aligned arguments, round the NCRN up to the
4779 next even number. */
4780 ncrn = pcum->aapcs_ncrn;
4781 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4784 nregs = ARM_NUM_REGS2 (mode, type);
4786 /* Sigh, this test should really assert that nregs > 0, but a GCC
4787 extension allows empty structs and then gives them empty size; it
4788 then allows such a structure to be passed by value. For some of
4789 the code below we have to pretend that such an argument has
4790 non-zero size so that we 'locate' it correctly either in
4791 registers or on the stack. */
4792 gcc_assert (nregs >= 0);
4794 nregs2 = nregs ? nregs : 1;
4796 /* C4 - Argument fits entirely in core registers. */
4797 if (ncrn + nregs2 <= NUM_ARG_REGS)
4799 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4800 pcum->aapcs_next_ncrn = ncrn + nregs;
4804 /* C5 - Some core registers left and there are no arguments already
4805 on the stack: split this argument between the remaining core
4806 registers and the stack. */
4807 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4809 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4810 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4811 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4815 /* C6 - NCRN is set to 4. */
4816 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4818 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
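/* Editor's sketch of rules C3-C5 above, reduced to plain arithmetic
   (assumes NUM_ARG_REGS == 4 and 4-byte words; illustrative only).
   Returns the number of argument bytes that land in core registers.  */
static int ATTRIBUTE_UNUSED
example_core_reg_bytes (int ncrn, int nregs, bool dword_aligned,
			bool can_split)
{
  if (dword_aligned && (ncrn & 1))
    ncrn++;				/* C3: round NCRN up to even.  */
  if (ncrn + nregs <= 4)
    return nregs * 4;			/* C4: fits in r0-r3 entirely.  */
  if (ncrn < 4 && can_split)
    return (4 - ncrn) * 4;		/* C5: split registers/stack.  */
  return 0;				/* C6-C8: all on the stack.  */
}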
4822 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4823 for a call to a function whose data type is FNTYPE.
4824 For a library call, FNTYPE is NULL. */
4826 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4828 tree fndecl ATTRIBUTE_UNUSED)
4830 /* Long call handling. */
4832 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4834 pcum->pcs_variant = arm_pcs_default;
4836 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4838 if (arm_libcall_uses_aapcs_base (libname))
4839 pcum->pcs_variant = ARM_PCS_AAPCS;
4841 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4842 pcum->aapcs_reg = NULL_RTX;
4843 pcum->aapcs_partial = 0;
4844 pcum->aapcs_arg_processed = false;
4845 pcum->aapcs_cprc_slot = -1;
4846 pcum->can_split = true;
4848 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4852 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4854 pcum->aapcs_cprc_failed[i] = false;
4855 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4863 /* On the ARM, the offset starts at 0. */
4865 pcum->iwmmxt_nregs = 0;
4866 pcum->can_split = true;
4868 /* Varargs vectors are treated the same as long long.
4869 named_count avoids having to change the way arm handles 'named'. */
4870 pcum->named_count = 0;
4873 if (TARGET_REALLY_IWMMXT && fntype)
4877 for (fn_arg = TYPE_ARG_TYPES (fntype);
4879 fn_arg = TREE_CHAIN (fn_arg))
4880 pcum->named_count += 1;
4882 if (! pcum->named_count)
4883 pcum->named_count = INT_MAX;
4888 /* Return true if mode/type need doubleword alignment. */
4890 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4892 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4893 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
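/* Editor's example: DImode ('long long') and DFmode ('double') have
   64-bit alignment, which exceeds PARM_BOUNDARY (32 on ARM), so this
   returns true for them; the C3 rounding in aapcs_layout_arg then
   starts such arguments in an even-numbered register (r0 or r2).  */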
4897 /* Determine where to put an argument to a function.
4898 Value is zero to push the argument on the stack,
4899 or a hard register in which to store the argument.
4901 MODE is the argument's machine mode.
4902 TYPE is the data type of the argument (as a tree).
4903 This is null for libcalls where that information may not be available.
4905 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4906 the preceding args and about the function being called.
4907 NAMED is nonzero if this argument is a named parameter
4908 (otherwise it is an extra parameter matching an ellipsis).
4910 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4911 other arguments are passed on the stack. If (NAMED == 0) (which happens
4912 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4913 defined), say it is passed on the stack (function_prologue will
4914 indeed make it pass on the stack if necessary). */
4917 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4918 const_tree type, bool named)
4920 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4923 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4924 a call insn (op3 of a call_value insn). */
4925 if (mode == VOIDmode)
4928 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4930 aapcs_layout_arg (pcum, mode, type, named);
4931 return pcum->aapcs_reg;
4934 /* Varargs vectors are treated the same as long long.
4935 named_count avoids having to change the way arm handles 'named'. */
4936 if (TARGET_IWMMXT_ABI
4937 && arm_vector_mode_supported_p (mode)
4938 && pcum->named_count > pcum->nargs + 1)
4940 if (pcum->iwmmxt_nregs <= 9)
4941 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4944 pcum->can_split = false;
4949 /* Put doubleword aligned quantities in even register pairs. */
4951 && ARM_DOUBLEWORD_ALIGN
4952 && arm_needs_doubleword_align (mode, type))
4955 /* Only allow splitting an arg between regs and memory if all preceding
4956 args were allocated to regs. For args passed by reference we only count
4957 the reference pointer. */
4958 if (pcum->can_split)
4961 nregs = ARM_NUM_REGS2 (mode, type);
4963 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4966 return gen_rtx_REG (mode, pcum->nregs);
4970 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4972 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4973 ? DOUBLEWORD_ALIGNMENT
4978 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
4979 tree type, bool named)
4981 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4982 int nregs = pcum->nregs;
4984 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4986 aapcs_layout_arg (pcum, mode, type, named);
4987 return pcum->aapcs_partial;
4990 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4993 if (NUM_ARG_REGS > nregs
4994 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4996 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
5001 /* Update the data in PCUM to advance over an argument
5002 of mode MODE and data type TYPE.
5003 (TYPE is null for libcalls where that information may not be available.) */
5006 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5007 const_tree type, bool named)
5009 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5011 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5013 aapcs_layout_arg (pcum, mode, type, named);
5015 if (pcum->aapcs_cprc_slot >= 0)
5017 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
5019 pcum->aapcs_cprc_slot = -1;
5022 /* Generic stuff. */
5023 pcum->aapcs_arg_processed = false;
5024 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
5025 pcum->aapcs_reg = NULL_RTX;
5026 pcum->aapcs_partial = 0;
5031 if (arm_vector_mode_supported_p (mode)
5032 && pcum->named_count > pcum->nargs
5033 && TARGET_IWMMXT_ABI)
5034 pcum->iwmmxt_nregs += 1;
5036 pcum->nregs += ARM_NUM_REGS2 (mode, type);
5040 /* Variable sized types are passed by reference. This is a GCC
5041 extension to the ARM ABI. */
5044 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
5045 enum machine_mode mode ATTRIBUTE_UNUSED,
5046 const_tree type, bool named ATTRIBUTE_UNUSED)
5048 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
5051 /* Encode the current state of the #pragma [no_]long_calls. */
5054 OFF, /* No #pragma [no_]long_calls is in effect. */
5055 LONG, /* #pragma long_calls is in effect. */
5056 SHORT /* #pragma no_long_calls is in effect. */
5059 static arm_pragma_enum arm_pragma_long_calls = OFF;
5062 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5064 arm_pragma_long_calls = LONG;
5068 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5070 arm_pragma_long_calls = SHORT;
5074 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5076 arm_pragma_long_calls = OFF;
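/* Editor's example of the pragmas handled above (hypothetical
   declarations):

     #pragma long_calls
     void ex_far (void);	-- gets the long_call type attribute
     #pragma no_long_calls
     void ex_near (void);	-- gets the short_call type attribute
     #pragma long_calls_off	-- revert to the command-line default

   arm_set_default_type_attributes below attaches the corresponding
   attribute while a pragma is in effect.  */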
5079 /* Handle an attribute requiring a FUNCTION_DECL;
5080 arguments as in struct attribute_spec.handler. */
5082 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
5083 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5085 if (TREE_CODE (*node) != FUNCTION_DECL)
5087 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5089 *no_add_attrs = true;
5095 /* Handle an "interrupt" or "isr" attribute;
5096 arguments as in struct attribute_spec.handler. */
5098 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5103 if (TREE_CODE (*node) != FUNCTION_DECL)
5105 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5107 *no_add_attrs = true;
5109 /* FIXME: the argument, if any, is checked for type attributes;
5110 should it be checked for decl ones? */
5114 if (TREE_CODE (*node) == FUNCTION_TYPE
5115 || TREE_CODE (*node) == METHOD_TYPE)
5117 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5119 warning (OPT_Wattributes, "%qE attribute ignored",
5121 *no_add_attrs = true;
5124 else if (TREE_CODE (*node) == POINTER_TYPE
5125 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5126 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5127 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5129 *node = build_variant_type_copy (*node);
5130 TREE_TYPE (*node) = build_type_attribute_variant
5132 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5133 *no_add_attrs = true;
5137 /* Possibly pass this attribute on from the type to a decl. */
5138 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5139 | (int) ATTR_FLAG_FUNCTION_NEXT
5140 | (int) ATTR_FLAG_ARRAY_NEXT))
5142 *no_add_attrs = true;
5143 return tree_cons (name, args, NULL_TREE);
5147 warning (OPT_Wattributes, "%qE attribute ignored",
5156 /* Handle a "pcs" attribute; arguments as in struct
5157 attribute_spec.handler. */
5159 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5160 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5162 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5164 warning (OPT_Wattributes, "%qE attribute ignored", name);
5165 *no_add_attrs = true;
5170 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5171 /* Handle the "notshared" attribute. This attribute is another way of
5172 requesting hidden visibility. ARM's compiler supports
5173 "__declspec(notshared)"; we support the same thing via an
5177 arm_handle_notshared_attribute (tree *node,
5178 tree name ATTRIBUTE_UNUSED,
5179 tree args ATTRIBUTE_UNUSED,
5180 int flags ATTRIBUTE_UNUSED,
5183 tree decl = TYPE_NAME (*node);
5187 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5188 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5189 *no_add_attrs = false;
5195 /* Return 0 if the attributes for two types are incompatible, 1 if they
5196 are compatible, and 2 if they are nearly compatible (which causes a
5197 warning to be generated). */
5199 arm_comp_type_attributes (const_tree type1, const_tree type2)
5203 /* Check for mismatch of non-default calling convention. */
5204 if (TREE_CODE (type1) != FUNCTION_TYPE)
5207 /* Check for mismatched call attributes. */
5208 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5209 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5210 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5211 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5213 /* Only bother to check if an attribute is defined. */
5214 if (l1 | l2 | s1 | s2)
5216 /* If one type has an attribute, the other must have the same attribute. */
5217 if ((l1 != l2) || (s1 != s2))
5220 /* Disallow mixed attributes. */
5221 if ((l1 & s2) || (l2 & s1))
5225 /* Check for mismatched ISR attribute. */
5226 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5228 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5229 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5231 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5238 /* Assign default attributes to a newly defined type. This is used to
5239 set short_call/long_call attributes for function types of
5240 functions defined inside corresponding #pragma scopes. */
5242 arm_set_default_type_attributes (tree type)
5244 /* Add __attribute__ ((long_call)) to all functions when inside
5245 #pragma long_calls, or __attribute__ ((short_call)) when inside
5246 #pragma no_long_calls. */
5247 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5249 tree type_attr_list, attr_name;
5250 type_attr_list = TYPE_ATTRIBUTES (type);
5252 if (arm_pragma_long_calls == LONG)
5253 attr_name = get_identifier ("long_call");
5254 else if (arm_pragma_long_calls == SHORT)
5255 attr_name = get_identifier ("short_call");
5259 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5260 TYPE_ATTRIBUTES (type) = type_attr_list;
5264 /* Return true if DECL is known to be linked into section SECTION. */
5267 arm_function_in_section_p (tree decl, section *section)
5269 /* We can only be certain about functions defined in the same
5270 compilation unit. */
5271 if (!TREE_STATIC (decl))
5274 /* Make sure that SYMBOL always binds to the definition in this
5275 compilation unit. */
5276 if (!targetm.binds_local_p (decl))
5279 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5280 if (!DECL_SECTION_NAME (decl))
5282 /* Make sure that we will not create a unique section for DECL. */
5283 if (flag_function_sections || DECL_ONE_ONLY (decl))
5287 return function_section (decl) == section;
5290 /* Return nonzero if a 32-bit "long_call" should be generated for
5291 a call from the current function to DECL. We generate a long_call if the function:
5294 a. has an __attribute__((long_call))
5295 or b. is within the scope of a #pragma long_calls
5296 or c. the -mlong-calls command line switch has been specified
5298 However, we do not generate a long call if the function:
5300 d. has an __attribute__ ((short_call))
5301 or e. is inside the scope of a #pragma no_long_calls
5302 or f. is defined in the same section as the current function. */
5305 arm_is_long_call_p (tree decl)
5310 return TARGET_LONG_CALLS;
5312 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5313 if (lookup_attribute ("short_call", attrs))
5316 /* For "f", be conservative, and only cater for cases in which the
5317 whole of the current function is placed in the same section. */
5318 if (!flag_reorder_blocks_and_partition
5319 && TREE_CODE (decl) == FUNCTION_DECL
5320 && arm_function_in_section_p (decl, current_function_section ()))
5323 if (lookup_attribute ("long_call", attrs))
5326 return TARGET_LONG_CALLS;
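/* Editor's examples of the attributes tested above (hypothetical
   declarations):

     extern void ex_far (void) __attribute__ ((long_call));
     extern void ex_near (void) __attribute__ ((short_call));

   Calls to ex_far always use a 32-bit sequence (rule a); calls to
   ex_near never do (rule d), even under -mlong-calls.  */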
5329 /* Return nonzero if it is ok to make a tail-call to DECL. */
5331 arm_function_ok_for_sibcall (tree decl, tree exp)
5333 unsigned long func_type;
5335 if (cfun->machine->sibcall_blocked)
5338 /* Never tailcall something for which we have no decl, or if we
5339 are generating code for Thumb-1. */
5340 if (decl == NULL || TARGET_THUMB1)
5343 /* The PIC register is live on entry to VxWorks PLT entries, so we
5344 must make the call before restoring the PIC register. */
5345 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5348 /* Cannot tail-call to long calls, since these are out of range of
5349 a branch instruction. */
5350 if (arm_is_long_call_p (decl))
5353 /* If we are interworking and the function is not declared static
5354 then we can't tail-call it unless we know that it exists in this
5355 compilation unit (since it might be a Thumb routine). */
5356 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5359 func_type = arm_current_func_type ();
5360 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5361 if (IS_INTERRUPT (func_type))
5364 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5366 /* Check that the return value locations are the same. For
5367 example that we aren't returning a value from the sibling in
5368 a VFP register but then need to transfer it to a core register. */
5372 a = arm_function_value (TREE_TYPE (exp), decl, false);
5373 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5375 if (!rtx_equal_p (a, b))
5379 /* Never tailcall if function may be called with a misaligned SP. */
5380 if (IS_STACKALIGN (func_type))
5383 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5385 references should become a NOP. Don't convert such calls into sibling calls. */
5386 if (TARGET_AAPCS_BASED
5387 && arm_abi == ARM_ABI_AAPCS
5388 && DECL_WEAK (decl))
5391 /* Everything else is ok. */
5396 /* Addressing mode support functions. */
5398 /* Return nonzero if X is a legitimate immediate operand when compiling
5399 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5401 legitimate_pic_operand_p (rtx x)
5403 if (GET_CODE (x) == SYMBOL_REF
5404 || (GET_CODE (x) == CONST
5405 && GET_CODE (XEXP (x, 0)) == PLUS
5406 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5412 /* Record that the current function needs a PIC register. Initialize
5413 cfun->machine->pic_reg if we have not already done so. */
5416 require_pic_register (void)
5418 /* A lot of the logic here is made obscure by the fact that this
5419 routine gets called as part of the rtx cost estimation process.
5420 We don't want those calls to affect any assumptions about the real
5421 function; and further, we can't call entry_of_function() until we
5422 start the real expansion process. */
5423 if (!crtl->uses_pic_offset_table)
5425 gcc_assert (can_create_pseudo_p ());
5426 if (arm_pic_register != INVALID_REGNUM)
5428 if (!cfun->machine->pic_reg)
5429 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5431 /* Play games to avoid marking the function as needing pic
5432 if we are being called as part of the cost-estimation process. */
5434 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5435 crtl->uses_pic_offset_table = 1;
5441 if (!cfun->machine->pic_reg)
5442 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5444 /* Play games to avoid marking the function as needing pic
5445 if we are being called as part of the cost-estimation process. */
5447 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5449 crtl->uses_pic_offset_table = 1;
5452 arm_load_pic_register (0UL);
5457 for (insn = seq; insn; insn = NEXT_INSN (insn))
5459 INSN_LOCATION (insn) = prologue_location;
5461 /* We can be called during expansion of PHI nodes, where
5462 we can't yet emit instructions directly in the final
5463 insn stream. Queue the insns on the entry edge, they will
5464 be committed after everything else is expanded. */
5465 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5472 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5474 if (GET_CODE (orig) == SYMBOL_REF
5475 || GET_CODE (orig) == LABEL_REF)
5481 gcc_assert (can_create_pseudo_p ());
5482 reg = gen_reg_rtx (Pmode);
5485 /* VxWorks does not impose a fixed gap between segments; the run-time
5486 gap can be different from the object-file gap. We therefore can't
5487 use GOTOFF unless we are absolutely sure that the symbol is in the
5488 same segment as the GOT. Unfortunately, the flexibility of linker
5489 scripts means that we can't be sure of that in general, so assume
5490 that GOTOFF is never valid on VxWorks. */
5491 if ((GET_CODE (orig) == LABEL_REF
5492 || (GET_CODE (orig) == SYMBOL_REF
5493     && SYMBOL_REF_LOCAL_P (orig)))
5495 && !TARGET_VXWORKS_RTP)
5496 insn = arm_pic_static_addr (orig, reg);
5502 /* If this function doesn't have a pic register, create one now. */
5503 require_pic_register ();
5505 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5507 /* Make the MEM as close to a constant as possible. */
5508 mem = SET_SRC (pat);
5509 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5510 MEM_READONLY_P (mem) = 1;
5511 MEM_NOTRAP_P (mem) = 1;
5513 insn = emit_insn (pat);
5516 /* Put a REG_EQUAL note on this insn, so that it can be optimized by loop. */
5518 set_unique_reg_note (insn, REG_EQUAL, orig);
5522 else if (GET_CODE (orig) == CONST)
5526 if (GET_CODE (XEXP (orig, 0)) == PLUS
5527 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5530 /* Handle the case where we have: const (UNSPEC_TLS). */
5531 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5532 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5535 /* Handle the case where we have:
5536 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a CONST_INT. */
5538 if (GET_CODE (XEXP (orig, 0)) == PLUS
5539 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5540 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5542 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
5548 gcc_assert (can_create_pseudo_p ());
5549 reg = gen_reg_rtx (Pmode);
5552 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5554 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5555 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5556 base == reg ? 0 : reg);
5558 if (CONST_INT_P (offset))
5560 /* The base register doesn't really matter; we only want to
5561 test the index for the appropriate mode. */
5562 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5564 gcc_assert (can_create_pseudo_p ());
5565 offset = force_reg (Pmode, offset);
5568 if (CONST_INT_P (offset))
5569 return plus_constant (Pmode, base, INTVAL (offset));
5572 if (GET_MODE_SIZE (mode) > 4
5573 && (GET_MODE_CLASS (mode) == MODE_INT
5574 || TARGET_SOFT_FLOAT))
5576 emit_insn (gen_addsi3 (reg, base, offset));
5580 return gen_rtx_PLUS (Pmode, base, offset);
5587 /* Find a spare register to use during the prologue of a function. */
5590 thumb_find_work_register (unsigned long pushed_regs_mask)
5594 /* Check the argument registers first as these are call-used. The
5595 register allocation order means that sometimes r3 might be used
5596 but earlier argument registers might not, so check them all. */
5597 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5598 if (!df_regs_ever_live_p (reg))
5601 /* Before going on to check the call-saved registers we can try a couple
5602 more ways of deducing that r3 is available. The first is when we are
5603 pushing anonymous arguments onto the stack and we have fewer than 4
5604 registers' worth of fixed arguments (*). In this case r3 will be part of
5605 the variable argument list and so we can be sure that it will be
5606 pushed right at the start of the function. Hence it will be available
5607 for the rest of the prologue.
5608 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5609 if (cfun->machine->uses_anonymous_args
5610 && crtl->args.pretend_args_size > 0)
5611 return LAST_ARG_REGNUM;
5613 /* The other case is when we have fixed arguments but fewer than 4 registers'
5614 worth. In this case r3 might be used in the body of the function, but
5615 it is not being used to convey an argument into the function. In theory
5616 we could just check crtl->args.size to see how many bytes are
5617 being passed in argument registers, but it seems that it is unreliable.
5618 Sometimes it will have the value 0 when in fact arguments are being
5619 passed. (See testcase execute/20021111-1.c for an example). So we also
5620 check the args_info.nregs field as well. The problem with this field is
5621 that it makes no allowances for arguments that are passed to the
5622 function but which are not used. Hence we could miss an opportunity
5623 when a function has an unused argument in r3. But it is better to be
5624 safe than sorry. */
5625 if (! cfun->machine->uses_anonymous_args
5626 && crtl->args.size >= 0
5627 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5628 && (TARGET_AAPCS_BASED
5629 ? crtl->args.info.aapcs_ncrn < 4
5630 : crtl->args.info.nregs < 4))
5631 return LAST_ARG_REGNUM;
5633 /* Otherwise look for a call-saved register that is going to be pushed. */
5634 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5635 if (pushed_regs_mask & (1 << reg))
5640 /* Thumb-2 can use high regs. */
5641 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5642 if (pushed_regs_mask & (1 << reg))
5645 /* Something went wrong - thumb_compute_save_reg_mask()
5646 should have arranged for a suitable register to be pushed. */
5650 static GTY(()) int pic_labelno;
5652 /* Generate code to load the PIC register. In thumb mode SCRATCH is a low register. */
5656 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5658 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5660 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5663 gcc_assert (flag_pic);
5665 pic_reg = cfun->machine->pic_reg;
5666 if (TARGET_VXWORKS_RTP)
5668 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5669 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5670 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5672 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5674 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5675 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5679 /* We use an UNSPEC rather than a LABEL_REF because this label
5680 never appears in the code stream. */
5682 labelno = GEN_INT (pic_labelno++);
5683 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5684 l1 = gen_rtx_CONST (VOIDmode, l1);
5686 /* On the ARM the PC register contains 'dot + 8' at the time of the
5687 addition; on the Thumb it is 'dot + 4'. */
5688 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5689 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5691 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5695 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5697 else /* TARGET_THUMB1 */
5699 if (arm_pic_register != INVALID_REGNUM
5700 && REGNO (pic_reg) > LAST_LO_REGNUM)
5702 /* We will have pushed the pic register, so we should always be
5703 able to find a work register. */
5704 pic_tmp = gen_rtx_REG (SImode,
5705 thumb_find_work_register (saved_regs));
5706 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5707 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5708 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5711 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5715 /* Need to emit this whether or not we obey regdecls,
5716 since setjmp/longjmp can cause life info to screw up. */
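/* Editor's note: the code above boils down to a sequence roughly like

       ldr	rPIC, .LPIC_off
     .LPIC0:
       add	rPIC, pc, rPIC

   where .LPIC_off holds _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8); the
   '+ 8' (ARM) or '+ 4' (Thumb) cancels the pc read-ahead described
   above.  */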
5720 /* Generate code to load the address of a static var when flag_pic is set. */
5722 arm_pic_static_addr (rtx orig, rtx reg)
5724 rtx l1, labelno, offset_rtx, insn;
5726 gcc_assert (flag_pic);
5728 /* We use an UNSPEC rather than a LABEL_REF because this label
5729 never appears in the code stream. */
5730 labelno = GEN_INT (pic_labelno++);
5731 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5732 l1 = gen_rtx_CONST (VOIDmode, l1);
5734 /* On the ARM the PC register contains 'dot + 8' at the time of the
5735 addition; on the Thumb it is 'dot + 4'. */
5736 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5737 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5738 UNSPEC_SYMBOL_OFFSET);
5739 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5741 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
5745 /* Return nonzero if X is valid as an ARM state addressing register. */
5747 arm_address_register_rtx_p (rtx x, int strict_p)
5757 return ARM_REGNO_OK_FOR_BASE_P (regno);
5759 return (regno <= LAST_ARM_REGNUM
5760 || regno >= FIRST_PSEUDO_REGISTER
5761 || regno == FRAME_POINTER_REGNUM
5762 || regno == ARG_POINTER_REGNUM);
5765 /* Return TRUE if this rtx is the difference of a symbol and a label,
5766 and will reduce to a PC-relative relocation in the object file.
5767 Expressions like this can be left alone when generating PIC, rather
5768 than forced through the GOT. */
5770 pcrel_constant_p (rtx x)
5772 if (GET_CODE (x) == MINUS)
5773 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5778 /* Return true if X will surely end up in an index register after the next splitting pass. */
5781 will_be_in_index_register (const_rtx x)
5783 /* arm.md: calculate_pic_address will split this into a register. */
5784 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
5787 /* Return nonzero if X is a valid ARM state address operand. */
5789 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5793 enum rtx_code code = GET_CODE (x);
5795 if (arm_address_register_rtx_p (x, strict_p))
5798 use_ldrd = (TARGET_LDRD
5800 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5802 if (code == POST_INC || code == PRE_DEC
5803 || ((code == PRE_INC || code == POST_DEC)
5804 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5805 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5807 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5808 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5809 && GET_CODE (XEXP (x, 1)) == PLUS
5810 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5812 rtx addend = XEXP (XEXP (x, 1), 1);
5814 /* Don't allow ldrd post increment by register because it's hard
5815 to fix up invalid register choices. */
5817 && GET_CODE (x) == POST_MODIFY
5821 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5822 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5825 /* After reload constants split into minipools will have addresses
5826 from a LABEL_REF. */
5827 else if (reload_completed
5828 && (code == LABEL_REF
5830 && GET_CODE (XEXP (x, 0)) == PLUS
5831 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5832 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
5835 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5838 else if (code == PLUS)
5840 rtx xop0 = XEXP (x, 0);
5841 rtx xop1 = XEXP (x, 1);
5843 return ((arm_address_register_rtx_p (xop0, strict_p)
5844 && ((CONST_INT_P (xop1)
5845 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5846 || (!strict_p && will_be_in_index_register (xop1))))
5847 || (arm_address_register_rtx_p (xop1, strict_p)
5848 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5852 /* Reload currently can't handle MINUS, so disable this for now. */
5853 else if (GET_CODE (x) == MINUS)
5855 rtx xop0 = XEXP (x, 0);
5856 rtx xop1 = XEXP (x, 1);
5858 return (arm_address_register_rtx_p (xop0, strict_p)
5859 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5863 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5864 && code == SYMBOL_REF
5865 && CONSTANT_POOL_ADDRESS_P (x)
5867 && symbol_mentioned_p (get_pool_constant (x))
5868 && ! pcrel_constant_p (get_pool_constant (x))))
5874 /* Return nonzero if X is a valid Thumb-2 address operand. */
5876 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5879 enum rtx_code code = GET_CODE (x);
5881 if (arm_address_register_rtx_p (x, strict_p))
5884 use_ldrd = (TARGET_LDRD
5886 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5888 if (code == POST_INC || code == PRE_DEC
5889 || ((code == PRE_INC || code == POST_DEC)
5890 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5891 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5893 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5894 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5895 && GET_CODE (XEXP (x, 1)) == PLUS
5896 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5898 /* Thumb-2 only has autoincrement by constant. */
5899 rtx addend = XEXP (XEXP (x, 1), 1);
5900 HOST_WIDE_INT offset;
5902 if (!CONST_INT_P (addend))
5905 offset = INTVAL (addend);
5906 if (GET_MODE_SIZE (mode) <= 4)
5907 return (offset > -256 && offset < 256);
5909 return (use_ldrd && offset > -1024 && offset < 1024
5910 && (offset & 3) == 0);
5913 /* After reload constants split into minipools will have addresses
5914 from a LABEL_REF. */
5915 else if (reload_completed
5916 && (code == LABEL_REF
5918 && GET_CODE (XEXP (x, 0)) == PLUS
5919 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5920 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
5923 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5926 else if (code == PLUS)
5928 rtx xop0 = XEXP (x, 0);
5929 rtx xop1 = XEXP (x, 1);
5931 return ((arm_address_register_rtx_p (xop0, strict_p)
5932 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5933 || (!strict_p && will_be_in_index_register (xop1))))
5934 || (arm_address_register_rtx_p (xop1, strict_p)
5935 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5938 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5939 && code == SYMBOL_REF
5940 && CONSTANT_POOL_ADDRESS_P (x)
5942 && symbol_mentioned_p (get_pool_constant (x))
5943 && ! pcrel_constant_p (get_pool_constant (x))))
5949 /* Return nonzero if INDEX is valid for an address index operand in
5952 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5955 HOST_WIDE_INT range;
5956 enum rtx_code code = GET_CODE (index);
5958 /* Standard coprocessor addressing modes. */
5959 if (TARGET_HARD_FLOAT
5961 && (mode == SFmode || mode == DFmode))
5962 return (code == CONST_INT && INTVAL (index) < 1024
5963 && INTVAL (index) > -1024
5964 && (INTVAL (index) & 3) == 0);
5966 /* For quad modes, we restrict the constant offset to be slightly less
5967 than what the instruction format permits. We do this because for
5968 quad mode moves, we will actually decompose them into two separate
5969 double-mode reads or writes. INDEX must therefore be a valid
5970 (double-mode) offset and so should INDEX+8. */
5971 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5972 return (code == CONST_INT
5973 && INTVAL (index) < 1016
5974 && INTVAL (index) > -1024
5975 && (INTVAL (index) & 3) == 0);
5977 /* We have no such constraint on double mode offsets, so we permit the
5978 full range of the instruction format. */
5979 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5980 return (code == CONST_INT
5981 && INTVAL (index) < 1024
5982 && INTVAL (index) > -1024
5983 && (INTVAL (index) & 3) == 0);
5985 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5986 return (code == CONST_INT
5987 && INTVAL (index) < 1024
5988 && INTVAL (index) > -1024
5989 && (INTVAL (index) & 3) == 0);
5991 if (arm_address_register_rtx_p (index, strict_p)
5992 && (GET_MODE_SIZE (mode) <= 4))
5995 if (mode == DImode || mode == DFmode)
5997 if (code == CONST_INT)
5999 HOST_WIDE_INT val = INTVAL (index);
6002 return val > -256 && val < 256;
6004 return val > -4096 && val < 4092;
6007 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
6010 if (GET_MODE_SIZE (mode) <= 4
6014 || (mode == QImode && outer == SIGN_EXTEND))))
6018 rtx xiop0 = XEXP (index, 0);
6019 rtx xiop1 = XEXP (index, 1);
6021 return ((arm_address_register_rtx_p (xiop0, strict_p)
6022 && power_of_two_operand (xiop1, SImode))
6023 || (arm_address_register_rtx_p (xiop1, strict_p)
6024 && power_of_two_operand (xiop0, SImode)));
6026 else if (code == LSHIFTRT || code == ASHIFTRT
6027 || code == ASHIFT || code == ROTATERT)
6029 rtx op = XEXP (index, 1);
6031 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6034 && INTVAL (op) <= 31);
6038 /* For ARM v4 we may be doing a sign-extend operation during the load. */
6044 || (outer == SIGN_EXTEND && mode == QImode))
6050 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
6052 return (code == CONST_INT
6053 && INTVAL (index) < range
6054 && INTVAL (index) > -range);
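/* Editor's summary of the constant ranges accepted above (ARM state,
   given the relevant target flags): coprocessor offsets (VFP SF/DF,
   Neon double, iWMMXt) are word-aligned and lie in -1020..+1020;
   Neon quad modes are capped at +1012 so that INDEX+8 is still a
   valid double-mode offset; DImode/DFmode use the LDRD range of
   -255..+255 (falling back to -4095..+4091 without LDRD); everything
   else gets the generic single-word range of roughly +/-4095,
   slightly narrower for halfwords.  */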
6057 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6058 index operand. i.e. 1, 2, 4 or 8. */
6060 thumb2_index_mul_operand (rtx op)
6064 if (!CONST_INT_P (op))
6068 return (val == 1 || val == 2 || val == 4 || val == 8);
6071 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6073 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
6075 enum rtx_code code = GET_CODE (index);
6077 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6078 /* Standard coprocessor addressing modes. */
6079 if (TARGET_HARD_FLOAT
6081 && (mode == SFmode || mode == DFmode))
6082 return (code == CONST_INT && INTVAL (index) < 1024
6083 /* Thumb-2 allows only > -256 index range for its core register
6084 load/stores. Since we allow SF/DF in core registers, we have
6085 to use the intersection between -256~4096 (core) and -1024~1024 (coprocessor). */
6087 && INTVAL (index) > -256
6088 && (INTVAL (index) & 3) == 0);
6090 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6092 /* For DImode assume values will usually live in core regs
6093 and only allow LDRD addressing modes. */
6094 if (!TARGET_LDRD || mode != DImode)
6095 return (code == CONST_INT
6096 && INTVAL (index) < 1024
6097 && INTVAL (index) > -1024
6098 && (INTVAL (index) & 3) == 0);
6101 /* For quad modes, we restrict the constant offset to be slightly less
6102 than what the instruction format permits. We do this because for
6103 quad mode moves, we will actually decompose them into two separate
6104 double-mode reads or writes. INDEX must therefore be a valid
6105 (double-mode) offset and so should INDEX+8. */
6106 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6107 return (code == CONST_INT
6108 && INTVAL (index) < 1016
6109 && INTVAL (index) > -1024
6110 && (INTVAL (index) & 3) == 0);
6112 /* We have no such constraint on double mode offsets, so we permit the
6113 full range of the instruction format. */
6114 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6115 return (code == CONST_INT
6116 && INTVAL (index) < 1024
6117 && INTVAL (index) > -1024
6118 && (INTVAL (index) & 3) == 0);
6120 if (arm_address_register_rtx_p (index, strict_p)
6121 && (GET_MODE_SIZE (mode) <= 4))
6124 if (mode == DImode || mode == DFmode)
6126 if (code == CONST_INT)
6128 HOST_WIDE_INT val = INTVAL (index);
6129 /* ??? Can we assume ldrd for thumb2? */
6130 /* Thumb-2 ldrd only has reg+const addressing modes. */
6131 /* ldrd supports offsets of +-1020.
6132 However the ldr fallback does not. */
6133 return val > -256 && val < 256 && (val & 3) == 0;
6141 rtx xiop0 = XEXP (index, 0);
6142 rtx xiop1 = XEXP (index, 1);
6144 return ((arm_address_register_rtx_p (xiop0, strict_p)
6145 && thumb2_index_mul_operand (xiop1))
6146 || (arm_address_register_rtx_p (xiop1, strict_p)
6147 && thumb2_index_mul_operand (xiop0)));
6149 else if (code == ASHIFT)
6151 rtx op = XEXP (index, 1);
6153 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6156 && INTVAL (op) <= 3);
6159 return (code == CONST_INT
6160 && INTVAL (index) < 4096
6161 && INTVAL (index) > -256);
6164 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6166 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6176 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6178 return (regno <= LAST_LO_REGNUM
6179 || regno > LAST_VIRTUAL_REGISTER
6180 || regno == FRAME_POINTER_REGNUM
6181 || (GET_MODE_SIZE (mode) >= 4
6182 && (regno == STACK_POINTER_REGNUM
6183 || regno >= FIRST_PSEUDO_REGISTER
6184 || x == hard_frame_pointer_rtx
6185 || x == arg_pointer_rtx)));
6188 /* Return nonzero if x is a legitimate index register. This is the case
6189 for any base register that can access a QImode object. */
6191 thumb1_index_register_rtx_p (rtx x, int strict_p)
6193 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6196 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6198 The AP may be eliminated to either the SP or the FP, so we use the
6199 least common denominator, e.g. SImode, and offsets from 0 to 64.
6201 ??? Verify whether the above is the right approach.
6203 ??? Also, the FP may be eliminated to the SP, so perhaps that
6204 needs special handling also.
6206 ??? Look at how the mips16 port solves this problem. It probably uses
6207 better ways to solve some of these problems.
6209 Although it is not incorrect, we don't accept QImode and HImode
6210 addresses based on the frame pointer or arg pointer until the
6211 reload pass starts. This is so that eliminating such addresses
6212 into stack based ones won't produce impossible code. */
6214 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6216 /* ??? Not clear if this is right. Experiment. */
6217 if (GET_MODE_SIZE (mode) < 4
6218 && !(reload_in_progress || reload_completed)
6219 && (reg_mentioned_p (frame_pointer_rtx, x)
6220 || reg_mentioned_p (arg_pointer_rtx, x)
6221 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6222 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6223 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6224 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6227 /* Accept any base register. SP only in SImode or larger. */
6228 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6231 /* This is PC relative data before arm_reorg runs. */
6232 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6233 && GET_CODE (x) == SYMBOL_REF
6234 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6237 /* This is PC relative data after arm_reorg runs. */
6238 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6240 && (GET_CODE (x) == LABEL_REF
6241 || (GET_CODE (x) == CONST
6242 && GET_CODE (XEXP (x, 0)) == PLUS
6243 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6244 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6247 /* Post-inc indexing only supported for SImode and larger. */
6248 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6249 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6252 else if (GET_CODE (x) == PLUS)
6254 /* REG+REG address can be any two index registers. */
6255 /* We disallow FRAME+REG addressing since we know that FRAME
6256 will be replaced with STACK, and SP relative addressing only
6257 permits SP+OFFSET. */
6258 if (GET_MODE_SIZE (mode) <= 4
6259 && XEXP (x, 0) != frame_pointer_rtx
6260 && XEXP (x, 1) != frame_pointer_rtx
6261 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6262 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6263 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6266 /* REG+const has 5-7 bit offset for non-SP registers. */
6267 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6268 || XEXP (x, 0) == arg_pointer_rtx)
6269 && CONST_INT_P (XEXP (x, 1))
6270 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6273 /* REG+const has 10-bit offset for SP, but only SImode and
6274 larger are supported. */
6275 /* ??? Should probably check for DI/DFmode overflow here
6276 just like GO_IF_LEGITIMATE_OFFSET does. */
6277 else if (REG_P (XEXP (x, 0))
6278 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6279 && GET_MODE_SIZE (mode) >= 4
6280 && CONST_INT_P (XEXP (x, 1))
6281 && INTVAL (XEXP (x, 1)) >= 0
6282 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6283 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6286 else if (REG_P (XEXP (x, 0))
6287 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6288 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6289 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6290 && REGNO (XEXP (x, 0))
6291 <= LAST_VIRTUAL_POINTER_REGISTER))
6292 && GET_MODE_SIZE (mode) >= 4
6293 && CONST_INT_P (XEXP (x, 1))
6294 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6298 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6299 && GET_MODE_SIZE (mode) == 4
6300 && GET_CODE (x) == SYMBOL_REF
6301 && CONSTANT_POOL_ADDRESS_P (x)
6303 && symbol_mentioned_p (get_pool_constant (x))
6304 && ! pcrel_constant_p (get_pool_constant (x))))
6310 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6311 instruction of mode MODE. */
6313 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6315 switch (GET_MODE_SIZE (mode))
6318 return val >= 0 && val < 32;
6321 return val >= 0 && val < 64 && (val & 1) == 0;
6325 && (val + GET_MODE_SIZE (mode)) <= 128
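/* Editor's summary of the accepted offsets (illustrative): byte accesses
   allow 0..31, halfword accesses allow even offsets 0..62, and the default
   (word and larger) case requires a non-negative word-aligned VAL with
   VAL + size <= 128, e.g. 0..124 for SImode. These match the 5-bit scaled
   immediate fields of the 16-bit Thumb load/store encodings. */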
6331 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6334 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6335 else if (TARGET_THUMB2)
6336 return thumb2_legitimate_address_p (mode, x, strict_p);
6337 else /* if (TARGET_THUMB1) */
6338 return thumb1_legitimate_address_p (mode, x, strict_p);
6341 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
6343 Given an rtx X being reloaded into a reg required to be
6344 in class CLASS, return the class of reg to actually use.
6345 In general this is just CLASS, but for the Thumb core registers and
6346 immediate constants we prefer a LO_REGS class or a subset. */
6349 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
6355 if (rclass == GENERAL_REGS
6356 || rclass == HI_REGS
6357 || rclass == NO_REGS
6358 || rclass == STACK_REG)
6365 /* Build the SYMBOL_REF for __tls_get_addr. */
6367 static GTY(()) rtx tls_get_addr_libfunc;
6370 get_tls_get_addr (void)
6372 if (!tls_get_addr_libfunc)
6373 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6374 return tls_get_addr_libfunc;
6378 arm_load_tp (rtx target)
6381 target = gen_reg_rtx (SImode);
6385 /* Can return in any reg. */
6386 emit_insn (gen_load_tp_hard (target));
6390 /* Always returned in r0. Immediately copy the result into a pseudo,
6391 otherwise other uses of r0 (e.g. setting up function arguments) may
6392 clobber the value. */
6396 emit_insn (gen_load_tp_soft ());
6398 tmp = gen_rtx_REG (SImode, 0);
6399 emit_move_insn (target, tmp);
6405 load_tls_operand (rtx x, rtx reg)
6409 if (reg == NULL_RTX)
6410 reg = gen_reg_rtx (SImode);
6412 tmp = gen_rtx_CONST (SImode, x);
6414 emit_move_insn (reg, tmp);
6420 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6422 rtx insns, label, labelno, sum;
6424 gcc_assert (reloc != TLS_DESCSEQ);
6427 labelno = GEN_INT (pic_labelno++);
6428 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6429 label = gen_rtx_CONST (VOIDmode, label);
6431 sum = gen_rtx_UNSPEC (Pmode,
6432 gen_rtvec (4, x, GEN_INT (reloc), label,
6433 GEN_INT (TARGET_ARM ? 8 : 4)),
6435 reg = load_tls_operand (sum, reg);
6438 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6440 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6442 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6443 LCT_PURE, /* LCT_CONST? */
6444 Pmode, 1, reg, Pmode);
6446 insns = get_insns ();
6453 arm_tls_descseq_addr (rtx x, rtx reg)
6455 rtx labelno = GEN_INT (pic_labelno++);
6456 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6457 rtx sum = gen_rtx_UNSPEC (Pmode,
6458 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6459 gen_rtx_CONST (VOIDmode, label),
6460 GEN_INT (!TARGET_ARM)),
6462 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6464 emit_insn (gen_tlscall (x, labelno));
6466 reg = gen_reg_rtx (SImode);
6468 gcc_assert (REGNO (reg) != 0);
6470 emit_move_insn (reg, reg0);
6476 legitimize_tls_address (rtx x, rtx reg)
6478 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6479 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6483 case TLS_MODEL_GLOBAL_DYNAMIC:
6484 if (TARGET_GNU2_TLS)
6486 reg = arm_tls_descseq_addr (x, reg);
6488 tp = arm_load_tp (NULL_RTX);
6490 dest = gen_rtx_PLUS (Pmode, tp, reg);
6494 /* Original scheme */
6495 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6496 dest = gen_reg_rtx (Pmode);
6497 emit_libcall_block (insns, dest, ret, x);
6501 case TLS_MODEL_LOCAL_DYNAMIC:
6502 if (TARGET_GNU2_TLS)
6504 reg = arm_tls_descseq_addr (x, reg);
6506 tp = arm_load_tp (NULL_RTX);
6508 dest = gen_rtx_PLUS (Pmode, tp, reg);
6512 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6514 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6515 share the LDM result with other LD model accesses. */
6516 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6518 dest = gen_reg_rtx (Pmode);
6519 emit_libcall_block (insns, dest, ret, eqv);
6521 /* Load the addend. */
6522 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6523 GEN_INT (TLS_LDO32)),
6525 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6526 dest = gen_rtx_PLUS (Pmode, dest, addend);
6530 case TLS_MODEL_INITIAL_EXEC:
6531 labelno = GEN_INT (pic_labelno++);
6532 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6533 label = gen_rtx_CONST (VOIDmode, label);
6534 sum = gen_rtx_UNSPEC (Pmode,
6535 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6536 GEN_INT (TARGET_ARM ? 8 : 4)),
6538 reg = load_tls_operand (sum, reg);
6541 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6542 else if (TARGET_THUMB2)
6543 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6546 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6547 emit_move_insn (reg, gen_const_mem (SImode, reg));
6550 tp = arm_load_tp (NULL_RTX);
6552 return gen_rtx_PLUS (Pmode, tp, reg);
6554 case TLS_MODEL_LOCAL_EXEC:
6555 tp = arm_load_tp (NULL_RTX);
6557 reg = gen_rtx_UNSPEC (Pmode,
6558 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6560 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6562 return gen_rtx_PLUS (Pmode, tp, reg);
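/* Editor's sketch (pseudo-assembly, assuming a hardware thread pointer)
   of the local-exec sequence built above:

   mrc p15, 0, rT, c13, c0, 3 @ load_tp_hard: rT = thread pointer
   add rD, rT, #<TLS_LE32 offset of x> @ x's offset within the TLS block

   Cores without the CP15 thread register go through load_tp_soft, which
   calls the __aeabi_read_tp helper instead. */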
6569 /* Try machine-dependent ways of modifying an illegitimate address
6570 to be legitimate. If we find one, return the new, valid address. */
6572 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6576 /* TODO: legitimize_address for Thumb2. */
6579 return thumb_legitimize_address (x, orig_x, mode);
6582 if (arm_tls_symbol_p (x))
6583 return legitimize_tls_address (x, NULL_RTX);
6585 if (GET_CODE (x) == PLUS)
6587 rtx xop0 = XEXP (x, 0);
6588 rtx xop1 = XEXP (x, 1);
6590 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6591 xop0 = force_reg (SImode, xop0);
6593 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6594 xop1 = force_reg (SImode, xop1);
6596 if (ARM_BASE_REGISTER_RTX_P (xop0)
6597 && CONST_INT_P (xop1))
6599 HOST_WIDE_INT n, low_n;
6603 /* VFP addressing modes actually allow greater offsets, but for
6604 now we just stick with the lowest common denominator. */
6606 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6618 low_n = ((mode) == TImode ? 0
6619 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6623 base_reg = gen_reg_rtx (SImode);
6624 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
6625 emit_move_insn (base_reg, val);
6626 x = plus_constant (Pmode, base_reg, low_n);
6628 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6629 x = gen_rtx_PLUS (SImode, xop0, xop1);
6632 /* XXX We don't allow MINUS any more -- see comment in
6633 arm_legitimate_address_outer_p (). */
6634 else if (GET_CODE (x) == MINUS)
6636 rtx xop0 = XEXP (x, 0);
6637 rtx xop1 = XEXP (x, 1);
6639 if (CONSTANT_P (xop0))
6640 xop0 = force_reg (SImode, xop0);
6642 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6643 xop1 = force_reg (SImode, xop1);
6645 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6646 x = gen_rtx_MINUS (SImode, xop0, xop1);
6649 /* Make sure to take full advantage of the pre-indexed addressing mode
6650 with absolute addresses which often allows for the base register to
6651 be factorized for multiple adjacent memory references, and it might
6652 even allow for the minipool to be avoided entirely. */
6653 else if (CONST_INT_P (x) && optimize > 0)
6656 HOST_WIDE_INT mask, base, index;
6659 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6660 use an 8-bit index. So let's use a 12-bit index for SImode only and
6661 hope that arm_gen_constant will enable ldrb to use more bits. */
6662 bits = (mode == SImode) ? 12 : 8;
6663 mask = (1 << bits) - 1;
6664 base = INTVAL (x) & ~mask;
6665 index = INTVAL (x) & mask;
6666 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6668 /* It'll most probably be more efficient to generate the base
6669 with more bits set and use a negative index instead. */
6673 base_reg = force_reg (SImode, GEN_INT (base));
6674 x = plus_constant (Pmode, base_reg, index);
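/* Worked example (editor's illustration, hypothetical address): a QImode
   access to constant address 0xf45 gives bits == 8, mask == 0xff,
   base == 0xf00 and index == 0x45. bit_count (0xf00) == 4 does not exceed
   (32 - 8) / 2 == 12, so the positive split stands and the access becomes
   ldrb rd, [rbase, #0x45] once BASE is built by arm_gen_constant. */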
6679 /* We need to find and carefully transform any SYMBOL and LABEL
6680 references, so go back to the original address expression. */
6681 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6683 if (new_x != orig_x)
6691 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6692 to be legitimate. If we find one, return the new, valid address. */
6694 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6696 if (arm_tls_symbol_p (x))
6697 return legitimize_tls_address (x, NULL_RTX);
6699 if (GET_CODE (x) == PLUS
6700 && CONST_INT_P (XEXP (x, 1))
6701 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6702 || INTVAL (XEXP (x, 1)) < 0))
6704 rtx xop0 = XEXP (x, 0);
6705 rtx xop1 = XEXP (x, 1);
6706 HOST_WIDE_INT offset = INTVAL (xop1);
6708 /* Try and fold the offset into a biasing of the base register and
6709 then offsetting that. Don't do this when optimizing for space
6710 since it can cause too many CSEs. */
6711 if (optimize_size && offset >= 0
6712 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6714 HOST_WIDE_INT delta;
6717 delta = offset - (256 - GET_MODE_SIZE (mode));
6718 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6719 delta = 31 * GET_MODE_SIZE (mode);
6721 delta = offset & (~31 * GET_MODE_SIZE (mode));
6723 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
6725 x = plus_constant (Pmode, xop0, delta);
6727 else if (offset < 0 && offset > -256)
6728 /* Small negative offsets are best done with a subtract before the
6729 dereference, since forcing these into a register normally takes two
6730 insns. */
6731 x = force_operand (x, NULL_RTX);
6734 /* For the remaining cases, force the constant into a register. */
6735 xop1 = force_reg (SImode, xop1);
6736 x = gen_rtx_PLUS (SImode, xop0, xop1);
6739 else if (GET_CODE (x) == PLUS
6740 && s_register_operand (XEXP (x, 1), SImode)
6741 && !s_register_operand (XEXP (x, 0), SImode))
6743 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6745 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6750 /* We need to find and carefully transform any SYMBOL and LABEL
6751 references, so go back to the original address expression. */
6752 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6754 if (new_x != orig_x)
6762 arm_legitimize_reload_address (rtx *p,
6763 enum machine_mode mode,
6764 int opnum, int type,
6765 int ind_levels ATTRIBUTE_UNUSED)
6767 /* We must recognize output that we have already generated ourselves. */
6768 if (GET_CODE (*p) == PLUS
6769 && GET_CODE (XEXP (*p, 0)) == PLUS
6770 && REG_P (XEXP (XEXP (*p, 0), 0))
6771 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
6772 && CONST_INT_P (XEXP (*p, 1)))
6774 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6775 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6776 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6780 if (GET_CODE (*p) == PLUS
6781 && REG_P (XEXP (*p, 0))
6782 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6783 /* If the base register is equivalent to a constant, let the generic
6784 code handle it. Otherwise we will run into problems if a future
6785 reload pass decides to rematerialize the constant. */
6786 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
6787 && CONST_INT_P (XEXP (*p, 1)))
6789 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6790 HOST_WIDE_INT low, high;
6792 /* Detect coprocessor load/stores. */
6793 bool coproc_p = ((TARGET_HARD_FLOAT
6795 && (mode == SFmode || mode == DFmode))
6796 || (TARGET_REALLY_IWMMXT
6797 && VALID_IWMMXT_REG_MODE (mode))
6799 && (VALID_NEON_DREG_MODE (mode)
6800 || VALID_NEON_QREG_MODE (mode))));
6802 /* For some conditions, bail out when lower two bits are unaligned. */
6803 if ((val & 0x3) != 0
6804 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6806 /* For DI, and DF under soft-float: */
6807 || ((mode == DImode || mode == DFmode)
6808 /* Without ldrd, we use stm/ldm, which does not
6809 fare well with unaligned bits. */
6811 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6812 || TARGET_THUMB2))))
6815 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6816 of which the (reg+high) gets turned into a reload add insn,
6817 we try to decompose the index into high/low values that can often
6818 also lead to better reload CSE.
6820 ldr r0, [r2, #4100] // Offset too large
6821 ldr r1, [r2, #4104] // Offset too large
6823 is best reloaded as:
6825 add t1, r2, #4096
6826 ldr r0, [t1, #4]
6827 add t2, r2, #4096
6828 ldr r1, [t2, #8]
6829 which post-reload CSE can simplify in most cases to eliminate the
6830 second add instruction:
6832 add t1, r2, #4096
6833 ldr r0, [t1, #4]
6834 ldr r1, [t1, #8]
6835 The idea here is that we want to split out the bits of the constant
6836 as a mask, rather than as subtracting the maximum offset that the
6837 respective type of load/store used can handle.
6839 When encountering negative offsets, we can still use this trick even if
6840 the overall offset is positive; sometimes this may lead to an immediate
6841 that can be constructed with fewer instructions. For example:
6843 ldr r0, [r2, #0x3FFFFC]
6845 This is best reloaded as:
6846 add t1, r2, #0x400000
6847 ldr r0, [t1, #-4]
6849 The trick for spotting this for a load insn with N bits of offset
6850 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6851 negative offset that is going to make bit N and all the bits below
6852 it become zero in the remainder part.
6854 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6855 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6856 used in most cases of ARM load/store instructions. */
6858 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6859 (((VAL) & ((1 << (N)) - 1)) \
6860 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6861 : 0)
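/* Trace of the macro on the example above (editor's illustration):
   SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12) sees nonzero low bits 0xFFC, so it
   computes ((0x3FFFFC & 0x1FFF) ^ 0x1000) - 0x1000
   = (0x1FFC ^ 0x1000) - 0x1000 = 0xFFC - 0x1000 = -4,
   leaving the high part 0x3FFFFC - (-4) = 0x400000 for the add. */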
6865 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6867 /* NEON quad-word load/stores are made of two double-word accesses,
6868 so the valid index range is reduced by 8. Treat as a 9-bit range if
6869 we go over it. */
6870 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6871 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6873 else if (GET_MODE_SIZE (mode) == 8)
6876 low = (TARGET_THUMB2
6877 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6878 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6880 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6881 to access doublewords. The supported load/store offsets are
6882 -8, -4, and 4, which we try to produce here. */
6883 low = ((val & 0xf) ^ 0x8) - 0x8;
6885 else if (GET_MODE_SIZE (mode) < 8)
6887 /* NEON element load/stores do not have an offset. */
6888 if (TARGET_NEON_FP16 && mode == HFmode)
6893 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6894 Try the wider 12-bit range first, and re-try if the result
6895 is out of range. */
6896 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6898 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6902 if (mode == HImode || mode == HFmode)
6905 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6908 /* The storehi/movhi_bytes fallbacks can use only
6909 [-4094,+4094] of the full ldrb/strb index range. */
6910 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6911 if (low == 4095 || low == -4095)
6916 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6922 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6923 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6924 - (unsigned HOST_WIDE_INT) 0x80000000);
6925 /* Check for overflow or zero */
6926 if (low == 0 || high == 0 || (high + low != val))
6929 /* Reload the high part into a base reg; leave the low part
6930 in the mem. */
6931 *p = gen_rtx_PLUS (GET_MODE (*p),
6932 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6935 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6936 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6937 VOIDmode, 0, 0, opnum, (enum reload_type) type);
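/* Worked example (editor's illustration): reloading [r2, #0x3FFFFC] in
   SImode yields low == -4 and high == 0x400000, so *P becomes
   (plus (plus r2 (const_int 0x400000)) (const_int -4)); the inner PLUS
   is pushed as a reload (a single valid add immediate) while the -4
   stays in the address. */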
6945 thumb_legitimize_reload_address (rtx *x_p,
6946 enum machine_mode mode,
6947 int opnum, int type,
6948 int ind_levels ATTRIBUTE_UNUSED)
6952 if (GET_CODE (x) == PLUS
6953 && GET_MODE_SIZE (mode) < 4
6954 && REG_P (XEXP (x, 0))
6955 && XEXP (x, 0) == stack_pointer_rtx
6956 && CONST_INT_P (XEXP (x, 1))
6957 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6962 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6963 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6967 /* If both registers are hi-regs, then it's better to reload the
6968 entire expression rather than each register individually. That
6969 only requires one reload register rather than two. */
6970 if (GET_CODE (x) == PLUS
6971 && REG_P (XEXP (x, 0))
6972 && REG_P (XEXP (x, 1))
6973 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6974 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6979 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6980 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6987 /* Test for various thread-local symbols. */
6989 /* Return TRUE if X is a thread-local symbol. */
6992 arm_tls_symbol_p (rtx x)
6994 if (! TARGET_HAVE_TLS)
6997 if (GET_CODE (x) != SYMBOL_REF)
7000 return SYMBOL_REF_TLS_MODEL (x) != 0;
7003 /* Helper for arm_tls_referenced_p. */
7006 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
7008 if (GET_CODE (*x) == SYMBOL_REF)
7009 return SYMBOL_REF_TLS_MODEL (*x) != 0;
7011 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7012 TLS offsets, not real symbol references. */
7013 if (GET_CODE (*x) == UNSPEC
7014 && XINT (*x, 1) == UNSPEC_TLS)
7020 /* Return TRUE if X contains any TLS symbol references. */
7023 arm_tls_referenced_p (rtx x)
7025 if (! TARGET_HAVE_TLS)
7028 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
7031 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7033 On the ARM, allow any integer (invalid ones are removed later by insn
7034 patterns), nice doubles and symbol_refs which refer to the function's
7035 constant pool.
7037 When generating PIC, allow anything. */
7040 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
7042 /* At present, we have no support for Neon structure constants, so forbid
7043 them here. It might be possible to handle simple cases like 0 and -1
7044 in the future. */
7045 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
7048 return flag_pic || !label_mentioned_p (x);
7052 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7054 return (CONST_INT_P (x)
7055 || CONST_DOUBLE_P (x)
7056 || CONSTANT_ADDRESS_P (x)
7061 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
7063 return (!arm_cannot_force_const_mem (mode, x)
7065 ? arm_legitimate_constant_p_1 (mode, x)
7066 : thumb_legitimate_constant_p (mode, x)));
7069 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7072 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7076 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
7078 split_const (x, &base, &offset);
7079 if (GET_CODE (base) == SYMBOL_REF
7080 && !offset_within_block_p (base, INTVAL (offset)))
7083 return arm_tls_referenced_p (x);
7086 #define REG_OR_SUBREG_REG(X) \
7088 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
7090 #define REG_OR_SUBREG_RTX(X) \
7091 (REG_P (X) ? (X) : SUBREG_REG (X))
7094 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7096 enum machine_mode mode = GET_MODE (x);
7110 return COSTS_N_INSNS (1);
7113 if (CONST_INT_P (XEXP (x, 1)))
7116 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7123 return COSTS_N_INSNS (2) + cycles;
7125 return COSTS_N_INSNS (1) + 16;
7128 return (COSTS_N_INSNS (1)
7129 + 4 * ((MEM_P (SET_SRC (x)))
7130 + MEM_P (SET_DEST (x))));
7135 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7137 if (thumb_shiftable_const (INTVAL (x)))
7138 return COSTS_N_INSNS (2);
7139 return COSTS_N_INSNS (3);
7141 else if ((outer == PLUS || outer == COMPARE)
7142 && INTVAL (x) < 256 && INTVAL (x) > -256)
7144 else if ((outer == IOR || outer == XOR || outer == AND)
7145 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7146 return COSTS_N_INSNS (1);
7147 else if (outer == AND)
7150 /* This duplicates the tests in the andsi3 expander. */
7151 for (i = 9; i <= 31; i++)
7152 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7153 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7154 return COSTS_N_INSNS (2);
7156 else if (outer == ASHIFT || outer == ASHIFTRT
7157 || outer == LSHIFTRT)
7159 return COSTS_N_INSNS (2);
7165 return COSTS_N_INSNS (3);
7183 /* XXX another guess. */
7184 /* Memory costs quite a lot for the first word, but subsequent words
7185 load at the equivalent of a single insn each. */
7186 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7187 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7192 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7198 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7199 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7205 return total + COSTS_N_INSNS (1);
7207 /* Assume a two-shift sequence. Increase the cost slightly so
7208 we prefer actual shifts over an extend operation. */
7209 return total + 1 + COSTS_N_INSNS (2);
7217 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7219 enum machine_mode mode = GET_MODE (x);
7220 enum rtx_code subcode;
7222 enum rtx_code code = GET_CODE (x);
7228 /* Memory costs quite a lot for the first word, but subsequent words
7229 load at the equivalent of a single insn each. */
7230 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7237 if (TARGET_HARD_FLOAT && mode == SFmode)
7238 *total = COSTS_N_INSNS (2);
7239 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7240 *total = COSTS_N_INSNS (4);
7242 *total = COSTS_N_INSNS (20);
7246 if (REG_P (XEXP (x, 1)))
7247 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
7248 else if (!CONST_INT_P (XEXP (x, 1)))
7249 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7255 *total += COSTS_N_INSNS (4);
7260 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7261 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7264 *total += COSTS_N_INSNS (3);
7268 *total += COSTS_N_INSNS (1);
7269 /* Increase the cost of complex shifts because they aren't any faster,
7270 and reduce dual issue opportunities. */
7271 if (arm_tune_cortex_a9
7272 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
7280 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7281 if (CONST_INT_P (XEXP (x, 0))
7282 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7284 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7288 if (CONST_INT_P (XEXP (x, 1))
7289 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7291 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7298 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7300 if (TARGET_HARD_FLOAT
7302 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7304 *total = COSTS_N_INSNS (1);
7305 if (CONST_DOUBLE_P (XEXP (x, 0))
7306 && arm_const_double_rtx (XEXP (x, 0)))
7308 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7312 if (CONST_DOUBLE_P (XEXP (x, 1))
7313 && arm_const_double_rtx (XEXP (x, 1)))
7315 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7321 *total = COSTS_N_INSNS (20);
7325 *total = COSTS_N_INSNS (1);
7326 if (CONST_INT_P (XEXP (x, 0))
7327 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7329 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7333 subcode = GET_CODE (XEXP (x, 1));
7334 if (subcode == ASHIFT || subcode == ASHIFTRT
7335 || subcode == LSHIFTRT
7336 || subcode == ROTATE || subcode == ROTATERT)
7338 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7339 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7343 /* A shift as a part of RSB costs no more than RSB itself. */
7344 if (GET_CODE (XEXP (x, 0)) == MULT
7345 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7347 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7348 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7353 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7355 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7356 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7360 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7361 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7363 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7364 if (REG_P (XEXP (XEXP (x, 1), 0))
7365 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7366 *total += COSTS_N_INSNS (1);
7374 if (code == PLUS && arm_arch6 && mode == SImode
7375 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7376 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7378 *total = COSTS_N_INSNS (1);
7379 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7381 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7385 /* MLA: All arguments must be registers. We filter out
7386 multiplication by a power of two, so that we fall down into
7387 the code below. */
7388 if (GET_CODE (XEXP (x, 0)) == MULT
7389 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7391 /* The cost comes from the cost of the multiply. */
7395 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7397 if (TARGET_HARD_FLOAT
7399 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7401 *total = COSTS_N_INSNS (1);
7402 if (CONST_DOUBLE_P (XEXP (x, 1))
7403 && arm_const_double_rtx (XEXP (x, 1)))
7405 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7412 *total = COSTS_N_INSNS (20);
7416 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7417 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7419 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7420 if (REG_P (XEXP (XEXP (x, 0), 0))
7421 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7422 *total += COSTS_N_INSNS (1);
7428 case AND: case XOR: case IOR:
7430 /* Normally the frame registers will be spilt into reg+const during
7431 reload, so it is a bad idea to combine them with other instructions,
7432 since then they might not be moved outside of loops. As a compromise
7433 we allow integration with ops that have a constant as their second
7435 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7436 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7437 && !CONST_INT_P (XEXP (x, 1)))
7438 *total = COSTS_N_INSNS (1);
7442 *total += COSTS_N_INSNS (2);
7443 if (CONST_INT_P (XEXP (x, 1))
7444 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7446 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7453 *total += COSTS_N_INSNS (1);
7454 if (CONST_INT_P (XEXP (x, 1))
7455 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7457 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7460 subcode = GET_CODE (XEXP (x, 0));
7461 if (subcode == ASHIFT || subcode == ASHIFTRT
7462 || subcode == LSHIFTRT
7463 || subcode == ROTATE || subcode == ROTATERT)
7465 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7466 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7471 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7473 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7474 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7478 if (subcode == UMIN || subcode == UMAX
7479 || subcode == SMIN || subcode == SMAX)
7481 *total = COSTS_N_INSNS (3);
7488 /* This should have been handled by the CPU specific routines. */
7492 if (arm_arch3m && mode == SImode
7493 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7494 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7495 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7496 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7497 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7498 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7500 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7503 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7507 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7509 if (TARGET_HARD_FLOAT
7511 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7513 *total = COSTS_N_INSNS (1);
7516 *total = COSTS_N_INSNS (2);
7522 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
7523 if (mode == SImode && code == NOT)
7525 subcode = GET_CODE (XEXP (x, 0));
7526 if (subcode == ASHIFT || subcode == ASHIFTRT
7527 || subcode == LSHIFTRT
7528 || subcode == ROTATE || subcode == ROTATERT
7530 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7532 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7533 /* Register shifts cost an extra cycle. */
7534 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
7535 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7544 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7546 *total = COSTS_N_INSNS (4);
7550 operand = XEXP (x, 0);
7552 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7553 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7554 && REG_P (XEXP (operand, 0))
7555 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7556 *total += COSTS_N_INSNS (1);
7557 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7558 + rtx_cost (XEXP (x, 2), code, 2, speed));
7562 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7564 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7570 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
7571 && mode == SImode && XEXP (x, 1) == const0_rtx)
7573 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7579 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
7580 && mode == SImode && XEXP (x, 1) == const0_rtx)
7582 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7602 /* SCC insns. In the case where the comparison has already been
7603 performed, they cost 2 instructions. Otherwise they need
7604 an additional comparison before them. */
7605 *total = COSTS_N_INSNS (2);
7606 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
7613 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
7619 *total += COSTS_N_INSNS (1);
7620 if (CONST_INT_P (XEXP (x, 1))
7621 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7623 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7627 subcode = GET_CODE (XEXP (x, 0));
7628 if (subcode == ASHIFT || subcode == ASHIFTRT
7629 || subcode == LSHIFTRT
7630 || subcode == ROTATE || subcode == ROTATERT)
7632 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7633 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7638 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7640 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7641 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7651 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7652 if (!CONST_INT_P (XEXP (x, 1))
7653 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7654 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7658 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7660 if (TARGET_HARD_FLOAT
7662 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7664 *total = COSTS_N_INSNS (1);
7667 *total = COSTS_N_INSNS (20);
7670 *total = COSTS_N_INSNS (1);
7672 *total += COSTS_N_INSNS (3);
7678 if (GET_MODE_CLASS (mode) == MODE_INT)
7680 rtx op = XEXP (x, 0);
7681 enum machine_mode opmode = GET_MODE (op);
7684 *total += COSTS_N_INSNS (1);
7686 if (opmode != SImode)
7690 /* If !arm_arch4, we use one of the extendhisi2_mem
7691 or movhi_bytes patterns for HImode. For a QImode
7692 sign extension, we first zero-extend from memory
7693 and then perform a shift sequence. */
7694 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7695 *total += COSTS_N_INSNS (2);
7698 *total += COSTS_N_INSNS (1);
7700 /* We don't have the necessary insn, so we need to perform some
7701 other operation. */
7702 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7703 /* An and with constant 255. */
7704 *total += COSTS_N_INSNS (1);
7706 /* A shift sequence. Increase costs slightly to avoid
7707 combining two shifts into an extend operation. */
7708 *total += COSTS_N_INSNS (2) + 1;
7714 switch (GET_MODE (XEXP (x, 0)))
7721 *total = COSTS_N_INSNS (1);
7731 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7735 if (const_ok_for_arm (INTVAL (x))
7736 || const_ok_for_arm (~INTVAL (x)))
7737 *total = COSTS_N_INSNS (1);
7739 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7740 INTVAL (x), NULL_RTX,
7747 *total = COSTS_N_INSNS (3);
7751 *total = COSTS_N_INSNS (1);
7755 *total = COSTS_N_INSNS (1);
7756 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7760 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7761 && (mode == SFmode || !TARGET_VFP_SINGLE))
7762 *total = COSTS_N_INSNS (1);
7764 *total = COSTS_N_INSNS (4);
7768 /* The vec_extract patterns accept memory operands that require an
7769 address reload. Account for the cost of that reload to give the
7770 auto-inc-dec pass an incentive to try to replace them. */
7771 if (TARGET_NEON && MEM_P (SET_DEST (x))
7772 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
7774 *total = rtx_cost (SET_DEST (x), code, 0, speed);
7775 if (!neon_vector_mem_operand (SET_DEST (x), 2))
7776 *total += COSTS_N_INSNS (1);
7779 /* Likewise for the vec_set patterns. */
7780 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
7781 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
7782 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
7784 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
7785 *total = rtx_cost (mem, code, 0, speed);
7786 if (!neon_vector_mem_operand (mem, 2))
7787 *total += COSTS_N_INSNS (1);
7793 /* We cost this as high as our memory costs to allow this to
7794 be hoisted from loops. */
7795 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
7797 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7803 && TARGET_HARD_FLOAT
7805 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
7806 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
7807 *total = COSTS_N_INSNS (1);
7809 *total = COSTS_N_INSNS (4);
7813 *total = COSTS_N_INSNS (4);
7818 /* Estimates the size cost of Thumb-1 instructions.
7819 For now most of the code is copied from thumb1_rtx_costs. We need more
7820 fine-grained tuning when we have more related test cases. */
7822 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7824 enum machine_mode mode = GET_MODE (x);
7837 return COSTS_N_INSNS (1);
7840 if (CONST_INT_P (XEXP (x, 1)))
7842 /* The Thumb-1 mul instruction can't operate on a constant. We must load
7843 it into a register first. */
7844 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7845 return COSTS_N_INSNS (1) + const_size;
7847 return COSTS_N_INSNS (1);
7850 return (COSTS_N_INSNS (1)
7851 + 4 * ((MEM_P (SET_SRC (x)))
7852 + MEM_P (SET_DEST (x))));
7857 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7858 return COSTS_N_INSNS (1);
7859 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7860 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7861 return COSTS_N_INSNS (2);
7862 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7863 if (thumb_shiftable_const (INTVAL (x)))
7864 return COSTS_N_INSNS (2);
7865 return COSTS_N_INSNS (3);
7867 else if ((outer == PLUS || outer == COMPARE)
7868 && INTVAL (x) < 256 && INTVAL (x) > -256)
7870 else if ((outer == IOR || outer == XOR || outer == AND)
7871 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7872 return COSTS_N_INSNS (1);
7873 else if (outer == AND)
7876 /* This duplicates the tests in the andsi3 expander. */
7877 for (i = 9; i <= 31; i++)
7878 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7879 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7880 return COSTS_N_INSNS (2);
7882 else if (outer == ASHIFT || outer == ASHIFTRT
7883 || outer == LSHIFTRT)
7885 return COSTS_N_INSNS (2);
7891 return COSTS_N_INSNS (3);
7909 /* XXX another guess. */
7910 /* Memory costs quite a lot for the first word, but subsequent words
7911 load at the equivalent of a single insn each. */
7912 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7913 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7918 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7923 /* XXX still guessing. */
7924 switch (GET_MODE (XEXP (x, 0)))
7927 return (1 + (mode == DImode ? 4 : 0)
7928 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
7931 return (4 + (mode == DImode ? 4 : 0)
7932 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
7935 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
7946 /* RTX costs when optimizing for size. */
7948 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7951 enum machine_mode mode = GET_MODE (x);
7954 *total = thumb1_size_rtx_costs (x, code, outer_code);
7958 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7962 /* A memory access costs 1 insn if the mode is small, or the address is
7963 a single register, otherwise it costs one insn per word. */
7964 if (REG_P (XEXP (x, 0)))
7965 *total = COSTS_N_INSNS (1);
7967 && GET_CODE (XEXP (x, 0)) == PLUS
7968 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7969 /* This will be split into two instructions.
7970 See arm.md:calculate_pic_address. */
7971 *total = COSTS_N_INSNS (2);
7973 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7980 /* Needs a libcall, so it costs about this. */
7981 *total = COSTS_N_INSNS (2);
7985 if (mode == SImode && REG_P (XEXP (x, 1)))
7987 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
7995 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
7997 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8000 else if (mode == SImode)
8002 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8003 /* Slightly disparage register shifts, but not by much. */
8004 if (!CONST_INT_P (XEXP (x, 1)))
8005 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8009 /* Needs a libcall. */
8010 *total = COSTS_N_INSNS (2);
8014 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8015 && (mode == SFmode || !TARGET_VFP_SINGLE))
8017 *total = COSTS_N_INSNS (1);
8023 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
8024 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
8026 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
8027 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
8028 || subcode1 == ROTATE || subcode1 == ROTATERT
8029 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
8030 || subcode1 == ASHIFTRT)
8032 /* It's just the cost of the two operands. */
8037 *total = COSTS_N_INSNS (1);
8041 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8045 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8046 && (mode == SFmode || !TARGET_VFP_SINGLE))
8048 *total = COSTS_N_INSNS (1);
8052 /* A shift as a part of ADD costs nothing. */
8053 if (GET_CODE (XEXP (x, 0)) == MULT
8054 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8056 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
8057 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
8058 *total += rtx_cost (XEXP (x, 1), code, 1, false);
8063 case AND: case XOR: case IOR:
8066 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
8068 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
8069 || subcode == LSHIFTRT || subcode == ASHIFTRT
8070 || (code == AND && subcode == NOT))
8072 /* It's just the cost of the two operands. */
8078 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8082 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8086 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8087 && (mode == SFmode || !TARGET_VFP_SINGLE))
8089 *total = COSTS_N_INSNS (1);
8095 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8104 if (cc_register (XEXP (x, 0), VOIDmode))
8107 *total = COSTS_N_INSNS (1);
8111 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8112 && (mode == SFmode || !TARGET_VFP_SINGLE))
8113 *total = COSTS_N_INSNS (1);
8115 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
8120 return arm_rtx_costs_1 (x, outer_code, total, 0);
8123 if (const_ok_for_arm (INTVAL (x)))
8124 /* A multiplication by a constant requires another instruction
8125 to load the constant to a register. */
8126 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
8128 else if (const_ok_for_arm (~INTVAL (x)))
8129 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
8130 else if (const_ok_for_arm (-INTVAL (x)))
8132 if (outer_code == COMPARE || outer_code == PLUS
8133 || outer_code == MINUS)
8136 *total = COSTS_N_INSNS (1);
8139 *total = COSTS_N_INSNS (2);
8145 *total = COSTS_N_INSNS (2);
8149 *total = COSTS_N_INSNS (4);
8154 && TARGET_HARD_FLOAT
8155 && outer_code == SET
8156 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8157 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8158 *total = COSTS_N_INSNS (1);
8160 *total = COSTS_N_INSNS (4);
8165 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8166 cost of these slightly. */
8167 *total = COSTS_N_INSNS (1) + 1;
8174 if (mode != VOIDmode)
8175 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8177 *total = COSTS_N_INSNS (4); /* Who knows? */
8182 /* Top-level RTX cost hook: use the size costs when optimizing for size, otherwise the tuning-selected cost function. */
8184 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
8185 int *total, bool speed)
8188 return arm_size_rtx_costs (x, (enum rtx_code) code,
8189 (enum rtx_code) outer_code, total);
8191 return current_tune->rtx_costs (x, (enum rtx_code) code,
8192 (enum rtx_code) outer_code,
8196 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
8197 supported on any "slowmul" cores, so it can be ignored. */
8200 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8201 int *total, bool speed)
8203 enum machine_mode mode = GET_MODE (x);
8207 *total = thumb1_rtx_costs (x, code, outer_code);
8214 if (GET_MODE_CLASS (mode) == MODE_FLOAT
8217 *total = COSTS_N_INSNS (20);
8221 if (CONST_INT_P (XEXP (x, 1)))
8223 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8224 & (unsigned HOST_WIDE_INT) 0xffffffff);
8225 int cost, const_ok = const_ok_for_arm (i);
8226 int j, booth_unit_size;
8228 /* Tune as appropriate. */
8229 cost = const_ok ? 4 : 8;
8230 booth_unit_size = 2;
8231 for (j = 0; i && j < 32; j += booth_unit_size)
8233 i >>= booth_unit_size;
8237 *total = COSTS_N_INSNS (cost);
8238 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8242 *total = COSTS_N_INSNS (20);
8246 return arm_rtx_costs_1 (x, outer_code, total, speed);
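/* Worked example (editor's illustration, assuming the elided loop body
   increments COST once per step): a multiply by 0x55 (const_ok) starts
   at cost == 4 and shifts I right two bits at a time (0x55, 0x15, 0x5,
   0x1, 0), taking four steps, for COSTS_N_INSNS (8) plus the cost of
   operand 0. */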
8251 /* RTX cost for cores with a fast multiply unit (M variants). */
8254 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8255 int *total, bool speed)
8257 enum machine_mode mode = GET_MODE (x);
8261 *total = thumb1_rtx_costs (x, code, outer_code);
8265 /* ??? should thumb2 use different costs? */
8269 /* There is no point basing this on the tuning, since it is always the
8270 fast variant if it exists at all. */
8272 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8273 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8274 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8276 *total = COSTS_N_INSNS (2);
8283 *total = COSTS_N_INSNS (5);
8287 if (CONST_INT_P (XEXP (x, 1)))
8289 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8290 & (unsigned HOST_WIDE_INT) 0xffffffff);
8291 int cost, const_ok = const_ok_for_arm (i);
8292 int j, booth_unit_size;
8294 /* Tune as appropriate. */
8295 cost = const_ok ? 4 : 8;
8296 booth_unit_size = 8;
8297 for (j = 0; i && j < 32; j += booth_unit_size)
8299 i >>= booth_unit_size;
8303 *total = COSTS_N_INSNS (cost);
8309 *total = COSTS_N_INSNS (4);
8313 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8315 if (TARGET_HARD_FLOAT
8317 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8319 *total = COSTS_N_INSNS (1);
8324 /* Requires a lib call */
8325 *total = COSTS_N_INSNS (20);
8329 return arm_rtx_costs_1 (x, outer_code, total, speed);
8334 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
8335 so it can be ignored. */
8338 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8339 int *total, bool speed)
8341 enum machine_mode mode = GET_MODE (x);
8345 *total = thumb1_rtx_costs (x, code, outer_code);
8352 if (GET_CODE (XEXP (x, 0)) != MULT)
8353 return arm_rtx_costs_1 (x, outer_code, total, speed);
8355 /* A COMPARE of a MULT is slow on XScale; the muls instruction
8356 will stall until the multiplication is complete. */
8357 *total = COSTS_N_INSNS (3);
8361 /* There is no point basing this on the tuning, since it is always the
8362 fast variant if it exists at all. */
8364 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8365 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8366 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8368 *total = COSTS_N_INSNS (2);
8375 *total = COSTS_N_INSNS (5);
8379 if (CONST_INT_P (XEXP (x, 1)))
8381 /* If operand 1 is a constant we can more accurately
8382 calculate the cost of the multiply. The multiplier can
8383 retire 15 bits on the first cycle and a further 12 on the
8384 second. We do, of course, have to load the constant into
8385 a register first. */
8386 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8387 /* There's a general overhead of one cycle. */
8389 unsigned HOST_WIDE_INT masked_const;
8394 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
8396 masked_const = i & 0xffff8000;
8397 if (masked_const != 0)
8400 masked_const = i & 0xf8000000;
8401 if (masked_const != 0)
8404 *total = COSTS_N_INSNS (cost);
8410 *total = COSTS_N_INSNS (3);
8414 /* Requires a lib call */
8415 *total = COSTS_N_INSNS (20);
8419 return arm_rtx_costs_1 (x, outer_code, total, speed);
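/* Worked example (editor's illustration): a multiply by 0x12345 has
   i & 0xffff8000 == 0x10000, adding one cycle for the bits beyond the
   first 15, while i & 0xf8000000 == 0 adds nothing more; together with
   the one-cycle general overhead this gives COSTS_N_INSNS (2), on top
   of loading the constant into a register. */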
8424 /* RTX costs for 9e (and later) cores. */
8427 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8428 int *total, bool speed)
8430 enum machine_mode mode = GET_MODE (x);
8437 *total = COSTS_N_INSNS (3);
8441 *total = thumb1_rtx_costs (x, code, outer_code);
8449 /* There is no point basing this on the tuning, since it is always the
8450 fast variant if it exists at all. */
8452 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8453 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8454 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8456 *total = COSTS_N_INSNS (2);
8463 *total = COSTS_N_INSNS (5);
8469 *total = COSTS_N_INSNS (2);
8473 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8475 if (TARGET_HARD_FLOAT
8477 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8479 *total = COSTS_N_INSNS (1);
8484 *total = COSTS_N_INSNS (20);
8488 return arm_rtx_costs_1 (x, outer_code, total, speed);
8491 /* All address computations that can be done are free, but rtx cost returns
8492 the same for practically all of them. So we weight the different types
8493 of address here in the order (most preferred first):
8494 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
8496 arm_arm_address_cost (rtx x)
8498 enum rtx_code c = GET_CODE (x);
8500 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8502 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8507 if (CONST_INT_P (XEXP (x, 1)))
8510 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
8520 arm_thumb_address_cost (rtx x)
8522 enum rtx_code c = GET_CODE (x);
8527 && REG_P (XEXP (x, 0))
8528 && CONST_INT_P (XEXP (x, 1)))
8535 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
8536 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
8538 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8541 /* Adjust cost hook for XScale. */
8543 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8545 /* Some true dependencies can have a higher cost depending
8546 on precisely how certain input operands are used. */
8547 if (REG_NOTE_KIND (link) == 0
8548 && recog_memoized (insn) >= 0
8549 && recog_memoized (dep) >= 0)
8551 int shift_opnum = get_attr_shift (insn);
8552 enum attr_type attr_type = get_attr_type (dep);
8554 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8555 operand for INSN. If we have a shifted input operand and the
8556 instruction we depend on is another ALU instruction, then we may
8557 have to account for an additional stall. */
8558 if (shift_opnum != 0
8559 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
8561 rtx shifted_operand;
8564 /* Get the shifted operand. */
8565 extract_insn (insn);
8566 shifted_operand = recog_data.operand[shift_opnum];
8568 /* Iterate over all the operands in DEP. If we write an operand
8569 that overlaps with SHIFTED_OPERAND, then we have to increase the
8570 cost of this dependency. */
8572 preprocess_constraints ();
8573 for (opno = 0; opno < recog_data.n_operands; opno++)
8575 /* We can ignore strict inputs. */
8576 if (recog_data.operand_type[opno] == OP_IN)
8579 if (reg_overlap_mentioned_p (recog_data.operand[opno],
8591 /* Adjust cost hook for Cortex A9. */
8593 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8595 switch (REG_NOTE_KIND (link))
8602 case REG_DEP_OUTPUT:
8603 if (recog_memoized (insn) >= 0
8604 && recog_memoized (dep) >= 0)
8606 if (GET_CODE (PATTERN (insn)) == SET)
8609 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8611 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8613 enum attr_type attr_type_insn = get_attr_type (insn);
8614 enum attr_type attr_type_dep = get_attr_type (dep);
8616 /* By default all dependencies of the form
8619 have an extra latency of 1 cycle because
8620 of the input and output dependency in this
8621 case. However this gets modeled as a true
8622 dependency and hence all these checks. */
8623 if (REG_P (SET_DEST (PATTERN (insn)))
8624 && REG_P (SET_DEST (PATTERN (dep)))
8625 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8626 SET_DEST (PATTERN (dep))))
8628 /* FMACS is a special case where the dependent
8629 instruction can be issued 3 cycles before
8630 the normal latency in case of an output
8631 dependency. */
8632 if ((attr_type_insn == TYPE_FMACS
8633 || attr_type_insn == TYPE_FMACD)
8634 && (attr_type_dep == TYPE_FMACS
8635 || attr_type_dep == TYPE_FMACD))
8637 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8638 *cost = insn_default_latency (dep) - 3;
8640 *cost = insn_default_latency (dep);
8645 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8646 *cost = insn_default_latency (dep) + 1;
8648 *cost = insn_default_latency (dep);
8664 /* Adjust cost hook for FA726TE. */
8666 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8668 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
8669 predicated) has a penalty of 3 cycles. */
8670 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8671 && recog_memoized (insn) >= 0
8672 && recog_memoized (dep) >= 0
8673 && get_attr_conds (dep) == CONDS_SET)
8675 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8676 if (get_attr_conds (insn) == CONDS_USE
8677 && get_attr_type (insn) != TYPE_BRANCH)
8683 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8684 || get_attr_conds (insn) == CONDS_USE)
8694 /* Implement TARGET_REGISTER_MOVE_COST.
8696 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
8697 it is typically more expensive than a single memory access. We set
8698 the cost to less than two memory accesses so that floating
8699 point to integer conversion does not go through memory. */
8702 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
8703 reg_class_t from, reg_class_t to)
8707 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
8708 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
8710 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
8711 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
8713 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
8720 if (from == HI_REGS || to == HI_REGS)
8727 /* Implement TARGET_MEMORY_MOVE_COST. */
8730 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
8731 bool in ATTRIBUTE_UNUSED)
8737 if (GET_MODE_SIZE (mode) < 4)
8740 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
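/* Worked example (editor's illustration): an SImode spill to or from
   LO_REGS costs 2 * 4 * 1 == 8, while the same move through HI_REGS
   costs 2 * 4 * 2 == 16, steering reload towards the low registers. */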
8744 /* Vectorizer cost model implementation. */
8746 /* Implement targetm.vectorize.builtin_vectorization_cost. */
8748 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
8750 int misalign ATTRIBUTE_UNUSED)
8754 switch (type_of_cost)
8757 return current_tune->vec_costs->scalar_stmt_cost;
8760 return current_tune->vec_costs->scalar_load_cost;
8763 return current_tune->vec_costs->scalar_store_cost;
8766 return current_tune->vec_costs->vec_stmt_cost;
8769 return current_tune->vec_costs->vec_align_load_cost;
8772 return current_tune->vec_costs->vec_store_cost;
8775 return current_tune->vec_costs->vec_to_scalar_cost;
8778 return current_tune->vec_costs->scalar_to_vec_cost;
8780 case unaligned_load:
8781 return current_tune->vec_costs->vec_unalign_load_cost;
8783 case unaligned_store:
8784 return current_tune->vec_costs->vec_unalign_store_cost;
8786 case cond_branch_taken:
8787 return current_tune->vec_costs->cond_taken_branch_cost;
8789 case cond_branch_not_taken:
8790 return current_tune->vec_costs->cond_not_taken_branch_cost;
8793 case vec_promote_demote:
8794 return current_tune->vec_costs->vec_stmt_cost;
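      /* A vector constructor's cost scales with the element count; e.g.
	 building a V4SI from scalars costs 4 / 2 + 1 = 3 units below.  */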
8797 elements = TYPE_VECTOR_SUBPARTS (vectype);
8798 return elements / 2 + 1;
8805 /* Implement targetm.vectorize.add_stmt_cost. */
8808 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
8809 struct _stmt_vec_info *stmt_info, int misalign,
8810 enum vect_cost_model_location where)
8812 unsigned *cost = (unsigned *) data;
8813 unsigned retval = 0;
8815 if (flag_vect_cost_model)
8817 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
8818 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
8820 /* Statements in an inner loop relative to the loop being
8821 vectorized are weighted more heavily. The value here is
8822 arbitrary and could potentially be improved with analysis. */
8823 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
8824 count *= 50; /* FIXME. */
8826 retval = (unsigned) (count * stmt_cost);
8827 cost[where] += retval;
8833 /* Return true if and only if this insn can dual-issue only as older. */
8835 cortexa7_older_only (rtx insn)
8837 if (recog_memoized (insn) < 0)
8840 if (get_attr_insn (insn) == INSN_MOV)
8843 switch (get_attr_type (insn))
8846 case TYPE_LOAD_BYTE:
8875 /* Return true if and only if this insn can dual-issue as younger. */
8877 cortexa7_younger (FILE *file, int verbose, rtx insn)
8879 if (recog_memoized (insn) < 0)
8882 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
8886 if (get_attr_insn (insn) == INSN_MOV)
8889 switch (get_attr_type (insn))
8891 case TYPE_SIMPLE_ALU_IMM:
8892 case TYPE_SIMPLE_ALU_SHIFT:
8902 /* Look for an instruction that can dual issue only as an older
8903 instruction, and move it in front of any instructions that can
8904 dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
8906 dual-issue in later cycles, by postponing issue of more flexible
8907 instructions. This heuristic may affect dual issue opportunities
8908 in the current cycle. */
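/* For example, if the ready list (head first) is
     { older-only LDR, younger-capable MOV, ADD }
   nothing needs to move, but for
     { younger-capable MOV, older-only LDR, ADD }
   the LDR is pulled in front of the MOV so that the pair can dual-issue
   in a later cycle.  */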
8910 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
8914 int first_older_only = -1, first_younger = -1;
8918 ";; sched_reorder for cycle %d with %d insns in ready list\n",
  /* Traverse the ready list from the head (the instruction to issue
     first), looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
8926 for (i = *n_readyp - 1; i >= 0; i--)
8928 rtx insn = ready[i];
8929 if (cortexa7_older_only (insn))
8931 first_older_only = i;
8933 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
8936 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
8940 /* Nothing to reorder because either no younger insn found or insn
8941 that can dual-issue only as older appears before any insn that
8942 can dual-issue as younger. */
8943 if (first_younger == -1)
8946 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
8950 /* Nothing to reorder because no older-only insn in the ready list. */
8951 if (first_older_only == -1)
8954 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
8958 /* Move first_older_only insn before first_younger. */
8960 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
8961 INSN_UID(ready [first_older_only]),
8962 INSN_UID(ready [first_younger]));
8963 rtx first_older_only_insn = ready [first_older_only];
8964 for (i = first_older_only; i < first_younger; i++)
8966 ready[i] = ready[i+1];
8969 ready[i] = first_older_only_insn;
8973 /* Implement TARGET_SCHED_REORDER. */
8975 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
8981 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
8984 /* Do nothing for other cores. */
8988 return arm_issue_rate ();
8991 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8992 It corrects the value of COST based on the relationship between
8993 INSN and DEP through the dependence LINK. It returns the new
8994 value. There is a per-core adjust_cost hook to adjust scheduler costs
8995 and the per-core hook can choose to completely override the generic
8996 adjust_cost function. Only put bits of code into arm_adjust_cost that
8997 are common across all cores. */
8999 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
9003 /* When generating Thumb-1 code, we want to place flag-setting operations
9004 close to a conditional branch which depends on them, so that we can
9005 omit the comparison. */
9007 && REG_NOTE_KIND (link) == 0
9008 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
9009 && recog_memoized (dep) >= 0
9010 && get_attr_conds (dep) == CONDS_SET)
9013 if (current_tune->sched_adjust_cost != NULL)
9015 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
9019 /* XXX Is this strictly true? */
9020 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9021 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
9024 /* Call insns don't incur a stall, even if they follow a load. */
9025 if (REG_NOTE_KIND (link) == 0
9029 if ((i_pat = single_set (insn)) != NULL
9030 && MEM_P (SET_SRC (i_pat))
9031 && (d_pat = single_set (dep)) != NULL
9032 && MEM_P (SET_DEST (d_pat)))
9034 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
	  /* This is a load after a store; there is no conflict if the load reads
	     from a cached area.  Assume that loads from the stack, and from the
	     constant pool are cached, and that others will miss.  This is a
	     hack.  */
9040 if ((GET_CODE (src_mem) == SYMBOL_REF
9041 && CONSTANT_POOL_ADDRESS_P (src_mem))
9042 || reg_mentioned_p (stack_pointer_rtx, src_mem)
9043 || reg_mentioned_p (frame_pointer_rtx, src_mem)
9044 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
9052 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
9055 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
9057 return (optimize > 0) ? 2 : 0;
9061 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
9063 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
9066 static bool fp_consts_inited = false;
9068 static REAL_VALUE_TYPE value_fp0;
9071 init_fp_table (void)
9075 r = REAL_VALUE_ATOF ("0", DFmode);
9077 fp_consts_inited = true;
9080 /* Return TRUE if rtx X is a valid immediate FP constant. */
9082 arm_const_double_rtx (rtx x)
9086 if (!fp_consts_inited)
9089 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9090 if (REAL_VALUE_MINUS_ZERO (r))
9093 if (REAL_VALUES_EQUAL (r, value_fp0))
9099 /* VFPv3 has a fairly wide range of representable immediates, formed from
9100 "quarter-precision" floating-point values. These can be evaluated using this
9101 formula (with ^ for exponentiation):
9105 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
9106 16 <= n <= 31 and 0 <= r <= 7.
9108 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
9110 - A (most-significant) is the sign bit.
9111 - BCD are the exponent (encoded as r XOR 3).
9112 - EFGH are the mantissa (encoded as n - 16).
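   For example, 1.0 = 16 * 2^-4, i.e. s = 0, n = 16, r = 4, which is
   encoded as A = 0, BCD = (4 XOR 3) = 0b111, EFGH = (16 - 16) = 0b0000,
   giving the 8-bit value 0x70.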
9115 /* Return an integer index for a VFPv3 immediate operand X suitable for the
9116 fconst[sd] instruction, or -1 if X isn't suitable. */
9118 vfp3_const_double_index (rtx x)
9120 REAL_VALUE_TYPE r, m;
9122 unsigned HOST_WIDE_INT mantissa, mant_hi;
9123 unsigned HOST_WIDE_INT mask;
9124 HOST_WIDE_INT m1, m2;
9125 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
9127 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
9130 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9132 /* We can't represent these things, so detect them first. */
9133 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
9136 /* Extract sign, exponent and mantissa. */
9137 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
9138 r = real_value_abs (&r);
9139 exponent = REAL_EXP (&r);
9140 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
9141 highest (sign) bit, with a fixed binary point at bit point_pos.
9142 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
9143 bits for the mantissa, this may fail (low bits would be lost). */
9144 real_ldexp (&m, &r, point_pos - exponent);
9145 REAL_VALUE_TO_INT (&m1, &m2, m);
9149 /* If there are bits set in the low part of the mantissa, we can't
9150 represent this value. */
9154 /* Now make it so that mantissa contains the most-significant bits, and move
9155 the point_pos to indicate that the least-significant bits have been
9157 point_pos -= HOST_BITS_PER_WIDE_INT;
9160 /* We can permit four significant bits of mantissa only, plus a high bit
9161 which is always 1. */
9162 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
9163 if ((mantissa & mask) != 0)
9166 /* Now we know the mantissa is in range, chop off the unneeded bits. */
9167 mantissa >>= point_pos - 5;
9169 /* The mantissa may be zero. Disallow that case. (It's possible to load the
9170 floating-point immediate zero with Neon using an integer-zero load, but
9171 that case is handled elsewhere.) */
9175 gcc_assert (mantissa >= 16 && mantissa <= 31);
9177 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
9178 normalized significands are in the range [1, 2). (Our mantissa is shifted
9179 left 4 places at this point relative to normalized IEEE754 values). GCC
9180 internally uses [0.5, 1) (see real.c), so the exponent returned from
9181 REAL_EXP must be altered. */
9182 exponent = 5 - exponent;
9184 if (exponent < 0 || exponent > 7)
9187 /* Sign, mantissa and exponent are now in the correct form to plug into the
9188 formula described in the comment above. */
9189 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
9192 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
9194 vfp3_const_double_rtx (rtx x)
9199 return vfp3_const_double_index (x) != -1;
9202 /* Recognize immediates which can be used in various Neon instructions. Legal
9203 immediates are described by the following table (for VMVN variants, the
9204 bitwise inverse of the constant shown is recognized. In either case, VMOV
9205 is output and the correct instruction to use for a given constant is chosen
9206 by the assembler). The constant shown is replicated across all elements of
9207 the destination vector.
9209 insn elems variant constant (binary)
9210 ---- ----- ------- -----------------
9211 vmov i32 0 00000000 00000000 00000000 abcdefgh
9212 vmov i32 1 00000000 00000000 abcdefgh 00000000
9213 vmov i32 2 00000000 abcdefgh 00000000 00000000
9214 vmov i32 3 abcdefgh 00000000 00000000 00000000
9215 vmov i16 4 00000000 abcdefgh
9216 vmov i16 5 abcdefgh 00000000
9217 vmvn i32 6 00000000 00000000 00000000 abcdefgh
9218 vmvn i32 7 00000000 00000000 abcdefgh 00000000
9219 vmvn i32 8 00000000 abcdefgh 00000000 00000000
9220 vmvn i32 9 abcdefgh 00000000 00000000 00000000
9221 vmvn i16 10 00000000 abcdefgh
9222 vmvn i16 11 abcdefgh 00000000
9223 vmov i32 12 00000000 00000000 abcdefgh 11111111
9224 vmvn i32 13 00000000 00000000 abcdefgh 11111111
9225 vmov i32 14 00000000 abcdefgh 11111111 11111111
9226 vmvn i32 15 00000000 abcdefgh 11111111 11111111
9228 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
9229 eeeeeeee ffffffff gggggggg hhhhhhhh
9230 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
9231 vmov f32 19 00000000 00000000 00000000 00000000
9233 For case 18, B = !b. Representable values are exactly those accepted by
9234 vfp3_const_double_index, but are output as floating-point numbers rather
9237 For case 19, we will change it to vmov.i32 when assembling.
9239 Variants 0-5 (inclusive) may also be used as immediates for the second
9240 operand of VORR/VBIC instructions.
9242 The INVERSE argument causes the bitwise inverse of the given operand to be
9243 recognized instead (used for recognizing legal immediates for the VAND/VORN
9244 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
9245 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
9246 output, rather than the real insns vbic/vorr).
9248 INVERSE makes no difference to the recognition of float vectors.
9250 The return value is the variant of immediate as shown in the above table, or
9251 -1 if the given value doesn't match any of the listed patterns.
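   For example, a V4SI vector with every element equal to 0x000000AB
   matches variant 0 (abcdefgh = 0xAB), while one with every element
   equal to 0xFFFFFFAB matches variant 6 (the VMVN form of 0x00000054).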
9254 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
9255 rtx *modconst, int *elementwidth)
9257 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
9259 for (i = 0; i < idx; i += (STRIDE)) \
9264 immtype = (CLASS); \
9265 elsize = (ELSIZE); \
9269 unsigned int i, elsize = 0, idx = 0, n_elts;
9270 unsigned int innersize;
9271 unsigned char bytes[16];
9272 int immtype = -1, matches;
9273 unsigned int invmask = inverse ? 0xff : 0;
9274 bool vector = GET_CODE (op) == CONST_VECTOR;
9278 n_elts = CONST_VECTOR_NUNITS (op);
9279 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9284 if (mode == VOIDmode)
9286 innersize = GET_MODE_SIZE (mode);
9289 /* Vectors of float constants. */
9290 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
9292 rtx el0 = CONST_VECTOR_ELT (op, 0);
9295 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
9298 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
9300 for (i = 1; i < n_elts; i++)
9302 rtx elt = CONST_VECTOR_ELT (op, i);
9305 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
9307 if (!REAL_VALUES_EQUAL (r0, re))
9312 *modconst = CONST_VECTOR_ELT (op, 0);
9317 if (el0 == CONST0_RTX (GET_MODE (el0)))
9323 /* Splat vector constant out into a byte vector. */
9324 for (i = 0; i < n_elts; i++)
9326 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
9327 unsigned HOST_WIDE_INT elpart;
9328 unsigned int part, parts;
9330 if (CONST_INT_P (el))
9332 elpart = INTVAL (el);
9335 else if (CONST_DOUBLE_P (el))
9337 elpart = CONST_DOUBLE_LOW (el);
9343 for (part = 0; part < parts; part++)
9346 for (byte = 0; byte < innersize; byte++)
9348 bytes[idx++] = (elpart & 0xff) ^ invmask;
9349 elpart >>= BITS_PER_UNIT;
9351 if (CONST_DOUBLE_P (el))
9352 elpart = CONST_DOUBLE_HIGH (el);
9357 gcc_assert (idx == GET_MODE_SIZE (mode));
9361 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
9362 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9364 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9365 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9367 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
9368 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9370 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
9371 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
9373 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
9375 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
9377 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
9378 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9380 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9381 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9383 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
9384 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9386 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
9387 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
9389 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
9391 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
9393 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9394 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9396 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9397 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9399 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
9400 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9402 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
9403 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9405 CHECK (1, 8, 16, bytes[i] == bytes[0]);
9407 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
9408 && bytes[i] == bytes[(i + 8) % idx]);
9416 *elementwidth = elsize;
9420 unsigned HOST_WIDE_INT imm = 0;
9422 /* Un-invert bytes of recognized vector, if necessary. */
9424 for (i = 0; i < idx; i++)
9425 bytes[i] ^= invmask;
9429 /* FIXME: Broken on 32-bit H_W_I hosts. */
9430 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
9432 for (i = 0; i < 8; i++)
9433 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
9434 << (i * BITS_PER_UNIT);
9436 *modconst = GEN_INT (imm);
9440 unsigned HOST_WIDE_INT imm = 0;
9442 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
9443 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
9445 *modconst = GEN_INT (imm);
9453 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
9454 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
9455 float elements), and a modified constant (whatever should be output for a
9456 VMOV) in *MODCONST. */
9459 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
9460 rtx *modconst, int *elementwidth)
9464 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
9470 *modconst = tmpconst;
9473 *elementwidth = tmpwidth;
9478 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
9479 the immediate is valid, write a constant suitable for using as an operand
9480 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
9481 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
9484 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
9485 rtx *modconst, int *elementwidth)
9489 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
9491 if (retval < 0 || retval > 5)
9495 *modconst = tmpconst;
9498 *elementwidth = tmpwidth;
9503 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
9504 the immediate is valid, write a constant suitable for using as an operand
9505 to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT says whether the shift is a left or a right
   shift, because the two have different immediate ranges.  */
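/* For example, with 8-bit elements a VSHL immediate must lie in [0, 7],
   while a VSHR immediate must lie in [1, 8].  */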
9510 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
9511 rtx *modconst, int *elementwidth,
9514 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9515 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
9516 unsigned HOST_WIDE_INT last_elt = 0;
9517 unsigned HOST_WIDE_INT maxshift;
9519 /* Split vector constant out into a byte vector. */
9520 for (i = 0; i < n_elts; i++)
9522 rtx el = CONST_VECTOR_ELT (op, i);
9523 unsigned HOST_WIDE_INT elpart;
9525 if (CONST_INT_P (el))
9526 elpart = INTVAL (el);
9527 else if (CONST_DOUBLE_P (el))
9532 if (i != 0 && elpart != last_elt)
9538 /* Shift less than element size. */
9539 maxshift = innersize * 8;
9543 /* Left shift immediate value can be from 0 to <size>-1. */
9544 if (last_elt >= maxshift)
9549 /* Right shift immediate value can be from 1 to <size>. */
9550 if (last_elt == 0 || last_elt > maxshift)
9555 *elementwidth = innersize * 8;
9558 *modconst = CONST_VECTOR_ELT (op, 0);
/* Return a string suitable for output of a Neon immediate logic operation
   MNEM (VBIC or VORR).  */
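/* For example, called with MNEM "vorr" and a 32-bit element width this
   returns "vorr.i32\t%q0, %2" for a quad operand or "vorr.i32\t%P0, %2"
   for a double operand.  */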
9567 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9568 int inverse, int quad)
9570 int width, is_valid;
9571 static char templ[40];
9573 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9575 gcc_assert (is_valid != 0);
9578 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9580 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
9585 /* Return a string suitable for output of Neon immediate shift operation
9586 (VSHR or VSHL) MNEM. */
9589 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9590 enum machine_mode mode, int quad,
9593 int width, is_valid;
9594 static char templ[40];
9596 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9597 gcc_assert (is_valid != 0);
9600 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9602 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9607 /* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps.
9611 Although all vector elements of the result but the first are ignored, we
9612 actually calculate the same result in each of the elements. An alternative
9613 such as initially loading a vector with zero to use as each of the second
9614 operands would use up an additional register and take an extra instruction,
9615 for no particular gain. */
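/* For example, summing a V4SI vector {a, b, c, d} takes two pairwise
   steps: {a+b, c+d, a+b, c+d} and then {a+b+c+d, ...}; only element 0
   of the final result is meaningful.  */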
9618 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9619 rtx (*reduc) (rtx, rtx, rtx))
9621 enum machine_mode inner = GET_MODE_INNER (mode);
9622 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9625 for (i = parts / 2; i >= 1; i /= 2)
9627 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
9628 emit_insn (reduc (dest, tmpsum, tmpsum));
9633 /* If VALS is a vector constant that can be loaded into a register
9634 using VDUP, generate instructions to do so and return an RTX to
9635 assign to the register. Otherwise return NULL_RTX. */
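/* For example, the vector {5, 5, 5, 5} becomes a move of 5 into a core
   register followed by a single VDUP.  */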
9638 neon_vdup_constant (rtx vals)
9640 enum machine_mode mode = GET_MODE (vals);
9641 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9642 int n_elts = GET_MODE_NUNITS (mode);
9643 bool all_same = true;
9647 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
9650 for (i = 0; i < n_elts; ++i)
9652 x = XVECEXP (vals, 0, i);
9653 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16), but we do not currently do so.  */
9664 /* We can load this constant by using VDUP and a constant in a
9665 single ARM register. This will be cheaper than a vector
9668 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9669 return gen_rtx_VEC_DUPLICATE (mode, x);
9672 /* Generate code to load VALS, which is a PARALLEL containing only
9673 constants (for vec_init) or CONST_VECTOR, efficiently into a
9674 register. Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that cannot be converted into a CONST_VECTOR.  */
9678 neon_make_constant (rtx vals)
9680 enum machine_mode mode = GET_MODE (vals);
9682 rtx const_vec = NULL_RTX;
9683 int n_elts = GET_MODE_NUNITS (mode);
9687 if (GET_CODE (vals) == CONST_VECTOR)
9689 else if (GET_CODE (vals) == PARALLEL)
9691 /* A CONST_VECTOR must contain only CONST_INTs and
9692 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9693 Only store valid constants in a CONST_VECTOR. */
9694 for (i = 0; i < n_elts; ++i)
9696 rtx x = XVECEXP (vals, 0, i);
9697 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
9700 if (n_const == n_elts)
9701 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9706 if (const_vec != NULL
9707 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9708 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9710 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9711 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9712 pipeline cycle; creating the constant takes one or two ARM
9715 else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We cannot take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We cannot construct an initializer.  */
9727 /* Initialize vector TARGET to VALS. */
9730 neon_expand_vector_init (rtx target, rtx vals)
9732 enum machine_mode mode = GET_MODE (target);
9733 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9734 int n_elts = GET_MODE_NUNITS (mode);
9735 int n_var = 0, one_var = -1;
9736 bool all_same = true;
9740 for (i = 0; i < n_elts; ++i)
9742 x = XVECEXP (vals, 0, i);
9743 if (!CONSTANT_P (x))
9744 ++n_var, one_var = i;
9746 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9752 rtx constant = neon_make_constant (vals);
9753 if (constant != NULL_RTX)
9755 emit_move_insn (target, constant);
9760 /* Splat a single non-constant element if we can. */
9761 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9763 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9764 emit_insn (gen_rtx_SET (VOIDmode, target,
9765 gen_rtx_VEC_DUPLICATE (mode, x)));
9769 /* One field is non-constant. Load constant then overwrite varying
9770 field. This is more efficient than using the stack. */
9773 rtx copy = copy_rtx (vals);
9774 rtx index = GEN_INT (one_var);
      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
9778 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9779 neon_expand_vector_init (target, copy);
9781 /* Insert variable. */
9782 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9786 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9789 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9792 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9795 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9798 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9801 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9804 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9807 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9810 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9818 /* Construct the vector in memory one field at a time
9819 and load the whole vector. */
9820 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
9821 for (i = 0; i < n_elts; i++)
9822 emit_move_insn (adjust_address_nv (mem, inner_mode,
9823 i * GET_MODE_SIZE (inner_mode)),
9824 XVECEXP (vals, 0, i));
9825 emit_move_insn (target, mem);
9828 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9829 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9830 reported source locations are bogus. */
9833 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9838 gcc_assert (CONST_INT_P (operand));
9840 lane = INTVAL (operand);
9842 if (lane < low || lane >= high)
9846 /* Bounds-check lanes. */
9849 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9851 bounds_check (operand, low, high, "lane out of range");
9854 /* Bounds-check constants. */
9857 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9859 bounds_check (operand, low, high, "constant out of range");
9863 neon_element_bits (enum machine_mode mode)
9866 return GET_MODE_BITSIZE (mode);
9868 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9872 /* Predicates for `match_operand' and `match_operator'. */
9874 /* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if only limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */
9880 arm_coproc_mem_operand (rtx op, bool wb)
9884 /* Reject eliminable registers. */
9885 if (! (reload_in_progress || reload_completed)
9886 && ( reg_mentioned_p (frame_pointer_rtx, op)
9887 || reg_mentioned_p (arg_pointer_rtx, op)
9888 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9889 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9890 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9891 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9894 /* Constants are converted into offsets from labels. */
9900 if (reload_completed
9901 && (GET_CODE (ind) == LABEL_REF
9902 || (GET_CODE (ind) == CONST
9903 && GET_CODE (XEXP (ind, 0)) == PLUS
9904 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9905 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
9908 /* Match: (mem (reg)). */
9910 return arm_address_register_rtx_p (ind, 0);
  /* Auto-increment addressing modes.  POST_INC and PRE_DEC are
9913 acceptable in any case (subject to verification by
9914 arm_address_register_rtx_p). We need WB to be true to accept
9915 PRE_INC and POST_DEC. */
9916 if (GET_CODE (ind) == POST_INC
9917 || GET_CODE (ind) == PRE_DEC
9919 && (GET_CODE (ind) == PRE_INC
9920 || GET_CODE (ind) == POST_DEC)))
9921 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9924 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9925 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9926 && GET_CODE (XEXP (ind, 1)) == PLUS
9927 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9928 ind = XEXP (ind, 1);
9933 if (GET_CODE (ind) == PLUS
9934 && REG_P (XEXP (ind, 0))
9935 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9936 && CONST_INT_P (XEXP (ind, 1))
9937 && INTVAL (XEXP (ind, 1)) > -1024
9938 && INTVAL (XEXP (ind, 1)) < 1024
9939 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9945 /* Return TRUE if OP is a memory operand which we can load or store a vector
9946 to/from. TYPE is one of the following values:
     0 - Vector load/store (vldr)
9948 1 - Core registers (ldm)
9949 2 - Element/structure loads (vld1)
9952 neon_vector_mem_operand (rtx op, int type)
9956 /* Reject eliminable registers. */
9957 if (! (reload_in_progress || reload_completed)
9958 && ( reg_mentioned_p (frame_pointer_rtx, op)
9959 || reg_mentioned_p (arg_pointer_rtx, op)
9960 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9961 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9962 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9963 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9966 /* Constants are converted into offsets from labels. */
9972 if (reload_completed
9973 && (GET_CODE (ind) == LABEL_REF
9974 || (GET_CODE (ind) == CONST
9975 && GET_CODE (XEXP (ind, 0)) == PLUS
9976 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9977 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
9980 /* Match: (mem (reg)). */
9982 return arm_address_register_rtx_p (ind, 0);
9984 /* Allow post-increment with Neon registers. */
9985 if ((type != 1 && GET_CODE (ind) == POST_INC)
9986 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9987 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9989 /* FIXME: vld1 allows register post-modify. */
9995 && GET_CODE (ind) == PLUS
9996 && REG_P (XEXP (ind, 0))
9997 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9998 && CONST_INT_P (XEXP (ind, 1))
9999 && INTVAL (XEXP (ind, 1)) > -1024
10000 /* For quad modes, we restrict the constant offset to be slightly less
10001 than what the instruction format permits. We have no such constraint
10002 on double mode offsets. (This must match arm_legitimate_index_p.) */
10003 && (INTVAL (XEXP (ind, 1))
10004 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
10005 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
10014 neon_struct_mem_operand (rtx op)
10018 /* Reject eliminable registers. */
10019 if (! (reload_in_progress || reload_completed)
10020 && ( reg_mentioned_p (frame_pointer_rtx, op)
10021 || reg_mentioned_p (arg_pointer_rtx, op)
10022 || reg_mentioned_p (virtual_incoming_args_rtx, op)
10023 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
10024 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
10025 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
10028 /* Constants are converted into offsets from labels. */
10032 ind = XEXP (op, 0);
10034 if (reload_completed
10035 && (GET_CODE (ind) == LABEL_REF
10036 || (GET_CODE (ind) == CONST
10037 && GET_CODE (XEXP (ind, 0)) == PLUS
10038 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
10039 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
10042 /* Match: (mem (reg)). */
10044 return arm_address_register_rtx_p (ind, 0);
10046 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
10047 if (GET_CODE (ind) == POST_INC
10048 || GET_CODE (ind) == PRE_DEC)
10049 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
10054 /* Return true if X is a register that will be eliminated later on. */
10056 arm_eliminable_register (rtx x)
10058 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
10059 || REGNO (x) == ARG_POINTER_REGNUM
10060 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
10061 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
10064 /* Return GENERAL_REGS if a scratch register required to reload x to/from
10065 coprocessor registers. Otherwise return NO_REGS. */
10068 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
10070 if (mode == HFmode)
10072 if (!TARGET_NEON_FP16)
10073 return GENERAL_REGS;
10074 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
10076 return GENERAL_REGS;
10079 /* The neon move patterns handle all legitimate vector and struct
10082 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
10083 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
10084 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
10085 || VALID_NEON_STRUCT_MODE (mode)))
10088 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
10091 return GENERAL_REGS;
/* Values which must be returned in the most-significant end of the return
   register.  */
10098 arm_return_in_msb (const_tree valtype)
10100 return (TARGET_AAPCS_BASED
10101 && BYTES_BIG_ENDIAN
10102 && (AGGREGATE_TYPE_P (valtype)
10103 || TREE_CODE (valtype) == COMPLEX_TYPE
10104 || FIXED_POINT_TYPE_P (valtype)));
10107 /* Return TRUE if X references a SYMBOL_REF. */
10109 symbol_mentioned_p (rtx x)
10114 if (GET_CODE (x) == SYMBOL_REF)
10117 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
10118 are constant offsets, not symbols. */
10119 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
10122 fmt = GET_RTX_FORMAT (GET_CODE (x));
10124 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10130 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10131 if (symbol_mentioned_p (XVECEXP (x, i, j)))
10134 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
10141 /* Return TRUE if X references a LABEL_REF. */
10143 label_mentioned_p (rtx x)
10148 if (GET_CODE (x) == LABEL_REF)
10151 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
10152 instruction, but they are constant offsets, not symbols. */
10153 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
10156 fmt = GET_RTX_FORMAT (GET_CODE (x));
10157 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10163 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10164 if (label_mentioned_p (XVECEXP (x, i, j)))
10167 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
10175 tls_mentioned_p (rtx x)
10177 switch (GET_CODE (x))
10180 return tls_mentioned_p (XEXP (x, 0));
10183 if (XINT (x, 1) == UNSPEC_TLS)
10191 /* Must not copy any rtx that uses a pc-relative address. */
arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
10196 if (GET_CODE (*x) == UNSPEC
10197 && (XINT (*x, 1) == UNSPEC_PIC_BASE
10198 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
10204 arm_cannot_copy_insn_p (rtx insn)
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
10211 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
10215 minmax_code (rtx x)
10217 enum rtx_code code = GET_CODE (x);
10230 gcc_unreachable ();
10234 /* Match pair of min/max operators that can be implemented via usat/ssat. */
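/* For example, bounds [0, 255] match the unsigned form (USAT), and
   bounds [-128, 127] match the signed form (SSAT), since
   -128 == -127 - 1 and 127 is one less than a power of two.  */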
10237 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
10238 int *mask, bool *signed_sat)
10240 /* The high bound must be a power of two minus one. */
10241 int log = exact_log2 (INTVAL (hi_bound) + 1);
10245 /* The low bound is either zero (for usat) or one less than the
10246 negation of the high bound (for ssat). */
10247 if (INTVAL (lo_bound) == 0)
10252 *signed_sat = false;
10257 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
10262 *signed_sat = true;
10270 /* Return 1 if memory locations are adjacent. */
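/* For example, (mem (plus r3 (const_int 4))) and
   (mem (plus r3 (const_int 8))) are adjacent, while the same offsets
   from different base registers are not.  */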
10272 adjacent_mem_locations (rtx a, rtx b)
10274 /* We don't guarantee to preserve the order of these memory refs. */
10275 if (volatile_refs_p (a) || volatile_refs_p (b))
10278 if ((REG_P (XEXP (a, 0))
10279 || (GET_CODE (XEXP (a, 0)) == PLUS
10280 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
10281 && (REG_P (XEXP (b, 0))
10282 || (GET_CODE (XEXP (b, 0)) == PLUS
10283 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
10285 HOST_WIDE_INT val0 = 0, val1 = 0;
10289 if (GET_CODE (XEXP (a, 0)) == PLUS)
10291 reg0 = XEXP (XEXP (a, 0), 0);
10292 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
10295 reg0 = XEXP (a, 0);
10297 if (GET_CODE (XEXP (b, 0)) == PLUS)
10299 reg1 = XEXP (XEXP (b, 0), 0);
10300 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
10303 reg1 = XEXP (b, 0);
10305 /* Don't accept any offset that will require multiple
10306 instructions to handle, since this would cause the
10307 arith_adjacentmem pattern to output an overlong sequence. */
10308 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
10311 /* Don't allow an eliminable register: register elimination can make
10312 the offset too large. */
10313 if (arm_eliminable_register (reg0))
10316 val_diff = val1 - val0;
10320 /* If the target has load delay slots, then there's no benefit
10321 to using an ldm instruction unless the offset is zero and
10322 we are optimizing for size. */
10323 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
10324 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
10325 && (val_diff == 4 || val_diff == -4));
10328 return ((REGNO (reg0) == REGNO (reg1))
10329 && (val_diff == 4 || val_diff == -4));
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if the value is to be loaded into the PC.
   The pattern we are trying to match for load is:
10340 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
10341 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
     ]
   where
   1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
10348 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
10349 3. If consecutive is TRUE, then for kth register being loaded,
10350 REGNO (R_dk) = REGNO (R_d0) + k.
10351 The pattern for store is similar. */
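/* For example, a pop of {r4, r5} with write-back of SP matches:
     (parallel [(set (reg sp) (plus (reg sp) (const_int 8)))
		(set (reg r4) (mem (reg sp)))
		(set (reg r5) (mem (plus (reg sp) (const_int 4))))]).  */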
10353 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
10354 bool consecutive, bool return_pc)
10356 HOST_WIDE_INT count = XVECLEN (op, 0);
10357 rtx reg, mem, addr;
10359 unsigned first_regno;
10360 HOST_WIDE_INT i = 1, base = 0, offset = 0;
10362 bool addr_reg_in_reglist = false;
10363 bool update = false;
10368 /* If not in SImode, then registers must be consecutive
10369 (e.g., VLDM instructions for DFmode). */
10370 gcc_assert ((mode == SImode) || consecutive);
10371 /* Setting return_pc for stores is illegal. */
10372 gcc_assert (!return_pc || load);
10374 /* Set up the increments and the regs per val based on the mode. */
10375 reg_increment = GET_MODE_SIZE (mode);
10376 regs_per_val = reg_increment / 4;
10377 offset_adj = return_pc ? 1 : 0;
10380 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
10381 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
10384 /* Check if this is a write-back. */
10385 elt = XVECEXP (op, 0, offset_adj);
10386 if (GET_CODE (SET_SRC (elt)) == PLUS)
10392 /* The offset adjustment must be the number of registers being
10393 popped times the size of a single register. */
10394 if (!REG_P (SET_DEST (elt))
10395 || !REG_P (XEXP (SET_SRC (elt), 0))
10396 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
10397 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
10398 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
10399 ((count - 1 - offset_adj) * reg_increment))
10403 i = i + offset_adj;
10404 base = base + offset_adj;
10405 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
10406 success depends on the type: VLDM can do just one reg,
10407 LDM must do at least two. */
10408 if ((count <= i) && (mode == SImode))
10411 elt = XVECEXP (op, 0, i - 1);
10412 if (GET_CODE (elt) != SET)
10417 reg = SET_DEST (elt);
10418 mem = SET_SRC (elt);
10422 reg = SET_SRC (elt);
10423 mem = SET_DEST (elt);
10426 if (!REG_P (reg) || !MEM_P (mem))
10429 regno = REGNO (reg);
10430 first_regno = regno;
10431 addr = XEXP (mem, 0);
10432 if (GET_CODE (addr) == PLUS)
10434 if (!CONST_INT_P (XEXP (addr, 1)))
10437 offset = INTVAL (XEXP (addr, 1));
10438 addr = XEXP (addr, 0);
  /* Don't allow SP to be loaded unless it is also the base register.  This
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
10447 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10450 for (; i < count; i++)
10452 elt = XVECEXP (op, 0, i);
10453 if (GET_CODE (elt) != SET)
10458 reg = SET_DEST (elt);
10459 mem = SET_SRC (elt);
10463 reg = SET_SRC (elt);
10464 mem = SET_DEST (elt);
10468 || GET_MODE (reg) != mode
10469 || REGNO (reg) <= regno
10472 (unsigned int) (first_regno + regs_per_val * (i - base))))
10473 /* Don't allow SP to be loaded unless it is also the base register. It
10474 guarantees that SP is reset correctly when an LDM instruction
10475 is interrupted. Otherwise, we might end up with a corrupt stack. */
10476 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10478 || GET_MODE (mem) != mode
10479 || ((GET_CODE (XEXP (mem, 0)) != PLUS
10480 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
10481 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
10482 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
10483 offset + (i - base) * reg_increment))
10484 && (!REG_P (XEXP (mem, 0))
10485 || offset + (i - base) * reg_increment != 0)))
10488 regno = REGNO (reg);
10489 if (regno == REGNO (addr))
10490 addr_reg_in_reglist = true;
10495 if (update && addr_reg_in_reglist)
  /* For Thumb-1, the address register is always modified - either by
     write-back or by an explicit load.  If the pattern does not describe
     an update, then the address register must be in the list of loaded
     registers.  */
10502 return update || addr_reg_in_reglist;
10508 /* Return true iff it would be profitable to turn a sequence of NOPS loads
10509 or stores (depending on IS_STORE) into a load-multiple or store-multiple
10510 instruction. ADD_OFFSET is nonzero if the base address register needs
10511 to be modified with an add instruction before we can use it. */
10514 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
10515 int nops, HOST_WIDE_INT add_offset)
10517 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
10518 if the offset isn't small enough. The reason 2 ldrs are faster
10519 is because these ARMs are able to do more than one cache access
10520 in a single cycle. The ARM9 and StrongARM have Harvard caches,
10521 whilst the ARM8 has a double bandwidth cache. This means that
10522 these cores can do both an instruction fetch and a data fetch in
10523 a single cycle, so the trick of calculating the address into a
10524 scratch register (one of the result regs) and then doing a load
10525 multiple actually becomes slower (and no smaller in code size).
10526 That is the transformation
10528 ldr rd1, [rbase + offset]
10529 ldr rd2, [rbase + offset + 4]
10533 add rd1, rbase, offset
10534 ldmia rd1, {rd1, rd2}
10536 produces worse code -- '3 cycles + any stalls on rd2' instead of
10537 '2 cycles + any stalls on rd2'. On ARMs with only one cache
10538 access per cycle, the first sequence could never complete in less
10539 than 6 cycles, whereas the ldm sequence would only take 5 and
10540 would make better use of sequential accesses if not hitting the
10543 We cheat here and test 'arm_ld_sched' which we currently know to
10544 only be true for the ARM8, ARM9 and StrongARM. If this ever
10545 changes, then the test below needs to be reworked. */
10546 if (nops == 2 && arm_ld_sched && add_offset != 0)
10549 /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot use them.
10553 For XScale ldm requires 2 + NREGS cycles to complete and blocks
10554 the pipeline until completion.
     An ldr instruction takes 1-3 cycles, but does not block the pipeline.
10571 Best case ldr will always win. However, the more ldr instructions
10572 we issue, the less likely we are to be able to schedule them well.
10573 Using ldr instructions also increases code size.
10575 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10576 for counts of 3 or 4 regs. */
10577 if (nops <= 2 && arm_tune_xscale && !optimize_size)
10582 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10583 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10584 an array ORDER which describes the sequence to use when accessing the
10585 offsets that produces an ascending order. In this sequence, each
10586 offset must be larger by exactly 4 than the previous one. ORDER[0]
10587 must have been filled in with the lowest offset by the caller.
10588 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10589 we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */
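/* For example, UNSORTED_OFFSETS {4, 12, 0, 8} with ORDER[0] = 2
   (the lowest offset, 0) yields ORDER = {2, 0, 3, 1}.  */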
10594 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10595 int *unsorted_regs)
10598 for (i = 1; i < nops; i++)
10602 order[i] = order[i - 1];
10603 for (j = 0; j < nops; j++)
10604 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10606 /* We must find exactly one offset that is higher than the
10607 previous one by 4. */
10608 if (order[i] != order[i - 1])
10612 if (order[i] == order[i - 1])
10614 /* The register numbers must be ascending. */
10615 if (unsorted_regs != NULL
10616 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
10622 /* Used to determine in a peephole whether a sequence of load
10623 instructions can be changed into a load-multiple instruction.
10624 NOPS is the number of separate load instructions we are examining. The
10625 first NOPS entries in OPERANDS are the destination registers, the
10626 next NOPS entries are memory operands. If this function is
10627 successful, *BASE is set to the common base register of the memory
10628 accesses; *LOAD_OFFSET is set to the first memory location's offset
10629 from that base register.
10630 REGS is an array filled in with the destination register numbers.
10631 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of loads.  If CHECK_REGS is true,
10633 the sequence of registers in REGS matches the loads from ascending memory
10634 locations, and the function verifies that the register numbers are
10635 themselves ascending. If CHECK_REGS is false, the register numbers
10636 are stored in the order they are found in the operands. */
10638 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10639 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10641 int unsorted_regs[MAX_LDM_STM_OPS];
10642 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10643 int order[MAX_LDM_STM_OPS];
10644 rtx base_reg_rtx = NULL;
10648 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10649 easily extended if required. */
10650 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10652 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10654 /* Loop over the operands and check that the memory references are
10655 suitable (i.e. immediate offsets from the same base register). At
10656 the same time, extract the target register, and the memory
10658 for (i = 0; i < nops; i++)
10663 /* Convert a subreg of a mem into the mem itself. */
10664 if (GET_CODE (operands[nops + i]) == SUBREG)
10665 operands[nops + i] = alter_subreg (operands + (nops + i), true);
10667 gcc_assert (MEM_P (operands[nops + i]));
10669 /* Don't reorder volatile memory references; it doesn't seem worth
10670 looking for the case where the order is ok anyway. */
10671 if (MEM_VOLATILE_P (operands[nops + i]))
10674 offset = const0_rtx;
10676 if ((REG_P (reg = XEXP (operands[nops + i], 0))
10677 || (GET_CODE (reg) == SUBREG
10678 && REG_P (reg = SUBREG_REG (reg))))
10679 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10680 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
10681 || (GET_CODE (reg) == SUBREG
10682 && REG_P (reg = SUBREG_REG (reg))))
10683 && (CONST_INT_P (offset
10684 = XEXP (XEXP (operands[nops + i], 0), 1)))))
10688 base_reg = REGNO (reg);
10689 base_reg_rtx = reg;
10690 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10693 else if (base_reg != (int) REGNO (reg))
10694 /* Not addressed from the same base register. */
10697 unsorted_regs[i] = (REG_P (operands[i])
10698 ? REGNO (operands[i])
10699 : REGNO (SUBREG_REG (operands[i])));
10701 /* If it isn't an integer register, or if it overwrites the
10702 base register but isn't the last insn in the list, then
10703 we can't do this. */
10704 if (unsorted_regs[i] < 0
10705 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10706 || unsorted_regs[i] > 14
10707 || (i != nops - 1 && unsorted_regs[i] == base_reg))
10710 /* Don't allow SP to be loaded unless it is also the base
10711 register. It guarantees that SP is reset correctly when
10712 an LDM instruction is interrupted. Otherwise, we might
10713 end up with a corrupt stack. */
10714 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
10717 unsorted_offsets[i] = INTVAL (offset);
10718 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10722 /* Not a suitable memory address. */
10726 /* All the useful information has now been extracted from the
10727 operands into unsorted_regs and unsorted_offsets; additionally,
10728 order[0] has been set to the lowest offset in the list. Sort
10729 the offsets into order, verifying that they are adjacent, and
10730 check that the register numbers are ascending. */
10731 if (!compute_offset_order (nops, unsorted_offsets, order,
10732 check_regs ? unsorted_regs : NULL))
10736 memcpy (saved_order, order, sizeof order);
10742 for (i = 0; i < nops; i++)
10743 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10745 *load_offset = unsorted_offsets[order[0]];
10749 && !peep2_reg_dead_p (nops, base_reg_rtx))
10752 if (unsorted_offsets[order[0]] == 0)
10753 ldm_case = 1; /* ldmia */
10754 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10755 ldm_case = 2; /* ldmib */
10756 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10757 ldm_case = 3; /* ldmda */
10758 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10759 ldm_case = 4; /* ldmdb */
10760 else if (const_ok_for_arm (unsorted_offsets[order[0]])
10761 || const_ok_for_arm (-unsorted_offsets[order[0]]))
10766 if (!multiple_operation_profitable_p (false, nops,
10768 ? unsorted_offsets[order[0]] : 0))
10774 /* Used to determine in a peephole whether a sequence of store instructions can
10775 be changed into a store-multiple instruction.
10776 NOPS is the number of separate store instructions we are examining.
10777 NOPS_TOTAL is the total number of instructions recognized by the peephole
10779 The first NOPS entries in OPERANDS are the source registers, the next
10780 NOPS entries are memory operands. If this function is successful, *BASE is
10781 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10782 to the first memory location's offset from that base register. REGS is an
10783 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10784 likewise filled with the corresponding rtx's.
10785 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
10786 numbers to an ascending order of stores.
10787 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10788 from ascending memory locations, and the function verifies that the register
10789 numbers are themselves ascending. If CHECK_REGS is false, the register
10790 numbers are stored in the order they are found in the operands. */
10792 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10793 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10794 HOST_WIDE_INT *load_offset, bool check_regs)
10796 int unsorted_regs[MAX_LDM_STM_OPS];
10797 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10798 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10799 int order[MAX_LDM_STM_OPS];
10801 rtx base_reg_rtx = NULL;
10804 /* Write back of base register is currently only supported for Thumb 1. */
10805 int base_writeback = TARGET_THUMB1;
10807 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10808 easily extended if required. */
10809 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10811 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10813 /* Loop over the operands and check that the memory references are
10814 suitable (i.e. immediate offsets from the same base register). At
10815 the same time, extract the target register, and the memory
10817 for (i = 0; i < nops; i++)
10822 /* Convert a subreg of a mem into the mem itself. */
10823 if (GET_CODE (operands[nops + i]) == SUBREG)
10824 operands[nops + i] = alter_subreg (operands + (nops + i), true);
10826 gcc_assert (MEM_P (operands[nops + i]));
10828 /* Don't reorder volatile memory references; it doesn't seem worth
10829 looking for the case where the order is ok anyway. */
10830 if (MEM_VOLATILE_P (operands[nops + i]))
10833 offset = const0_rtx;
10835 if ((REG_P (reg = XEXP (operands[nops + i], 0))
10836 || (GET_CODE (reg) == SUBREG
10837 && REG_P (reg = SUBREG_REG (reg))))
10838 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10839 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
10840 || (GET_CODE (reg) == SUBREG
10841 && REG_P (reg = SUBREG_REG (reg))))
10842 && (CONST_INT_P (offset
10843 = XEXP (XEXP (operands[nops + i], 0), 1)))))
10845 unsorted_reg_rtxs[i] = (REG_P (operands[i])
10846 ? operands[i] : SUBREG_REG (operands[i]));
10847 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10851 base_reg = REGNO (reg);
10852 base_reg_rtx = reg;
10853 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10856 else if (base_reg != (int) REGNO (reg))
10857 /* Not addressed from the same base register. */
10860 /* If it isn't an integer register, then we can't do this. */
10861 if (unsorted_regs[i] < 0
10862 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10863 /* The effects are unpredictable if the base register is
10864 both updated and stored. */
10865 || (base_writeback && unsorted_regs[i] == base_reg)
10866 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10867 || unsorted_regs[i] > 14)
10870 unsorted_offsets[i] = INTVAL (offset);
10871 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10875 /* Not a suitable memory address. */
10879 /* All the useful information has now been extracted from the
10880 operands into unsorted_regs and unsorted_offsets; additionally,
10881 order[0] has been set to the lowest offset in the list. Sort
10882 the offsets into order, verifying that they are adjacent, and
10883 check that the register numbers are ascending. */
10884 if (!compute_offset_order (nops, unsorted_offsets, order,
10885 check_regs ? unsorted_regs : NULL))
10889 memcpy (saved_order, order, sizeof order);
10895 for (i = 0; i < nops; i++)
10897 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10899 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
10902 *load_offset = unsorted_offsets[order[0]];
10906 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
10909 if (unsorted_offsets[order[0]] == 0)
10910 stm_case = 1; /* stmia */
10911 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10912 stm_case = 2; /* stmib */
10913 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10914 stm_case = 3; /* stmda */
10915 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10916 stm_case = 4; /* stmdb */
10920 if (!multiple_operation_profitable_p (false, nops, 0))
10926 /* Routines for use in generating RTL. */
10928 /* Generate a load-multiple instruction. COUNT is the number of loads in
10929 the instruction; REGS and MEMS are arrays containing the operands.
10930 BASEREG is the base register to be used in addressing the memory operands.
10931 WBACK_OFFSET is nonzero if the instruction should update the base
10935 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10936 HOST_WIDE_INT wback_offset)
10941 if (!multiple_operation_profitable_p (false, count, 0))
10947 for (i = 0; i < count; i++)
10948 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
10950 if (wback_offset != 0)
10951 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
10953 seq = get_insns ();
10959 result = gen_rtx_PARALLEL (VOIDmode,
10960 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10961 if (wback_offset != 0)
10963 XVECEXP (result, 0, 0)
10964 = gen_rtx_SET (VOIDmode, basereg,
10965 plus_constant (Pmode, basereg, wback_offset));
10970 for (j = 0; i < count; i++, j++)
10971 XVECEXP (result, 0, i)
10972 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
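/* A sketch of the PARALLEL built above, for COUNT == 2 with a
   write-back offset of 8 (registers and addresses illustrative):
     (parallel [(set (reg rb) (plus (reg rb) (const_int 8)))
                (set (reg r4) (mem (reg rb)))
                (set (reg r5) (mem (plus (reg rb) (const_int 4))))])
   Note that the final loop deliberately reuses the value of I left by
   the write-back element (0 or 1) while J indexes REGS and MEMS from
   zero.  */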
10977 /* Generate a store-multiple instruction. COUNT is the number of stores in
10978 the instruction; REGS and MEMS are arrays containing the operands.
10979 BASEREG is the base register to be used in addressing the memory operands.
10980 WBACK_OFFSET is nonzero if the instruction should update the base
10981 register. */
10984 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10985 HOST_WIDE_INT wback_offset)
10990 if (GET_CODE (basereg) == PLUS)
10991 basereg = XEXP (basereg, 0);
10993 if (!multiple_operation_profitable_p (false, count, 0))
10999 for (i = 0; i < count; i++)
11000 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
11002 if (wback_offset != 0)
11003 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
11005 seq = get_insns ();
11011 result = gen_rtx_PARALLEL (VOIDmode,
11012 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
11013 if (wback_offset != 0)
11015 XVECEXP (result, 0, 0)
11016 = gen_rtx_SET (VOIDmode, basereg,
11017 plus_constant (Pmode, basereg, wback_offset));
11022 for (j = 0; i < count; i++, j++)
11023 XVECEXP (result, 0, i)
11024 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
11029 /* Generate either a load-multiple or a store-multiple instruction. This
11030 function can be used in situations where we can start with a single MEM
11031 rtx and adjust its address upwards.
11032 COUNT is the number of operations in the instruction, not counting a
11033 possible update of the base register. REGS is an array containing the
11034 register numbers to be used in the instruction.
11035 BASEREG is the base register to be used in addressing the memory operands,
11036 which are constructed from BASEMEM.
11037 WRITE_BACK specifies whether the generated instruction should include an
11038 update of the base register.
11039 OFFSETP is used to pass an offset to and from this function; this offset
11040 is not used when constructing the address (instead BASEMEM should have an
11041 appropriate offset in its address), it is used only for setting
11042 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
11045 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
11046 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
11048 rtx mems[MAX_LDM_STM_OPS];
11049 HOST_WIDE_INT offset = *offsetp;
11052 gcc_assert (count <= MAX_LDM_STM_OPS);
11054 if (GET_CODE (basereg) == PLUS)
11055 basereg = XEXP (basereg, 0);
11057 for (i = 0; i < count; i++)
11059 rtx addr = plus_constant (Pmode, basereg, i * 4);
11060 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
11068 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
11069 write_back ? 4 * count : 0);
11071 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
11072 write_back ? 4 * count : 0);
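/* A minimal usage sketch (SRC, DST, SRCBASE and DSTBASE are assumed to
   be set up as in arm_gen_movmemqi below; register numbers are
   illustrative): copy four words, post-incrementing both bases:

     int regs[4] = { 4, 5, 6, 7 };
     HOST_WIDE_INT srcoff = 0, dstoff = 0;
     emit_insn (arm_gen_load_multiple (regs, 4, src, TRUE,
                                       srcbase, &srcoff));
     emit_insn (arm_gen_store_multiple (regs, 4, dst, TRUE,
                                        dstbase, &dstoff));  */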
11076 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
11077 rtx basemem, HOST_WIDE_INT *offsetp)
11079 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
11084 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
11085 rtx basemem, HOST_WIDE_INT *offsetp)
11087 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
11091 /* Called from a peephole2 expander to turn a sequence of loads into an
11092 LDM instruction. OPERANDS are the operands found by the peephole matcher;
11093 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
11094 is true if we can reorder the registers because they are used commutatively
11095 subsequently.
11096 Returns true iff we could generate a new instruction. */
11099 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
11101 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
11102 rtx mems[MAX_LDM_STM_OPS];
11103 int i, j, base_reg;
11105 HOST_WIDE_INT offset;
11106 int write_back = FALSE;
11110 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
11111 &base_reg, &offset, !sort_regs);
11117 for (i = 0; i < nops - 1; i++)
11118 for (j = i + 1; j < nops; j++)
11119 if (regs[i] > regs[j])
11125 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
11129 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
11130 gcc_assert (ldm_case == 1 || ldm_case == 5);
11136 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
11137 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
11139 if (!TARGET_THUMB1)
11141 base_reg = regs[0];
11142 base_reg_rtx = newbase;
11146 for (i = 0; i < nops; i++)
11148 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
11149 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
11152 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
11153 write_back ? offset + i * 4 : 0));
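/* At the assembly level the peephole transformation sketched here is:
       ldr r4, [r0]          ldr r5, [r0, #4]
       ldr r6, [r0, #8]      ldr r7, [r0, #12]
   becomes
       ldmia r0, {r4-r7}
   preceded by an add into a fresh base when the loads do not start at
   offset zero, and using write-back (ldmia r0!, ...) when the old base
   register is dead afterwards.  */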
11157 /* Called from a peephole2 expander to turn a sequence of stores into an
11158 STM instruction. OPERANDS are the operands found by the peephole matcher;
11159 NOPS indicates how many separate stores we are trying to combine.
11160 Returns true iff we could generate a new instruction. */
11163 gen_stm_seq (rtx *operands, int nops)
11166 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
11167 rtx mems[MAX_LDM_STM_OPS];
11170 HOST_WIDE_INT offset;
11171 int write_back = FALSE;
11174 bool base_reg_dies;
11176 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
11177 mem_order, &base_reg, &offset, true);
11182 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
11184 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
11187 gcc_assert (base_reg_dies);
11193 gcc_assert (base_reg_dies);
11194 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
11198 addr = plus_constant (Pmode, base_reg_rtx, offset);
11200 for (i = 0; i < nops; i++)
11202 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
11203 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
11206 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
11207 write_back ? offset + i * 4 : 0));
11211 /* Called from a peephole2 expander to turn a sequence of stores that are
11212 preceded by constant loads into an STM instruction. OPERANDS are the
11213 operands found by the peephole matcher; NOPS indicates how many
11214 separate stores we are trying to combine; there are 2 * NOPS
11215 instructions in the peephole.
11216 Returns true iff we could generate a new instruction. */
11219 gen_const_stm_seq (rtx *operands, int nops)
11221 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
11222 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
11223 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
11224 rtx mems[MAX_LDM_STM_OPS];
11227 HOST_WIDE_INT offset;
11228 int write_back = FALSE;
11231 bool base_reg_dies;
11233 HARD_REG_SET allocated;
11235 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
11236 mem_order, &base_reg, &offset, false);
11241 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
11243 /* If the same register is used more than once, try to find a free
11244 register. */
11245 CLEAR_HARD_REG_SET (allocated);
11246 for (i = 0; i < nops; i++)
11248 for (j = i + 1; j < nops; j++)
11249 if (regs[i] == regs[j])
11251 rtx t = peep2_find_free_register (0, nops * 2,
11252 TARGET_THUMB1 ? "l" : "r",
11253 SImode, &allocated);
11257 regs[i] = REGNO (t);
11261 /* Compute an ordering that maps the register numbers to an ascending
11262 sequence. */
11263 reg_order[0] = 0;
11264 for (i = 0; i < nops; i++)
11265 if (regs[i] < regs[reg_order[0]])
11268 for (i = 1; i < nops; i++)
11270 int this_order = reg_order[i - 1];
11271 for (j = 0; j < nops; j++)
11272 if (regs[j] > regs[reg_order[i - 1]]
11273 && (this_order == reg_order[i - 1]
11274 || regs[j] < regs[this_order]))
11275 this_order = j;
11276 reg_order[i] = this_order;
11279 /* Ensure that registers that must be live after the instruction end
11280 up with the correct value. */
11281 for (i = 0; i < nops; i++)
11283 int this_order = reg_order[i];
11284 if ((this_order != mem_order[i]
11285 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
11286 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
11290 /* Load the constants. */
11291 for (i = 0; i < nops; i++)
11293 rtx op = operands[2 * nops + mem_order[i]];
11294 sorted_regs[i] = regs[reg_order[i]];
11295 emit_move_insn (reg_rtxs[reg_order[i]], op);
11298 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
11300 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
11303 gcc_assert (base_reg_dies);
11309 gcc_assert (base_reg_dies);
11310 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
11314 addr = plus_constant (Pmode, base_reg_rtx, offset);
11316 for (i = 0; i < nops; i++)
11318 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
11319 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
11322 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
11323 write_back ? offset + i * 4 : 0));
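/* Sketch of the net effect (registers and constants illustrative):
       mov r4, #1        str r4, [r0]
       mov r5, #2        str r5, [r0, #4]
   becomes
       mov r4, #1
       mov r5, #2
       stmia r0, {r4, r5}
   with free registers substituted where the original sequence reused
   one register for several constants.  */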
11327 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
11328 unaligned copies on processors which support unaligned semantics for those
11329 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
11330 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
11331 An interleave factor of 1 (the minimum) will perform no interleaving.
11332 Load/store multiple are used for aligned addresses where possible. */
11335 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
11336 HOST_WIDE_INT length,
11337 unsigned int interleave_factor)
11339 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
11340 int *regnos = XALLOCAVEC (int, interleave_factor);
11341 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
11342 HOST_WIDE_INT i, j;
11343 HOST_WIDE_INT remaining = length, words;
11344 rtx halfword_tmp = NULL, byte_tmp = NULL;
11346 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
11347 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
11348 HOST_WIDE_INT srcoffset, dstoffset;
11349 HOST_WIDE_INT src_autoinc, dst_autoinc;
11352 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
11354 /* Use hard registers if we have aligned source or destination so we can use
11355 load/store multiple with contiguous registers. */
11356 if (dst_aligned || src_aligned)
11357 for (i = 0; i < interleave_factor; i++)
11358 regs[i] = gen_rtx_REG (SImode, i);
11360 for (i = 0; i < interleave_factor; i++)
11361 regs[i] = gen_reg_rtx (SImode);
11363 dst = copy_addr_to_reg (XEXP (dstbase, 0));
11364 src = copy_addr_to_reg (XEXP (srcbase, 0));
11366 srcoffset = dstoffset = 0;
11368 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
11369 For copying the last bytes we want to subtract this offset again. */
11370 src_autoinc = dst_autoinc = 0;
11372 for (i = 0; i < interleave_factor; i++)
11375 /* Copy BLOCK_SIZE_BYTES chunks. */
11377 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
11380 if (src_aligned && interleave_factor > 1)
11382 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
11383 TRUE, srcbase, &srcoffset));
11384 src_autoinc += UNITS_PER_WORD * interleave_factor;
11388 for (j = 0; j < interleave_factor; j++)
11390 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
11392 mem = adjust_automodify_address (srcbase, SImode, addr,
11393 srcoffset + j * UNITS_PER_WORD);
11394 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11396 srcoffset += block_size_bytes;
11400 if (dst_aligned && interleave_factor > 1)
11402 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
11403 TRUE, dstbase, &dstoffset));
11404 dst_autoinc += UNITS_PER_WORD * interleave_factor;
11408 for (j = 0; j < interleave_factor; j++)
11410 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
11412 mem = adjust_automodify_address (dstbase, SImode, addr,
11413 dstoffset + j * UNITS_PER_WORD);
11414 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11416 dstoffset += block_size_bytes;
11419 remaining -= block_size_bytes;
11422 /* Copy any whole words left (note these aren't interleaved with any
11423 subsequent halfword/byte load/stores in the interests of simplicity). */
11425 words = remaining / UNITS_PER_WORD;
11427 gcc_assert (words < interleave_factor);
11429 if (src_aligned && words > 1)
11431 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
11433 src_autoinc += UNITS_PER_WORD * words;
11437 for (j = 0; j < words; j++)
11439 addr = plus_constant (Pmode, src,
11440 srcoffset + j * UNITS_PER_WORD - src_autoinc);
11441 mem = adjust_automodify_address (srcbase, SImode, addr,
11442 srcoffset + j * UNITS_PER_WORD);
11443 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11445 srcoffset += words * UNITS_PER_WORD;
11448 if (dst_aligned && words > 1)
11450 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
11452 dst_autoinc += words * UNITS_PER_WORD;
11456 for (j = 0; j < words; j++)
11458 addr = plus_constant (Pmode, dst,
11459 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
11460 mem = adjust_automodify_address (dstbase, SImode, addr,
11461 dstoffset + j * UNITS_PER_WORD);
11462 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11464 dstoffset += words * UNITS_PER_WORD;
11467 remaining -= words * UNITS_PER_WORD;
11469 gcc_assert (remaining < 4);
11471 /* Copy a halfword if necessary. */
11473 if (remaining >= 2)
11475 halfword_tmp = gen_reg_rtx (SImode);
11477 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11478 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
11479 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
11481 /* Either write out immediately, or delay until we've loaded the last
11482 byte, depending on interleave factor. */
11483 if (interleave_factor == 1)
11485 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11486 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11487 emit_insn (gen_unaligned_storehi (mem,
11488 gen_lowpart (HImode, halfword_tmp)));
11489 halfword_tmp = NULL;
11497 gcc_assert (remaining < 2);
11499 /* Copy last byte. */
11501 if ((remaining & 1) != 0)
11503 byte_tmp = gen_reg_rtx (SImode);
11505 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11506 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
11507 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
11509 if (interleave_factor == 1)
11511 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11512 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11513 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11522 /* Store last halfword if we haven't done so already. */
11526 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11527 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11528 emit_insn (gen_unaligned_storehi (mem,
11529 gen_lowpart (HImode, halfword_tmp)));
11533 /* Likewise for last byte. */
11537 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11538 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11539 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11543 gcc_assert (remaining == 0 && srcoffset == dstoffset);
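/* With INTERLEAVE_FACTOR == 2 and an aligned source, each main-loop
   iteration above emits, in outline:
       ldmia rs!, {r0, r1}
       str   r0, [rd]       @ unaligned-capable store
       str   r1, [rd, #4]
   grouping the loads ahead of the stores they feed, which hides some
   load latency at the cost of one extra live register per additional
   interleave step.  */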
11546 /* From mips_adjust_block_mem:
11548 Helper function for doing a loop-based block operation on memory
11549 reference MEM. Each iteration of the loop will operate on LENGTH
11552 Create a new base register for use within the loop and point it to
11553 the start of MEM. Create a new memory reference that uses this
11554 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
11557 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
11560 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
11562 /* Although the new mem does not refer to a known location,
11563 it does keep up to LENGTH bytes of alignment. */
11564 *loop_mem = change_address (mem, BLKmode, *loop_reg);
11565 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
11568 /* From mips_block_move_loop:
11570 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
11571 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
11572 the memory regions do not overlap. */
11575 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
11576 unsigned int interleave_factor,
11577 HOST_WIDE_INT bytes_per_iter)
11579 rtx label, src_reg, dest_reg, final_src, test;
11580 HOST_WIDE_INT leftover;
11582 leftover = length % bytes_per_iter;
11583 length -= leftover;
11585 /* Create registers and memory references for use within the loop. */
11586 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
11587 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
11589 /* Calculate the value that SRC_REG should have after the last iteration of
11591 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
11592 0, 0, OPTAB_WIDEN);
11594 /* Emit the start of the loop. */
11595 label = gen_label_rtx ();
11596 emit_label (label);
11598 /* Emit the loop body. */
11599 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
11600 interleave_factor);
11602 /* Move on to the next block. */
11603 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
11604 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
11606 /* Emit the loop condition. */
11607 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
11608 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
11610 /* Mop up any left-over bytes. */
11612 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
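/* The emitted shape, as a sketch:
     src_reg = &src;  dest_reg = &dest;
     final_src = src_reg + (LENGTH - leftover);
   L:
     <straight-line copy of BYTES_PER_ITER bytes>
     src_reg += BYTES_PER_ITER;  dest_reg += BYTES_PER_ITER;
     if (src_reg != final_src) goto L;
     <straight-line copy of the LEFTOVER bytes>  */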
11615 /* Emit a block move when either the source or destination is unaligned (not
11616 aligned to a four-byte boundary). This may need further tuning depending on
11617 core type, optimize_size setting, etc. */
11620 arm_movmemqi_unaligned (rtx *operands)
11622 HOST_WIDE_INT length = INTVAL (operands[2]);
11626 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
11627 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
11628 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
11629 size of code if optimizing for size. We'll use ldm/stm if src_aligned
11630 or dst_aligned though: allow more interleaving in those cases since the
11631 resulting code can be smaller. */
11632 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
11633 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
11636 arm_block_move_unaligned_loop (operands[0], operands[1], length,
11637 interleave_factor, bytes_per_iter);
11639 arm_block_move_unaligned_straight (operands[0], operands[1], length,
11640 interleave_factor);
11644 /* Note that the loop created by arm_block_move_unaligned_loop may be
11645 subject to loop unrolling, which makes tuning this condition a little
11648 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
11650 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
11657 arm_gen_movmemqi (rtx *operands)
11659 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
11660 HOST_WIDE_INT srcoffset, dstoffset;
11662 rtx src, dst, srcbase, dstbase;
11663 rtx part_bytes_reg = NULL;
11666 if (!CONST_INT_P (operands[2])
11667 || !CONST_INT_P (operands[3])
11668 || INTVAL (operands[2]) > 64)
11671 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
11672 return arm_movmemqi_unaligned (operands);
11674 if (INTVAL (operands[3]) & 3)
11677 dstbase = operands[0];
11678 srcbase = operands[1];
11680 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
11681 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
11683 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
11684 out_words_to_go = INTVAL (operands[2]) / 4;
11685 last_bytes = INTVAL (operands[2]) & 3;
11686 dstoffset = srcoffset = 0;
11688 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
11689 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
11691 for (i = 0; in_words_to_go >= 2; i += 4)
11693 if (in_words_to_go > 4)
11694 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
11695 TRUE, srcbase, &srcoffset));
11697 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
11698 src, FALSE, srcbase,
11701 if (out_words_to_go)
11703 if (out_words_to_go > 4)
11704 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
11705 TRUE, dstbase, &dstoffset));
11706 else if (out_words_to_go != 1)
11707 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
11708 out_words_to_go, dst,
11711 dstbase, &dstoffset));
11714 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11715 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
11716 if (last_bytes != 0)
11718 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
11724 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
11725 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
11728 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
11729 if (out_words_to_go)
11733 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11734 sreg = copy_to_reg (mem);
11736 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11737 emit_move_insn (mem, sreg);
11740 gcc_assert (!in_words_to_go); /* Sanity check */
11743 if (in_words_to_go)
11745 gcc_assert (in_words_to_go > 0);
11747 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11748 part_bytes_reg = copy_to_mode_reg (SImode, mem);
11751 gcc_assert (!last_bytes || part_bytes_reg);
11753 if (BYTES_BIG_ENDIAN && last_bytes)
11755 rtx tmp = gen_reg_rtx (SImode);
11757 /* The bytes we want are in the top end of the word. */
11758 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
11759 GEN_INT (8 * (4 - last_bytes))));
11760 part_bytes_reg = tmp;
11764 mem = adjust_automodify_address (dstbase, QImode,
11765 plus_constant (Pmode, dst,
11767 dstoffset + last_bytes - 1);
11768 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11772 tmp = gen_reg_rtx (SImode);
11773 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
11774 part_bytes_reg = tmp;
11781 if (last_bytes > 1)
11783 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
11784 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
11788 rtx tmp = gen_reg_rtx (SImode);
11789 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
11790 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
11791 part_bytes_reg = tmp;
11798 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
11799 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11806 /* Select a dominance comparison mode if possible for a test of the general
11807 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
11808 COND_OR == DOM_CC_X_AND_Y => (X && Y)
11809 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
11810 COND_OR == DOM_CC_X_OR_Y => (X || Y)
11811 In all cases OP will be either EQ or NE, but we don't need to know which
11812 here. If we are unable to support a dominance comparison we return
11813 CC mode. This will then fail to match for the RTL expressions that
11814 generate this call. */
11816 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
11818 enum rtx_code cond1, cond2;
11821 /* Currently we will probably get the wrong result if the individual
11822 comparisons are not simple. This also ensures that it is safe to
11823 reverse a comparison if necessary. */
11824 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
11826 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
11830 /* The if_then_else variant of this tests the second condition if the
11831 first passes, but is true if the first fails. Reverse the first
11832 condition to get a true "inclusive-or" expression. */
11833 if (cond_or == DOM_CC_NX_OR_Y)
11834 cond1 = reverse_condition (cond1);
11836 /* If the comparisons are not equal, and one doesn't dominate the other,
11837 then we can't do this. */
11839 && !comparison_dominates_p (cond1, cond2)
11840 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
11845 enum rtx_code temp = cond1;
11853 if (cond_or == DOM_CC_X_AND_Y)
11858 case EQ: return CC_DEQmode;
11859 case LE: return CC_DLEmode;
11860 case LEU: return CC_DLEUmode;
11861 case GE: return CC_DGEmode;
11862 case GEU: return CC_DGEUmode;
11863 default: gcc_unreachable ();
11867 if (cond_or == DOM_CC_X_AND_Y)
11879 gcc_unreachable ();
11883 if (cond_or == DOM_CC_X_AND_Y)
11895 gcc_unreachable ();
11899 if (cond_or == DOM_CC_X_AND_Y)
11900 return CC_DLTUmode;
11905 return CC_DLTUmode;
11907 return CC_DLEUmode;
11911 gcc_unreachable ();
11915 if (cond_or == DOM_CC_X_AND_Y)
11916 return CC_DGTUmode;
11921 return CC_DGTUmode;
11923 return CC_DGEUmode;
11927 gcc_unreachable ();
11930 /* The remaining cases only occur when both comparisons are the
11931 same. */
11933 gcc_assert (cond1 == cond2);
11937 gcc_assert (cond1 == cond2);
11941 gcc_assert (cond1 == cond2);
11945 gcc_assert (cond1 == cond2);
11946 return CC_DLEUmode;
11949 gcc_assert (cond1 == cond2);
11950 return CC_DGEUmode;
11953 gcc_unreachable ();
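/* Example (a sketch): for a source-level test such as
   "a == 0 && b == 0", combine presents
     (and (eq (reg a) (const_int 0)) (eq (reg b) (const_int 0)))
   and arm_select_cc_mode below passes DOM_CC_X_AND_Y; the EQ case
   above then yields CC_DEQmode, which allows the conditional-compare
   idiom
     cmp   ra, #0
     cmpeq rb, #0  */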
11958 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
11960 /* All floating point compares return CCFP if it is an equality
11961 comparison, and CCFPE otherwise. */
11962 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11985 gcc_unreachable ();
11989 /* A compare with a shifted operand. Because of canonicalization, the
11990 comparison will have to be swapped when we emit the assembler. */
11991 if (GET_MODE (y) == SImode
11992 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11993 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11994 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
11995 || GET_CODE (x) == ROTATERT))
11998 /* This operation is performed swapped, but since we only rely on the Z
11999 flag we don't need an additional mode. */
12000 if (GET_MODE (y) == SImode
12001 && (REG_P (y) || (GET_CODE (y) == SUBREG))
12002 && GET_CODE (x) == NEG
12003 && (op == EQ || op == NE))
12006 /* This is a special case that is used by combine to allow a
12007 comparison of a shifted byte load to be split into a zero-extend
12008 followed by a comparison of the shifted integer (only valid for
12009 equalities and unsigned inequalities). */
12010 if (GET_MODE (x) == SImode
12011 && GET_CODE (x) == ASHIFT
12012 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
12013 && GET_CODE (XEXP (x, 0)) == SUBREG
12014 && MEM_P (SUBREG_REG (XEXP (x, 0)))
12015 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
12016 && (op == EQ || op == NE
12017 || op == GEU || op == GTU || op == LTU || op == LEU)
12018 && CONST_INT_P (y))
12021 /* A construct for a conditional compare, if the false arm contains
12022 0, then both conditions must be true, otherwise either condition
12023 must be true. Not all conditions are possible, so CCmode is
12024 returned if it can't be done. */
12025 if (GET_CODE (x) == IF_THEN_ELSE
12026 && (XEXP (x, 2) == const0_rtx
12027 || XEXP (x, 2) == const1_rtx)
12028 && COMPARISON_P (XEXP (x, 0))
12029 && COMPARISON_P (XEXP (x, 1)))
12030 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
12031 INTVAL (XEXP (x, 2)));
12033 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
12034 if (GET_CODE (x) == AND
12035 && (op == EQ || op == NE)
12036 && COMPARISON_P (XEXP (x, 0))
12037 && COMPARISON_P (XEXP (x, 1)))
12038 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
12039 DOM_CC_X_AND_Y);
12041 if (GET_CODE (x) == IOR
12042 && (op == EQ || op == NE)
12043 && COMPARISON_P (XEXP (x, 0))
12044 && COMPARISON_P (XEXP (x, 1)))
12045 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
12046 DOM_CC_X_OR_Y);
12048 /* An operation (on Thumb) where we want to test for a single bit.
12049 This is done by shifting that bit up into the top bit of a
12050 scratch register; we can then branch on the sign bit. */
12052 && GET_MODE (x) == SImode
12053 && (op == EQ || op == NE)
12054 && GET_CODE (x) == ZERO_EXTRACT
12055 && XEXP (x, 1) == const1_rtx)
12058 /* An operation that sets the condition codes as a side-effect, the
12059 V flag is not set correctly, so we can only use comparisons where
12060 this doesn't matter. (For LT and GE we can use "mi" and "pl"
12062 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
12063 if (GET_MODE (x) == SImode
12065 && (op == EQ || op == NE || op == LT || op == GE)
12066 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
12067 || GET_CODE (x) == AND || GET_CODE (x) == IOR
12068 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
12069 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
12070 || GET_CODE (x) == LSHIFTRT
12071 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
12072 || GET_CODE (x) == ROTATERT
12073 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
12074 return CC_NOOVmode;
12076 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
12079 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
12080 && GET_CODE (x) == PLUS
12081 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
12084 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
12090 /* A DImode comparison against zero can be implemented by
12091 or'ing the two halves together. */
12092 if (y == const0_rtx)
12095 /* We can do an equality test in three Thumb instructions. */
12105 /* DImode unsigned comparisons can be implemented by cmp +
12106 cmpeq without a scratch register. Not worth doing in
12107 Thumb-2. */
12117 /* DImode signed and unsigned comparisons can be implemented
12118 by cmp + sbcs with a scratch register, but that does not
12119 set the Z flag - we must reverse GT/LE/GTU/LEU. */
12120 gcc_assert (op != EQ && op != NE);
12124 gcc_unreachable ();
12128 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
12129 return GET_MODE (x);
12134 /* X and Y are two things to compare using CODE. Emit the compare insn and
12135 return the rtx for register 0 in the proper mode. FP means this is a
12136 floating point compare: I don't think that it is needed on the arm. */
12138 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
12140 enum machine_mode mode;
12142 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
12144 /* We might have X as a constant, Y as a register because of the predicates
12145 used for cmpdi. If so, force X to a register here. */
12146 if (dimode_comparison && !REG_P (x))
12147 x = force_reg (DImode, x);
12149 mode = SELECT_CC_MODE (code, x, y);
12150 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
12152 if (dimode_comparison
12153 && mode != CC_CZmode)
12157 /* To compare two non-zero values for equality, XOR them and
12158 then compare against zero. Not used for ARM mode; there
12159 CC_CZmode is cheaper. */
12160 if (mode == CC_Zmode && y != const0_rtx)
12162 gcc_assert (!reload_completed);
12163 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
12167 /* A scratch register is required. */
12168 if (reload_completed)
12169 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
12171 scratch = gen_rtx_SCRATCH (SImode);
12173 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
12174 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
12175 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
12178 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
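/* For a DImode equality test against a nonzero value before reload,
   the CC_Zmode path above first lowers X == Y into T = X ^ Y (a pair
   of SImode eors) and then compares T against zero using the
   clobber-carrying PARALLEL, rather than emitting a conditional
   compare.  This is a sketch of intent, not of the exact insns.  */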
12183 /* Generate a sequence of insns that will generate the correct return
12184 address mask depending on the physical architecture that the program
12187 arm_gen_return_addr_mask (void)
12189 rtx reg = gen_reg_rtx (Pmode);
12191 emit_insn (gen_return_addr_mask (reg));
12196 arm_reload_in_hi (rtx *operands)
12198 rtx ref = operands[1];
12200 HOST_WIDE_INT offset = 0;
12202 if (GET_CODE (ref) == SUBREG)
12204 offset = SUBREG_BYTE (ref);
12205 ref = SUBREG_REG (ref);
12210 /* We have a pseudo which has been spilt onto the stack; there
12211 are two cases here: the first where there is a simple
12212 stack-slot replacement and a second where the stack-slot is
12213 out of range, or is used as a subreg. */
12214 if (reg_equiv_mem (REGNO (ref)))
12216 ref = reg_equiv_mem (REGNO (ref));
12217 base = find_replacement (&XEXP (ref, 0));
12220 /* The slot is out of range, or was dressed up in a SUBREG. */
12221 base = reg_equiv_address (REGNO (ref));
12224 base = find_replacement (&XEXP (ref, 0));
12226 /* Handle the case where the address is too complex to be offset by 1. */
12227 if (GET_CODE (base) == MINUS
12228 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
12230 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12232 emit_set_insn (base_plus, base);
12235 else if (GET_CODE (base) == PLUS)
12237 /* The addend must be CONST_INT, or we would have dealt with it above. */
12238 HOST_WIDE_INT hi, lo;
12240 offset += INTVAL (XEXP (base, 1));
12241 base = XEXP (base, 0);
12243 /* Rework the address into a legal sequence of insns. */
12244 /* Valid range for lo is -4095 -> 4095 */
12245 lo = (offset >= 0
12246 ? (offset & 0xfff)
12247 : -((-offset) & 0xfff));
12249 /* Corner case, if lo is the max offset then we would be out of range
12250 once we have added the additional 1 below, so bump the msb into the
12251 pre-loading insn(s). */
12252 if (lo == 4095)
12253 lo &= 0x7ff;
12255 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
12256 ^ (HOST_WIDE_INT) 0x80000000)
12257 - (HOST_WIDE_INT) 0x80000000);
12259 gcc_assert (hi + lo == offset);
12263 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12265 /* Get the base address; addsi3 knows how to handle constants
12266 that require more than one insn. */
12267 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
12273 /* Operands[2] may overlap operands[0] (though it won't overlap
12274 operands[1]), that's why we asked for a DImode reg -- so we can
12275 use the bit that does not overlap. */
12276 if (REGNO (operands[2]) == REGNO (operands[0]))
12277 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12279 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
12281 emit_insn (gen_zero_extendqisi2 (scratch,
12282 gen_rtx_MEM (QImode,
12283 plus_constant (Pmode, base,
12285 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
12286 gen_rtx_MEM (QImode,
12287 plus_constant (Pmode, base,
12289 if (!BYTES_BIG_ENDIAN)
12290 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
12291 gen_rtx_IOR (SImode,
12294 gen_rtx_SUBREG (SImode, operands[0], 0),
12298 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
12299 gen_rtx_IOR (SImode,
12300 gen_rtx_ASHIFT (SImode, scratch,
12302 gen_rtx_SUBREG (SImode, operands[0], 0)));
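/* In outline, the little-endian branch above synthesizes (registers
   illustrative):
     ldrb  scratch, [base]       @ low-addressed byte
     ldrb  out,     [base, #1]   @ high-addressed byte
     orr   out, scratch, out, lsl #8
   and the big-endian branch swaps the roles of the two bytes.  */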
12305 /* Handle storing a half-word to memory during reload by synthesizing as two
12306 byte stores. Take care not to clobber the input values until after we
12307 have moved them somewhere safe. This code assumes that if the DImode
12308 scratch in operands[2] overlaps either the input value or output address
12309 in some way, then that value must die in this insn (we absolutely need
12310 two scratch registers for some corner cases). */
12312 arm_reload_out_hi (rtx *operands)
12314 rtx ref = operands[0];
12315 rtx outval = operands[1];
12317 HOST_WIDE_INT offset = 0;
12319 if (GET_CODE (ref) == SUBREG)
12321 offset = SUBREG_BYTE (ref);
12322 ref = SUBREG_REG (ref);
12327 /* We have a pseudo which has been spilt onto the stack; there
12328 are two cases here: the first where there is a simple
12329 stack-slot replacement and a second where the stack-slot is
12330 out of range, or is used as a subreg. */
12331 if (reg_equiv_mem (REGNO (ref)))
12333 ref = reg_equiv_mem (REGNO (ref));
12334 base = find_replacement (&XEXP (ref, 0));
12337 /* The slot is out of range, or was dressed up in a SUBREG. */
12338 base = reg_equiv_address (REGNO (ref));
12341 base = find_replacement (&XEXP (ref, 0));
12343 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
12345 /* Handle the case where the address is too complex to be offset by 1. */
12346 if (GET_CODE (base) == MINUS
12347 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
12349 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12351 /* Be careful not to destroy OUTVAL. */
12352 if (reg_overlap_mentioned_p (base_plus, outval))
12354 /* Updating base_plus might destroy outval, see if we can
12355 swap the scratch and base_plus. */
12356 if (!reg_overlap_mentioned_p (scratch, outval))
12359 scratch = base_plus;
12364 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12366 /* Be conservative and copy OUTVAL into the scratch now,
12367 this should only be necessary if outval is a subreg
12368 of something larger than a word. */
12369 /* XXX Might this clobber base? I can't see how it can,
12370 since scratch is known to overlap with OUTVAL, and
12371 must be wider than a word. */
12372 emit_insn (gen_movhi (scratch_hi, outval));
12373 outval = scratch_hi;
12377 emit_set_insn (base_plus, base);
12380 else if (GET_CODE (base) == PLUS)
12382 /* The addend must be CONST_INT, or we would have dealt with it above. */
12383 HOST_WIDE_INT hi, lo;
12385 offset += INTVAL (XEXP (base, 1));
12386 base = XEXP (base, 0);
12388 /* Rework the address into a legal sequence of insns. */
12389 /* Valid range for lo is -4095 -> 4095 */
12390 lo = (offset >= 0
12391 ? (offset & 0xfff)
12392 : -((-offset) & 0xfff));
12394 /* Corner case, if lo is the max offset then we would be out of range
12395 once we have added the additional 1 below, so bump the msb into the
12396 pre-loading insn(s). */
12397 if (lo == 4095)
12398 lo &= 0x7ff;
12400 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
12401 ^ (HOST_WIDE_INT) 0x80000000)
12402 - (HOST_WIDE_INT) 0x80000000);
12404 gcc_assert (hi + lo == offset);
12408 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12410 /* Be careful not to destroy OUTVAL. */
12411 if (reg_overlap_mentioned_p (base_plus, outval))
12413 /* Updating base_plus might destroy outval, see if we
12414 can swap the scratch and base_plus. */
12415 if (!reg_overlap_mentioned_p (scratch, outval))
12418 scratch = base_plus;
12423 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12425 /* Be conservative and copy outval into scratch now,
12426 this should only be necessary if outval is a
12427 subreg of something larger than a word. */
12428 /* XXX Might this clobber base? I can't see how it
12429 can, since scratch is known to overlap with
12431 emit_insn (gen_movhi (scratch_hi, outval));
12432 outval = scratch_hi;
12436 /* Get the base address; addsi3 knows how to handle constants
12437 that require more than one insn. */
12438 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
12444 if (BYTES_BIG_ENDIAN)
12446 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12447 plus_constant (Pmode, base,
12449 gen_lowpart (QImode, outval)));
12450 emit_insn (gen_lshrsi3 (scratch,
12451 gen_rtx_SUBREG (SImode, outval, 0),
12453 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12455 gen_lowpart (QImode, scratch)));
12459 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12461 gen_lowpart (QImode, outval)));
12462 emit_insn (gen_lshrsi3 (scratch,
12463 gen_rtx_SUBREG (SImode, outval, 0),
12465 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12466 plus_constant (Pmode, base,
12468 gen_lowpart (QImode, scratch)));
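/* Likewise, the little-endian store case above becomes, as a sketch:
     strb  out,     [base]       @ low byte
     lsr   scratch, out, #8
     strb  scratch, [base, #1]   @ high byte
   after whatever base/scratch shuffling was needed to keep OUTVAL
   intact.  */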
12472 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
12473 (padded to the size of a word) should be passed in a register. */
12476 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
12478 if (TARGET_AAPCS_BASED)
12479 return must_pass_in_stack_var_size (mode, type);
12481 return must_pass_in_stack_var_size_or_pad (mode, type);
12485 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
12486 Return true if an argument passed on the stack should be padded upwards,
12487 i.e. if the least-significant byte has useful data.
12488 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
12489 aggregate types are placed in the lowest memory address. */
12492 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
12494 if (!TARGET_AAPCS_BASED)
12495 return DEFAULT_FUNCTION_ARG_PADDING (mode, type) == upward;
12497 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
12504 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
12505 Return !BYTES_BIG_ENDIAN if the least significant byte of the
12506 register has useful data, and return the opposite if the most
12507 significant byte does. */
12510 arm_pad_reg_upward (enum machine_mode mode,
12511 tree type, int first ATTRIBUTE_UNUSED)
12513 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
12515 /* For AAPCS, small aggregates, small fixed-point types,
12516 and small complex types are always padded upwards. */
12519 if ((AGGREGATE_TYPE_P (type)
12520 || TREE_CODE (type) == COMPLEX_TYPE
12521 || FIXED_POINT_TYPE_P (type))
12522 && int_size_in_bytes (type) <= 4)
12527 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
12528 && GET_MODE_SIZE (mode) <= 4)
12533 /* Otherwise, use default padding. */
12534 return !BYTES_BIG_ENDIAN;
12537 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
12538 assuming that the address in the base register is word aligned. */
12540 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
12542 HOST_WIDE_INT max_offset;
12544 /* Offset must be a multiple of 4 in Thumb mode. */
12545 if (TARGET_THUMB2 && ((offset & 3) != 0))
12550 else if (TARGET_ARM)
12555 return ((offset <= max_offset) && (offset >= -max_offset));
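/* For reference (assuming the usual LDRD/STRD immediate encodings):
   ARM state accepts offsets in [-255, 255], while Thumb-2 accepts
   word-aligned offsets in [-1020, 1020], an 8-bit immediate scaled
   by 4.  */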
12558 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
12559 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
12560 Assumes that the address in the base register RN is word aligned. Pattern
12561 guarantees that both memory accesses use the same base register,
12562 the offsets are constants within the range, and the gap between the offsets is 4.
12563 If reload is complete, then check that the registers are legal. WBACK indicates whether
12564 address is updated. LOAD indicates whether memory access is load or store. */
12566 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
12567 bool wback, bool load)
12569 unsigned int t, t2, n;
12571 if (!reload_completed)
12574 if (!offset_ok_for_ldrd_strd (offset))
12581 if ((TARGET_THUMB2)
12582 && ((wback && (n == t || n == t2))
12583 || (t == SP_REGNUM)
12584 || (t == PC_REGNUM)
12585 || (t2 == SP_REGNUM)
12586 || (t2 == PC_REGNUM)
12587 || (!load && (n == PC_REGNUM))
12588 || (load && (t == t2))
12589 /* Triggers Cortex-M3 LDRD errata. */
12590 || (!wback && load && fix_cm3_ldrd && (n == t))))
12594 && ((wback && (n == t || n == t2))
12595 || (t2 == PC_REGNUM)
12596 || (t % 2 != 0) /* First destination register is not even. */
12598 /* PC can be used as base register (for offset addressing only),
12599 but it is deprecated. */
12600 || (n == PC_REGNUM)))
12607 /* Print a symbolic form of X to the debug file, F. */
12609 arm_print_value (FILE *f, rtx x)
12611 switch (GET_CODE (x))
12614 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
12618 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
12626 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
12628 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
12629 if (i < (CONST_VECTOR_NUNITS (x) - 1))
12637 fprintf (f, "\"%s\"", XSTR (x, 0));
12641 fprintf (f, "`%s'", XSTR (x, 0));
12645 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
12649 arm_print_value (f, XEXP (x, 0));
12653 arm_print_value (f, XEXP (x, 0));
12655 arm_print_value (f, XEXP (x, 1));
12663 fprintf (f, "????");
12668 /* Routines for manipulation of the constant pool. */
12670 /* Arm instructions cannot load a large constant directly into a
12671 register; they have to come from a pc relative load. The constant
12672 must therefore be placed in the addressable range of the pc
12673 relative load. Depending on the precise pc relative load
12674 instruction the range is somewhere between 256 bytes and 4k. This
12675 means that we often have to dump a constant inside a function, and
12676 generate code to branch around it.
12678 It is important to minimize this, since the branches will slow
12679 things down and make the code larger.
12681 Normally we can hide the table after an existing unconditional
12682 branch so that there is no interruption of the flow, but in the
12683 worst case the code looks like this:
12685 ldr rn, L1
12686 ...
12687 b L2
12688 align
12689 L1: .long value
12690 L2:
12691 ...
12693 ldr rn, L3
12694 ...
12695 b L4
12696 align
12697 L3: .long value
12698 L4:
12699 ...
12701 We fix this by performing a scan after scheduling, which notices
12702 which instructions need to have their operands fetched from the
12703 constant table and builds the table.
12705 The algorithm starts by building a table of all the constants that
12706 need fixing up and all the natural barriers in the function (places
12707 where a constant table can be dropped without breaking the flow).
12708 For each fixup we note how far the pc-relative replacement will be
12709 able to reach and the offset of the instruction into the function.
12711 Having built the table we then group the fixes together to form
12712 tables that are as large as possible (subject to addressing
12713 constraints) and emit each table of constants after the last
12714 barrier that is within range of all the instructions in the group.
12715 If a group does not contain a barrier, then we forcibly create one
12716 by inserting a jump instruction into the flow. Once the table has
12717 been inserted, the insns are then modified to reference the
12718 relevant entry in the pool.
12720 Possible enhancements to the algorithm (not implemented) are:
12722 1) For some processors and object formats, there may be benefit in
12723 aligning the pools to the start of cache lines; this alignment
12724 would need to be taken into account when calculating addressability
12727 /* These typedefs are located at the start of this file, so that
12728 they can be used in the prototypes there. This comment is to
12729 remind readers of that fact so that the following structures
12730 can be understood more easily.
12732 typedef struct minipool_node Mnode;
12733 typedef struct minipool_fixup Mfix; */
12735 struct minipool_node
12737 /* Doubly linked chain of entries. */
12740 /* The maximum offset into the code that this entry can be placed. While
12741 pushing fixes for forward references, all entries are sorted in order
12742 of increasing max_address. */
12743 HOST_WIDE_INT max_address;
12744 /* Similarly for an entry inserted for a backwards ref. */
12745 HOST_WIDE_INT min_address;
12746 /* The number of fixes referencing this entry. This can become zero
12747 if we "unpush" an entry. In this case we ignore the entry when we
12748 come to emit the code. */
12750 /* The offset from the start of the minipool. */
12751 HOST_WIDE_INT offset;
12752 /* The value in table. */
12754 /* The mode of value. */
12755 enum machine_mode mode;
12756 /* The size of the value. With iWMMXt enabled
12757 sizes > 4 also imply an alignment of 8 bytes. */
12761 struct minipool_fixup
12765 HOST_WIDE_INT address;
12767 enum machine_mode mode;
12771 HOST_WIDE_INT forwards;
12772 HOST_WIDE_INT backwards;
12775 /* Fixes less than a word need padding out to a word boundary. */
12776 #define MINIPOOL_FIX_SIZE(mode) \
12777 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
12779 static Mnode * minipool_vector_head;
12780 static Mnode * minipool_vector_tail;
12781 static rtx minipool_vector_label;
12782 static int minipool_pad;
12784 /* The linked list of all minipool fixes required for this function. */
12785 Mfix * minipool_fix_head;
12786 Mfix * minipool_fix_tail;
12787 /* The fix entry for the current minipool, once it has been placed. */
12788 Mfix * minipool_barrier;
12790 /* Determines if INSN is the start of a jump table. Returns the end
12791 of the TABLE or NULL_RTX. */
12793 is_jump_table (rtx insn)
12797 if (jump_to_label_p (insn)
12798 && ((table = next_real_insn (JUMP_LABEL (insn)))
12799 == next_real_insn (insn))
12802 && (GET_CODE (PATTERN (table)) == ADDR_VEC
12803 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
12809 #ifndef JUMP_TABLES_IN_TEXT_SECTION
12810 #define JUMP_TABLES_IN_TEXT_SECTION 0
12813 static HOST_WIDE_INT
12814 get_jump_table_size (rtx insn)
12816 /* ADDR_VECs only take room if read-only data goes into the text
12818 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
12820 rtx body = PATTERN (insn);
12821 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
12822 HOST_WIDE_INT size;
12823 HOST_WIDE_INT modesize;
12825 modesize = GET_MODE_SIZE (GET_MODE (body));
12826 size = modesize * XVECLEN (body, elt);
12830 /* Round up size of TBB table to a halfword boundary. */
12831 size = (size + 1) & ~(HOST_WIDE_INT)1;
12834 /* No padding necessary for TBH. */
12837 /* Add two bytes for alignment on Thumb. */
12842 gcc_unreachable ();
12850 /* Return the maximum amount of padding that will be inserted before
12853 static HOST_WIDE_INT
12854 get_label_padding (rtx label)
12856 HOST_WIDE_INT align, min_insn_size;
12858 align = 1 << label_to_alignment (label);
12859 min_insn_size = TARGET_THUMB ? 2 : 4;
12860 return align > min_insn_size ? align - min_insn_size : 0;
12863 /* Move a minipool fix MP from its current location to before MAX_MP.
12864 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
12865 constraints may need updating. */
12867 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
12868 HOST_WIDE_INT max_address)
12870 /* The code below assumes these are different. */
12871 gcc_assert (mp != max_mp);
12873 if (max_mp == NULL)
12875 if (max_address < mp->max_address)
12876 mp->max_address = max_address;
12880 if (max_address > max_mp->max_address - mp->fix_size)
12881 mp->max_address = max_mp->max_address - mp->fix_size;
12883 mp->max_address = max_address;
12885 /* Unlink MP from its current position. Since max_mp is non-null,
12886 mp->prev must be non-null. */
12887 mp->prev->next = mp->next;
12888 if (mp->next != NULL)
12889 mp->next->prev = mp->prev;
12891 minipool_vector_tail = mp->prev;
12893 /* Re-insert it before MAX_MP. */
12895 mp->prev = max_mp->prev;
12898 if (mp->prev != NULL)
12899 mp->prev->next = mp;
12901 minipool_vector_head = mp;
12904 /* Save the new entry. */
12907 /* Scan over the preceding entries and adjust their addresses as
12909 while (mp->prev != NULL
12910 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12912 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12919 /* Add a constant to the minipool for a forward reference. Returns the
12920 node added or NULL if the constant will not fit in this pool. */
12922 add_minipool_forward_ref (Mfix *fix)
12924 /* If set, max_mp is the first pool_entry that has a lower
12925 constraint than the one we are trying to add. */
12926 Mnode * max_mp = NULL;
12927 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
12930 /* If the minipool starts before the end of FIX->INSN then this FIX
12931 cannot be placed into the current pool. Furthermore, adding the
12932 new constant pool entry may cause the pool to start FIX_SIZE bytes
12933 earlier. */
12934 if (minipool_vector_head &&
12935 (fix->address + get_attr_length (fix->insn)
12936 >= minipool_vector_head->max_address - fix->fix_size))
12939 /* Scan the pool to see if a constant with the same value has
12940 already been added. While we are doing this, also note the
12941 location where we must insert the constant if it doesn't already
12943 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12945 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12946 && fix->mode == mp->mode
12947 && (!LABEL_P (fix->value)
12948 || (CODE_LABEL_NUMBER (fix->value)
12949 == CODE_LABEL_NUMBER (mp->value)))
12950 && rtx_equal_p (fix->value, mp->value))
12952 /* More than one fix references this entry. */
12954 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
12957 /* Note the insertion point if necessary. */
12959 && mp->max_address > max_address)
12962 /* If we are inserting an 8-byte aligned quantity and
12963 we have not already found an insertion point, then
12964 make sure that all such 8-byte aligned quantities are
12965 placed at the start of the pool. */
12966 if (ARM_DOUBLEWORD_ALIGN
12968 && fix->fix_size >= 8
12969 && mp->fix_size < 8)
12972 max_address = mp->max_address;
12976 /* The value is not currently in the minipool, so we need to create
12977 a new entry for it. If MAX_MP is NULL, the entry will be put on
12978 the end of the list since the placement is less constrained than
12979 any existing entry. Otherwise, we insert the new fix before
12980 MAX_MP and, if necessary, adjust the constraints on the other
12981 entries. */
12982 mp = XNEW (Mnode);
12983 mp->fix_size = fix->fix_size;
12984 mp->mode = fix->mode;
12985 mp->value = fix->value;
12987 /* Not yet required for a backwards ref. */
12988 mp->min_address = -65536;
12990 if (max_mp == NULL)
12992 mp->max_address = max_address;
12994 mp->prev = minipool_vector_tail;
12996 if (mp->prev == NULL)
12998 minipool_vector_head = mp;
12999 minipool_vector_label = gen_label_rtx ();
13002 mp->prev->next = mp;
13004 minipool_vector_tail = mp;
13008 if (max_address > max_mp->max_address - mp->fix_size)
13009 mp->max_address = max_mp->max_address - mp->fix_size;
13011 mp->max_address = max_address;
13014 mp->prev = max_mp->prev;
13016 if (mp->prev != NULL)
13017 mp->prev->next = mp;
13019 minipool_vector_head = mp;
13022 /* Save the new entry. */
13025 /* Scan over the preceding entries and adjust their addresses as
13027 while (mp->prev != NULL
13028 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
13030 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
13038 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
13039 HOST_WIDE_INT min_address)
13041 HOST_WIDE_INT offset;
13043 /* The code below assumes these are different. */
13044 gcc_assert (mp != min_mp);
13046 if (min_mp == NULL)
13048 if (min_address > mp->min_address)
13049 mp->min_address = min_address;
13053 /* We will adjust this below if it is too loose. */
13054 mp->min_address = min_address;
13056 /* Unlink MP from its current position. Since min_mp is non-null,
13057 mp->next must be non-null. */
13058 mp->next->prev = mp->prev;
13059 if (mp->prev != NULL)
13060 mp->prev->next = mp->next;
13062 minipool_vector_head = mp->next;
13064 /* Reinsert it after MIN_MP. */
13066 mp->next = min_mp->next;
13068 if (mp->next != NULL)
13069 mp->next->prev = mp;
13071 minipool_vector_tail = mp;
13077 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13079 mp->offset = offset;
13080 if (mp->refcount > 0)
13081 offset += mp->fix_size;
13083 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
13084 mp->next->min_address = mp->min_address + mp->fix_size;
13090 /* Add a constant to the minipool for a backward reference. Returns the
13091 node added or NULL if the constant will not fit in this pool.
13093 Note that the code for insertion for a backwards reference can be
13094 somewhat confusing because the calculated offsets for each fix do
13095 not take into account the size of the pool (which is still under
13096 construction). */
13097 static Mnode *
13098 add_minipool_backward_ref (Mfix *fix)
13100 /* If set, min_mp is the last pool_entry that has a lower constraint
13101 than the one we are trying to add. */
13102 Mnode *min_mp = NULL;
13103 /* This can be negative, since it is only a constraint. */
13104 HOST_WIDE_INT min_address = fix->address - fix->backwards;
13107 /* If we can't reach the current pool from this insn, or if we can't
13108 insert this entry at the end of the pool without pushing other
13109 fixes out of range, then we don't try. This ensures that we
13110 can't fail later on. */
13111 if (min_address >= minipool_barrier->address
13112 || (minipool_vector_tail->min_address + fix->fix_size
13113 >= minipool_barrier->address))
13116 /* Scan the pool to see if a constant with the same value has
13117 already been added. While we are doing this, also note the
13118 location where we must insert the constant if it doesn't already
13120 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
13122 if (GET_CODE (fix->value) == GET_CODE (mp->value)
13123 && fix->mode == mp->mode
13124 && (!LABEL_P (fix->value)
13125 || (CODE_LABEL_NUMBER (fix->value)
13126 == CODE_LABEL_NUMBER (mp->value)))
13127 && rtx_equal_p (fix->value, mp->value)
13128 /* Check that there is enough slack to move this entry to the
13129 end of the table (this is conservative). */
13130 && (mp->max_address
13131 > (minipool_barrier->address
13132 + minipool_vector_tail->offset
13133 + minipool_vector_tail->fix_size)))
13136 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
13139 if (min_mp != NULL)
13140 mp->min_address += fix->fix_size;
13143 /* Note the insertion point if necessary. */
13144 if (mp->min_address < min_address)
13146 /* For now, we do not allow the insertion of 8-byte alignment
13147 requiring nodes anywhere but at the start of the pool. */
13148 if (ARM_DOUBLEWORD_ALIGN
13149 && fix->fix_size >= 8 && mp->fix_size < 8)
13154 else if (mp->max_address
13155 < minipool_barrier->address + mp->offset + fix->fix_size)
13157 /* Inserting before this entry would push the fix beyond
13158 its maximum address (which can happen if we have
13159 re-located a forwards fix); force the new fix to come
13160 after it.  */
13161 if (ARM_DOUBLEWORD_ALIGN
13162 && fix->fix_size >= 8 && mp->fix_size < 8)
13167 min_address = mp->min_address + fix->fix_size;
13170 /* Do not insert a non-8-byte aligned quantity before 8-byte
13171 aligned quantities. */
13172 else if (ARM_DOUBLEWORD_ALIGN
13173 && fix->fix_size < 8
13174 && mp->fix_size >= 8)
13177 min_address = mp->min_address + fix->fix_size;
13182 /* We need to create a new entry. */
13184 mp->fix_size = fix->fix_size;
13185 mp->mode = fix->mode;
13186 mp->value = fix->value;
13188 mp->max_address = minipool_barrier->address + 65536;
13190 mp->min_address = min_address;
13192 if (min_mp == NULL)
13195 mp->next = minipool_vector_head;
13197 if (mp->next == NULL)
13199 minipool_vector_tail = mp;
13200 minipool_vector_label = gen_label_rtx ();
13203 mp->next->prev = mp;
13205 minipool_vector_head = mp;
13209 mp->next = min_mp->next;
13213 if (mp->next != NULL)
13214 mp->next->prev = mp;
13216 minipool_vector_tail = mp;
13219 /* Save the new entry. */
13227 /* Scan over the following entries and adjust their offsets. */
13228 while (mp->next != NULL)
13230 if (mp->next->min_address < mp->min_address + mp->fix_size)
13231 mp->next->min_address = mp->min_address + mp->fix_size;
13234 mp->next->offset = mp->offset + mp->fix_size;
13236 mp->next->offset = mp->offset;
13245 assign_minipool_offsets (Mfix *barrier)
13247 HOST_WIDE_INT offset = 0;
13250 minipool_barrier = barrier;
13252 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13254 mp->offset = offset;
13256 if (mp->refcount > 0)
13257 offset += mp->fix_size;
13261 /* Output the literal table.  */
13263 dump_minipool (rtx scan)
13269 if (ARM_DOUBLEWORD_ALIGN)
13270 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13271 if (mp->refcount > 0 && mp->fix_size >= 8)
13278 fprintf (dump_file,
13279 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
13280 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
13282 scan = emit_label_after (gen_label_rtx (), scan);
13283 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
13284 scan = emit_label_after (minipool_vector_label, scan);
13286 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
13288 if (mp->refcount > 0)
13292 fprintf (dump_file,
13293 ";; Offset %u, min %ld, max %ld ",
13294 (unsigned) mp->offset, (long) mp->min_address,
13295 (long) mp->max_address);
13296 arm_print_value (dump_file, mp->value);
13297 fputc ('\n', dump_file);
13300 switch (mp->fix_size)
13302 #ifdef HAVE_consttable_1
13304 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
13308 #ifdef HAVE_consttable_2
13310 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
13314 #ifdef HAVE_consttable_4
13316 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
13320 #ifdef HAVE_consttable_8
13322 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
13326 #ifdef HAVE_consttable_16
13328 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
13333 gcc_unreachable ();
13341 minipool_vector_head = minipool_vector_tail = NULL;
13342 scan = emit_insn_after (gen_consttable_end (), scan);
13343 scan = emit_barrier_after (scan);
13346 /* Return the cost of forcibly inserting a barrier after INSN. */
13348 arm_barrier_cost (rtx insn)
13350 /* Basing the location of the pool on the loop depth is preferable,
13351 but at the moment, the basic block information seems to be
13352 corrupt by this stage of the compilation. */
13353 int base_cost = 50;
13354 rtx next = next_nonnote_insn (insn);
13356 if (next != NULL && LABEL_P (next))
13359 switch (GET_CODE (insn))
13362 /* It will always be better to place the table before the label, rather
13363 than after it.  */
13371 return base_cost - 10;
13374 return base_cost + 10;
13378 /* Find the best place in the insn stream in the range
13379 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
13380 Create the barrier by inserting a jump and add a new fix entry for
13381 it.  */
13382 static Mfix *
13383 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
13385 HOST_WIDE_INT count = 0;
13387 rtx from = fix->insn;
13388 /* The instruction after which we will insert the jump. */
13389 rtx selected = NULL;
13391 /* The address at which the jump instruction will be placed. */
13392 HOST_WIDE_INT selected_address;
13394 HOST_WIDE_INT max_count = max_address - fix->address;
13395 rtx label = gen_label_rtx ();
13397 selected_cost = arm_barrier_cost (from);
13398 selected_address = fix->address;
13400 while (from && count < max_count)
13405 /* This code shouldn't have been called if there was a natural barrier
13406 within range.  */
13407 gcc_assert (!BARRIER_P (from));
13409 /* Count the length of this insn. This must stay in sync with the
13410 code that pushes minipool fixes. */
13411 if (LABEL_P (from))
13412 count += get_label_padding (from);
13414 count += get_attr_length (from);
13416 /* If there is a jump table, add its length. */
13417 tmp = is_jump_table (from);
13420 count += get_jump_table_size (tmp);
13422 /* Jump tables aren't in a basic block, so base the cost on
13423 the dispatch insn. If we select this location, we will
13424 still put the pool after the table. */
13425 new_cost = arm_barrier_cost (from);
13427 if (count < max_count
13428 && (!selected || new_cost <= selected_cost))
13431 selected_cost = new_cost;
13432 selected_address = fix->address + count;
13435 /* Continue after the dispatch table. */
13436 from = NEXT_INSN (tmp);
13440 new_cost = arm_barrier_cost (from);
13442 if (count < max_count
13443 && (!selected || new_cost <= selected_cost))
13446 selected_cost = new_cost;
13447 selected_address = fix->address + count;
13450 from = NEXT_INSN (from);
13453 /* Make sure that we found a place to insert the jump. */
13454 gcc_assert (selected);
13456 /* Make sure we do not split a call and its corresponding
13457 CALL_ARG_LOCATION note. */
13458 if (CALL_P (selected))
13460 rtx next = NEXT_INSN (selected);
13461 if (next && NOTE_P (next)
13462 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
13466 /* Create a new JUMP_INSN that branches around a barrier. */
13467 from = emit_jump_insn_after (gen_jump (label), selected);
13468 JUMP_LABEL (from) = label;
13469 barrier = emit_barrier_after (from);
13470 emit_label_after (label, barrier);
13472 /* Create a minipool barrier entry for the new barrier. */
13473 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
13474 new_fix->insn = barrier;
13475 new_fix->address = selected_address;
13476 new_fix->next = fix->next;
13477 fix->next = new_fix;
13482 /* Record that there is a natural barrier in the insn stream at
13483 ADDRESS.  */
13484 static void
13485 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
13487 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13490 fix->address = address;
13493 if (minipool_fix_head != NULL)
13494 minipool_fix_tail->next = fix;
13496 minipool_fix_head = fix;
13498 minipool_fix_tail = fix;
13501 /* Record INSN, which will need fixing up to load a value from the
13502 minipool.  ADDRESS is the offset of the insn from the start of the
13503 function; LOC is a pointer to the part of the insn which requires
13504 fixing; VALUE is the constant that must be loaded, which is of type
13505 MODE.  */
13506 static void
13507 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
13508 enum machine_mode mode, rtx value)
13510 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13513 fix->address = address;
13516 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
13517 fix->value = value;
13518 fix->forwards = get_attr_pool_range (insn);
13519 fix->backwards = get_attr_neg_pool_range (insn);
13520 fix->minipool = NULL;
13522 /* If an insn doesn't have a range defined for it, then it isn't
13523 expecting to be reworked by this code. Better to stop now than
13524 to generate duff assembly code. */
13525 gcc_assert (fix->forwards || fix->backwards);
13527 /* If an entry requires 8-byte alignment then assume all constant pools
13528 require 4 bytes of padding. Trying to do this later on a per-pool
13529 basis is awkward because existing pool entries have to be modified. */
13530 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
13535 fprintf (dump_file,
13536 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
13537 GET_MODE_NAME (mode),
13538 INSN_UID (insn), (unsigned long) address,
13539 -1 * (long)fix->backwards, (long)fix->forwards);
13540 arm_print_value (dump_file, fix->value);
13541 fprintf (dump_file, "\n");
13544 /* Add it to the chain of fixes. */
13547 if (minipool_fix_head != NULL)
13548 minipool_fix_tail->next = fix;
13550 minipool_fix_head = fix;
13552 minipool_fix_tail = fix;
13555 /* Return the cost of synthesizing a 64-bit constant VAL inline.
13556 Returns the number of insns needed, or 99 if we don't know how to
13557 generate it.  */
13558 static int
13559 arm_const_double_inline_cost (rtx val)
13561 rtx lowpart, highpart;
13562 enum machine_mode mode;
13564 mode = GET_MODE (val);
13566 if (mode == VOIDmode)
13569 gcc_assert (GET_MODE_SIZE (mode) == 8);
13571 lowpart = gen_lowpart (SImode, val);
13572 highpart = gen_highpart_mode (SImode, mode, val);
13574 gcc_assert (CONST_INT_P (lowpart));
13575 gcc_assert (CONST_INT_P (highpart));
13577 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
13578 NULL_RTX, NULL_RTX, 0, 0)
13579 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
13580 NULL_RTX, NULL_RTX, 0, 0));
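/* Worked example for the test in the function below (illustrative
   constant): for the 64-bit value 0x00000001fffffffc the high word is
   1, which is a valid ARM immediate, so arm_const_double_by_parts
   returns true and the constant is built with two 32-bit operations
   instead of being loaded from the constant pool.  */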
13583 /* Return true if it is worthwhile to split a 64-bit constant into two
13584 32-bit operations. This is the case if optimizing for size, or
13585 if we have load delay slots, or if one 32-bit part can be done with
13586 a single data operation. */
13588 arm_const_double_by_parts (rtx val)
13590 enum machine_mode mode = GET_MODE (val);
13593 if (optimize_size || arm_ld_sched)
13596 if (mode == VOIDmode)
13599 part = gen_highpart_mode (SImode, mode, val);
13601 gcc_assert (CONST_INT_P (part));
13603 if (const_ok_for_arm (INTVAL (part))
13604 || const_ok_for_arm (~INTVAL (part)))
13607 part = gen_lowpart (SImode, val);
13609 gcc_assert (CONST_INT_P (part));
13611 if (const_ok_for_arm (INTVAL (part))
13612 || const_ok_for_arm (~INTVAL (part)))
13618 /* Return true if it is possible to inline both the high and low parts
13619 of a 64-bit constant into 32-bit data processing instructions. */
13621 arm_const_double_by_immediates (rtx val)
13623 enum machine_mode mode = GET_MODE (val);
13626 if (mode == VOIDmode)
13629 part = gen_highpart_mode (SImode, mode, val);
13631 gcc_assert (CONST_INT_P (part));
13633 if (!const_ok_for_arm (INTVAL (part)))
13636 part = gen_lowpart (SImode, val);
13638 gcc_assert (CONST_INT_P (part));
13640 if (!const_ok_for_arm (INTVAL (part)))
13646 /* Scan INSN and note any of its operands that need fixing.
13647 If DO_PUSHES is false we do not actually push any of the fixups
13648 needed.  */
13649 static void
13650 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
13654 extract_insn (insn);
13656 if (!constrain_operands (1))
13657 fatal_insn_not_found (insn);
13659 if (recog_data.n_alternatives == 0)
13662 /* Fill in recog_op_alt with information about the constraints of
13663 this insn.  */
13664 preprocess_constraints ();
13666 for (opno = 0; opno < recog_data.n_operands; opno++)
13668 /* Things we need to fix can only occur in inputs. */
13669 if (recog_data.operand_type[opno] != OP_IN)
13672 /* If this alternative is a memory reference, then any mention
13673 of constants in this alternative is really to fool reload
13674 into allowing us to accept one there. We need to fix them up
13675 now so that we output the right code. */
13676 if (recog_op_alt[opno][which_alternative].memory_ok)
13678 rtx op = recog_data.operand[opno];
13680 if (CONSTANT_P (op))
13683 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
13684 recog_data.operand_mode[opno], op);
13686 else if (MEM_P (op)
13687 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
13688 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
13692 rtx cop = avoid_constant_pool_reference (op);
13694 /* Casting the address of something to a mode narrower
13695 than a word can cause avoid_constant_pool_reference()
13696 to return the pool reference itself. That's no good to
13697 us here.  Let's just hope that we can use the
13698 constant pool value directly. */
13700 cop = get_pool_constant (XEXP (op, 0));
13702 push_minipool_fix (insn, address,
13703 recog_data.operand_loc[opno],
13704 recog_data.operand_mode[opno], cop);
13714 /* Rewrite move insn into subtract of 0 if the condition codes will
13715 be useful in the next conditional jump insn.  */
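/* Rough assembly-level effect (illustrative, not literal pass output):
   for a sequence that would otherwise emit

	movs	r1, r2
	cmp	r1, #0
	beq	.L1

   rewriting the RTL move as r1 = r2 - 0 allows the flag-setting

	subs	r1, r2, #0
	beq	.L1

   form, making the separate compare against zero unnecessary.  */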
13718 thumb1_reorg (void)
13724 rtx set, dest, src;
13726 rtx prev, insn = BB_END (bb);
13728 while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn))
13729 insn = PREV_INSN (insn);
13731 /* Find the last cbranchsi4_insn in basic block BB. */
13732 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
13735 /* Find the first non-note insn before INSN in basic block BB. */
13736 gcc_assert (insn != BB_HEAD (bb));
13737 prev = PREV_INSN (insn);
13738 while (prev != BB_HEAD (bb) && (NOTE_P (prev) || DEBUG_INSN_P (prev)))
13739 prev = PREV_INSN (prev);
13741 set = single_set (prev);
13745 dest = SET_DEST (set);
13746 src = SET_SRC (set);
13747 if (!low_register_operand (dest, SImode)
13748 || !low_register_operand (src, SImode))
13751 pat = PATTERN (insn);
13752 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
13753 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
13754 in INSN.  Don't need to check dest since cprop_hardreg pass propagates
13755 it.  */
13756 if (REGNO (op0) == REGNO (src))
13758 dest = copy_rtx (dest);
13759 src = copy_rtx (src);
13760 src = gen_rtx_MINUS (SImode, src, const0_rtx);
13761 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
13762 INSN_CODE (prev) = -1;
13763 /* Set test register in INSN to dest. */
13764 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
13765 INSN_CODE (insn) = -1;
13770 /* Convert instructions to their cc-clobbering variant if possible, since
13771 that allows us to use smaller encodings. */
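/* Illustrative example: with the condition codes dead,

	add	r0, r1, r2	@ only a 32-bit encoding outside IT blocks

   can become

	adds	r0, r1, r2	@ 16-bit encoding

   because clobbering CC is harmless when no later insn reads the flags.  */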
13774 thumb2_reorg (void)
13779 INIT_REG_SET (&live);
13781 /* We are freeing block_for_insn in the toplev to keep compatibility
13782 with old MDEP_REORGS that are not CFG based. Recompute it now. */
13783 compute_bb_for_insn ();
13790 COPY_REG_SET (&live, DF_LR_OUT (bb));
13791 df_simulate_initialize_backwards (bb, &live);
13792 FOR_BB_INSNS_REVERSE (bb, insn)
13794 if (NONJUMP_INSN_P (insn)
13795 && !REGNO_REG_SET_P (&live, CC_REGNUM)
13796 && GET_CODE (PATTERN (insn)) == SET)
13798 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
13799 rtx pat = PATTERN (insn);
13800 rtx dst = XEXP (pat, 0);
13801 rtx src = XEXP (pat, 1);
13802 rtx op0 = NULL_RTX, op1 = NULL_RTX;
13804 if (!OBJECT_P (src))
13805 op0 = XEXP (src, 0);
13807 if (BINARY_P (src))
13808 op1 = XEXP (src, 1);
13810 if (low_register_operand (dst, SImode))
13812 switch (GET_CODE (src))
13815 /* Adding two registers and storing the result
13816 in the first source is already a 16-bit
13817 operation.  */
13818 if (rtx_equal_p (dst, op0)
13819 && register_operand (op1, SImode))
13822 if (low_register_operand (op0, SImode))
13824 /* ADDS <Rd>,<Rn>,<Rm> */
13825 if (low_register_operand (op1, SImode))
13827 /* ADDS <Rdn>,#<imm8> */
13828 /* SUBS <Rdn>,#<imm8> */
13829 else if (rtx_equal_p (dst, op0)
13830 && CONST_INT_P (op1)
13831 && IN_RANGE (INTVAL (op1), -255, 255))
13833 /* ADDS <Rd>,<Rn>,#<imm3> */
13834 /* SUBS <Rd>,<Rn>,#<imm3> */
13835 else if (CONST_INT_P (op1)
13836 && IN_RANGE (INTVAL (op1), -7, 7))
13842 /* RSBS <Rd>,<Rn>,#0
13843 Not handled here: see NEG below. */
13844 /* SUBS <Rd>,<Rn>,#<imm3>
13846 Not handled here: see PLUS above. */
13847 /* SUBS <Rd>,<Rn>,<Rm> */
13848 if (low_register_operand (op0, SImode)
13849 && low_register_operand (op1, SImode))
13854 /* MULS <Rdm>,<Rn>,<Rdm>
13855 As an exception to the rule, this is only used
13856 when optimizing for size since MULS is slow on all
13857 known implementations. We do not even want to use
13858 MULS in cold code, if optimizing for speed, so we
13859 test the global flag here. */
13860 if (!optimize_size)
13862 /* else fall through. */
13866 /* ANDS <Rdn>,<Rm> */
13867 if (rtx_equal_p (dst, op0)
13868 && low_register_operand (op1, SImode))
13870 else if (rtx_equal_p (dst, op1)
13871 && low_register_operand (op0, SImode))
13872 action = SWAP_CONV;
13878 /* ASRS <Rdn>,<Rm> */
13879 /* LSRS <Rdn>,<Rm> */
13880 /* LSLS <Rdn>,<Rm> */
13881 if (rtx_equal_p (dst, op0)
13882 && low_register_operand (op1, SImode))
13884 /* ASRS <Rd>,<Rm>,#<imm5> */
13885 /* LSRS <Rd>,<Rm>,#<imm5> */
13886 /* LSLS <Rd>,<Rm>,#<imm5> */
13887 else if (low_register_operand (op0, SImode)
13888 && CONST_INT_P (op1)
13889 && IN_RANGE (INTVAL (op1), 0, 31))
13894 /* RORS <Rdn>,<Rm> */
13895 if (rtx_equal_p (dst, op0)
13896 && low_register_operand (op1, SImode))
13902 /* MVNS <Rd>,<Rm> */
13903 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
13904 if (low_register_operand (op0, SImode))
13909 /* MOVS <Rd>,#<imm8> */
13910 if (CONST_INT_P (src)
13911 && IN_RANGE (INTVAL (src), 0, 255))
13916 /* MOVS and MOV<c> with registers have different
13917 encodings, so are not relevant here. */
13925 if (action != SKIP)
13927 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13928 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13931 if (action == SWAP_CONV)
13933 src = copy_rtx (src);
13934 XEXP (src, 0) = op1;
13935 XEXP (src, 1) = op0;
13936 pat = gen_rtx_SET (VOIDmode, dst, src);
13937 vec = gen_rtvec (2, pat, clobber);
13939 else /* action == CONV */
13940 vec = gen_rtvec (2, pat, clobber);
13942 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13943 INSN_CODE (insn) = -1;
13947 if (NONDEBUG_INSN_P (insn))
13948 df_simulate_one_insn_backwards (bb, insn, &live);
13952 CLEAR_REG_SET (&live);
13955 /* GCC puts the pool in the wrong place for ARM, since we can only
13956 load addresses a limited distance around the pc. We do some
13957 special munging to move the constant pool values to the correct
13958 point in the code. */
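/* Shape of the transformed code (illustrative sketch; the label names
   are invented and the exact directives emitted differ):

	ldr	r4, .LP0	@ pc-relative load, must stay in range
	...
	b	.Lskip		@ barrier: jump around the pool
   .LP0:
	.word	0x12345678	@ 4-byte minipool entry
   .Lskip:
	...
*/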
13963 HOST_WIDE_INT address = 0;
13968 else if (TARGET_THUMB2)
13971 /* Ensure all insns that must be split have been split at this point.
13972 Otherwise, the pool placement code below may compute incorrect
13973 insn lengths. Note that when optimizing, all insns have already
13974 been split at this point. */
13976 split_all_insns_noflow ();
13978 minipool_fix_head = minipool_fix_tail = NULL;
13980 /* The first insn must always be a note, or the code below won't
13981 scan it properly. */
13982 insn = get_insns ();
13983 gcc_assert (NOTE_P (insn));
13986 /* Scan all the insns and record the operands that will need fixing. */
13987 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
13989 if (BARRIER_P (insn))
13990 push_minipool_barrier (insn, address);
13991 else if (INSN_P (insn))
13995 note_invalid_constants (insn, address, true);
13996 address += get_attr_length (insn);
13998 /* If the insn is a vector jump, add the size of the table
13999 and skip the table. */
14000 if ((table = is_jump_table (insn)) != NULL)
14002 address += get_jump_table_size (table);
14006 else if (LABEL_P (insn))
14007 /* Add the worst-case padding due to alignment. We don't add
14008 the _current_ padding because the minipool insertions
14009 themselves might change it. */
14010 address += get_label_padding (insn);
14013 fix = minipool_fix_head;
14015 /* Now scan the fixups and perform the required changes. */
14020 Mfix * last_added_fix;
14021 Mfix * last_barrier = NULL;
14024 /* Skip any further barriers before the next fix. */
14025 while (fix && BARRIER_P (fix->insn))
14028 /* No more fixes. */
14032 last_added_fix = NULL;
14034 for (ftmp = fix; ftmp; ftmp = ftmp->next)
14036 if (BARRIER_P (ftmp->insn))
14038 if (ftmp->address >= minipool_vector_head->max_address)
14041 last_barrier = ftmp;
14043 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
14046 last_added_fix = ftmp; /* Keep track of the last fix added. */
14049 /* If we found a barrier, drop back to that; any fixes that we
14050 could have reached but come after the barrier will now go in
14051 the next mini-pool. */
14052 if (last_barrier != NULL)
14054 /* Reduce the refcount for those fixes that won't go into this
14055 pool after all.  */
14056 for (fdel = last_barrier->next;
14057 fdel && fdel != ftmp;
14060 fdel->minipool->refcount--;
14061 fdel->minipool = NULL;
14064 ftmp = last_barrier;
14068 /* ftmp is the first fix that we can't fit into this pool and
14069 there are no natural barriers that we could use.  Insert a
14070 new barrier in the code somewhere between the previous
14071 fix and this one, and arrange to jump around it. */
14072 HOST_WIDE_INT max_address;
14074 /* The last item on the list of fixes must be a barrier, so
14075 we can never run off the end of the list of fixes without
14076 last_barrier being set. */
14079 max_address = minipool_vector_head->max_address;
14080 /* Check that there isn't another fix that is in range that
14081 we couldn't fit into this pool because the pool was
14082 already too large: we need to put the pool before such an
14083 instruction. The pool itself may come just after the
14084 fix because create_fix_barrier also allows space for a
14085 jump instruction. */
14086 if (ftmp->address < max_address)
14087 max_address = ftmp->address + 1;
14089 last_barrier = create_fix_barrier (last_added_fix, max_address);
14092 assign_minipool_offsets (last_barrier);
14096 if (!BARRIER_P (ftmp->insn)
14097 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
14104 /* Scan over the fixes we have identified for this pool, fixing them
14105 up and adding the constants to the pool itself. */
14106 for (this_fix = fix; this_fix && ftmp != this_fix;
14107 this_fix = this_fix->next)
14108 if (!BARRIER_P (this_fix->insn))
14111 = plus_constant (Pmode,
14112 gen_rtx_LABEL_REF (VOIDmode,
14113 minipool_vector_label),
14114 this_fix->minipool->offset);
14115 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
14118 dump_minipool (last_barrier->insn);
14122 /* From now on we must synthesize any constants that we can't handle
14123 directly. This can happen if the RTL gets split during final
14124 instruction generation. */
14125 after_arm_reorg = 1;
14127 /* Free the minipool memory. */
14128 obstack_free (&minipool_obstack, minipool_startobj);
14131 /* Routines to output assembly language. */
14133 /* If the rtx is the correct value then return the string of the number.
14134 In this way we can ensure that valid double constants are generated even
14135 when cross compiling. */
14137 fp_immediate_constant (rtx x)
14141 if (!fp_consts_inited)
14144 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14146 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
14150 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
14151 static const char *
14152 fp_const_from_val (REAL_VALUE_TYPE *r)
14154 if (!fp_consts_inited)
14157 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
14161 /* OPERANDS[0] is the entire list of insns that constitute pop,
14162 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
14163 is in the list, UPDATE is true iff the list contains explicit
14164 update of base register. */
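/* Typical outputs (illustrative): "pop {r4, r5, pc}" for a pop from SP
   with writeback under unified asm, or "ldmfd sp!, {r4, r5, pc}"
   otherwise; interrupt-return cases append "^".  */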
14166 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
14172 const char *conditional;
14173 int num_saves = XVECLEN (operands[0], 0);
14174 unsigned int regno;
14175 unsigned int regno_base = REGNO (operands[1]);
14178 offset += update ? 1 : 0;
14179 offset += return_pc ? 1 : 0;
14181 /* Is the base register in the list? */
14182 for (i = offset; i < num_saves; i++)
14184 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
14185 /* If SP is in the list, then the base register must be SP. */
14186 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
14187 /* If base register is in the list, there must be no explicit update. */
14188 if (regno == regno_base)
14189 gcc_assert (!update);
14192 conditional = reverse ? "%?%D0" : "%?%d0";
14193 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
14195 /* Output pop (not ldmfd) because it has a shorter encoding.  */
14196 gcc_assert (update);
14197 sprintf (pattern, "pop%s\t{", conditional);
14201 /* Output ldmfd when the base register is SP, otherwise output ldmia.
14202 It's just a convention, their semantics are identical. */
14203 if (regno_base == SP_REGNUM)
14204 sprintf (pattern, "ldm%sfd\t", conditional);
14205 else if (TARGET_UNIFIED_ASM)
14206 sprintf (pattern, "ldmia%s\t", conditional);
14208 sprintf (pattern, "ldm%sia\t", conditional);
14210 strcat (pattern, reg_names[regno_base]);
14212 strcat (pattern, "!, {");
14214 strcat (pattern, ", {");
14217 /* Output the first destination register. */
14219 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
14221 /* Output the rest of the destination registers. */
14222 for (i = offset + 1; i < num_saves; i++)
14224 strcat (pattern, ", ");
14226 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
14229 strcat (pattern, "}");
14231 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
14232 strcat (pattern, "^");
14234 output_asm_insn (pattern, &cond);
14238 /* Output the assembly for a store multiple. */
14241 vfp_output_fstmd (rtx * operands)
14248 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
14249 p = strlen (pattern);
14251 gcc_assert (REG_P (operands[1]));
14253 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
14254 for (i = 1; i < XVECLEN (operands[2], 0); i++)
14256 p += sprintf (&pattern[p], ", d%d", base + i);
14258 strcpy (&pattern[p], "}");
14260 output_asm_insn (pattern, operands);
14265 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
14266 number of bytes pushed. */
14269 vfp_emit_fstmd (int base_reg, int count)
14276 /* Work around an ARM10 VFPr1 bug.  Data corruption can occur when exactly two
14277 register pairs are stored by a store multiple insn. We avoid this
14278 by pushing an extra pair. */
14279 if (count == 2 && !arm_arch6)
14281 if (base_reg == LAST_VFP_REGNUM - 3)
14286 /* FSTMD may not store more than 16 doubleword registers at once. Split
14287 larger stores into multiple parts (up to a maximum of two, in
14288 practice).  */
14292 /* NOTE: base_reg is an internal register number, so each D register
14293 counts as 2.  */
14294 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
14295 saved += vfp_emit_fstmd (base_reg, 16);
14299 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
14300 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
14302 reg = gen_rtx_REG (DFmode, base_reg);
14305 XVECEXP (par, 0, 0)
14306 = gen_rtx_SET (VOIDmode,
14309 gen_rtx_PRE_MODIFY (Pmode,
14312 (Pmode, stack_pointer_rtx,
14315 gen_rtx_UNSPEC (BLKmode,
14316 gen_rtvec (1, reg),
14317 UNSPEC_PUSH_MULT));
14319 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14320 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
14321 RTX_FRAME_RELATED_P (tmp) = 1;
14322 XVECEXP (dwarf, 0, 0) = tmp;
14324 tmp = gen_rtx_SET (VOIDmode,
14325 gen_frame_mem (DFmode, stack_pointer_rtx),
14327 RTX_FRAME_RELATED_P (tmp) = 1;
14328 XVECEXP (dwarf, 0, 1) = tmp;
14330 for (i = 1; i < count; i++)
14332 reg = gen_rtx_REG (DFmode, base_reg);
14334 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
14336 tmp = gen_rtx_SET (VOIDmode,
14337 gen_frame_mem (DFmode,
14338 plus_constant (Pmode,
14342 RTX_FRAME_RELATED_P (tmp) = 1;
14343 XVECEXP (dwarf, 0, i + 1) = tmp;
14346 par = emit_insn (par);
14347 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14348 RTX_FRAME_RELATED_P (par) = 1;
14353 /* Emit a call instruction with pattern PAT. ADDR is the address of
14354 the call target. */
14357 arm_emit_call_insn (rtx pat, rtx addr)
14361 insn = emit_call_insn (pat);
14363 /* The PIC register is live on entry to VxWorks PIC PLT entries.
14364 If the call might use such an entry, add a use of the PIC register
14365 to the instruction's CALL_INSN_FUNCTION_USAGE. */
14366 if (TARGET_VXWORKS_RTP
14368 && GET_CODE (addr) == SYMBOL_REF
14369 && (SYMBOL_REF_DECL (addr)
14370 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
14371 : !SYMBOL_REF_LOCAL_P (addr)))
14373 require_pic_register ();
14374 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
14378 /* Output a 'call' insn. */
14380 output_call (rtx *operands)
14382 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
14384 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
14385 if (REGNO (operands[0]) == LR_REGNUM)
14387 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
14388 output_asm_insn ("mov%?\t%0, %|lr", operands);
14391 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14393 if (TARGET_INTERWORK || arm_arch4t)
14394 output_asm_insn ("bx%?\t%0", operands);
14396 output_asm_insn ("mov%?\t%|pc, %0", operands);
14401 /* Output a 'call' insn that is a reference in memory. This is
14402 disabled for ARMv5 and we prefer a blx instead because otherwise
14403 there's a significant performance overhead. */
14405 output_call_mem (rtx *operands)
14407 gcc_assert (!arm_arch5);
14408 if (TARGET_INTERWORK)
14410 output_asm_insn ("ldr%?\t%|ip, %0", operands);
14411 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14412 output_asm_insn ("bx%?\t%|ip", operands);
14414 else if (regno_use_in (LR_REGNUM, operands[0]))
14416 /* LR is used in the memory address. We load the address in the
14417 first instruction. It's safe to use IP as the target of the
14418 load since the call will kill it anyway. */
14419 output_asm_insn ("ldr%?\t%|ip, %0", operands);
14420 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14422 output_asm_insn ("bx%?\t%|ip", operands);
14424 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
14428 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14429 output_asm_insn ("ldr%?\t%|pc, %0", operands);
14436 /* Output a move from arm registers to arm registers of a long double.
14437 OPERANDS[0] is the destination.
14438 OPERANDS[1] is the source. */
14440 output_mov_long_double_arm_from_arm (rtx *operands)
14442 /* We have to be careful here because the two might overlap. */
14443 int dest_start = REGNO (operands[0]);
14444 int src_start = REGNO (operands[1]);
14448 if (dest_start < src_start)
14450 for (i = 0; i < 3; i++)
14452 ops[0] = gen_rtx_REG (SImode, dest_start + i);
14453 ops[1] = gen_rtx_REG (SImode, src_start + i);
14454 output_asm_insn ("mov%?\t%0, %1", ops);
14459 for (i = 2; i >= 0; i--)
14461 ops[0] = gen_rtx_REG (SImode, dest_start + i);
14462 ops[1] = gen_rtx_REG (SImode, src_start + i);
14463 output_asm_insn ("mov%?\t%0, %1", ops);
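/* arm_emit_movpair below splits a 32-bit immediate: illustratively,
   for 0x12345678 it first sets the destination to 0x5678, then (since
   the high half is nonzero) writes 0x1234 into bits 16-31 through a
   ZERO_EXTRACT set, which typically assembles as a movw/movt pair.  */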
14471 arm_emit_movpair (rtx dest, rtx src)
14473 /* If the src is an immediate, simplify it. */
14474 if (CONST_INT_P (src))
14476 HOST_WIDE_INT val = INTVAL (src);
14477 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
14478 if ((val >> 16) & 0x0000ffff)
14479 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
14481 GEN_INT ((val >> 16) & 0x0000ffff));
14484 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
14485 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
14488 /* Output a move between double words.  It must be REG<-MEM
14489 or MEM<-REG.  */
14490 const char *
14491 output_move_double (rtx *operands, bool emit, int *count)
14493 enum rtx_code code0 = GET_CODE (operands[0]);
14494 enum rtx_code code1 = GET_CODE (operands[1]);
14499 /* The only case when this might happen is when
14500 you are looking at the length of a DImode instruction
14501 that has an invalid constant in it. */
14502 if (code0 == REG && code1 != MEM)
14504 gcc_assert (!emit);
14511 unsigned int reg0 = REGNO (operands[0]);
14513 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
14515 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
14517 switch (GET_CODE (XEXP (operands[1], 0)))
14524 && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
14525 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
14527 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14532 gcc_assert (TARGET_LDRD);
14534 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
14541 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
14543 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
14551 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
14553 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
14558 gcc_assert (TARGET_LDRD);
14560 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
14565 /* Auto-increment addressing modes should never have overlapping
14566 base and destination registers, and overlapping index registers
14567 are already prohibited, so this doesn't need to worry about
14568 fix_cm3_ldrd.  */
14569 otherops[0] = operands[0];
14570 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
14571 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
14573 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
14575 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
14577 /* Registers overlap so split out the increment. */
14580 output_asm_insn ("add%?\t%1, %1, %2", otherops);
14581 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
14588 /* Use a single insn if we can.
14589 FIXME: IWMMXT allows offsets larger than ldrd can
14590 handle, fix these up with a pair of ldr. */
14592 || !CONST_INT_P (otherops[2])
14593 || (INTVAL (otherops[2]) > -256
14594 && INTVAL (otherops[2]) < 256))
14597 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
14603 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
14604 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14614 /* Use a single insn if we can.
14615 FIXME: IWMMXT allows offsets larger than ldrd can handle,
14616 fix these up with a pair of ldr. */
14618 || !CONST_INT_P (otherops[2])
14619 || (INTVAL (otherops[2]) > -256
14620 && INTVAL (otherops[2]) < 256))
14623 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
14629 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14630 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
14640 /* We might be able to use ldrd %0, %1 here. However the range is
14641 different to ldr/adr, and it is broken on some ARMv7-M
14642 implementations. */
14643 /* Use the second register of the pair to avoid problematic
14644 conditions.  */
14645 otherops[1] = operands[1];
14647 output_asm_insn ("adr%?\t%0, %1", otherops);
14648 operands[1] = otherops[0];
14652 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14654 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
14661 /* ??? This needs checking for thumb2. */
14663 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
14664 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
14666 otherops[0] = operands[0];
14667 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
14668 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
14670 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
14672 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
14674 switch ((int) INTVAL (otherops[2]))
14678 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
14684 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
14690 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
14694 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
14695 operands[1] = otherops[0];
14697 && (REG_P (otherops[2])
14699 || (CONST_INT_P (otherops[2])
14700 && INTVAL (otherops[2]) > -256
14701 && INTVAL (otherops[2]) < 256)))
14703 if (reg_overlap_mentioned_p (operands[0],
14707 /* Swap base and index registers over to
14708 avoid a conflict. */
14710 otherops[1] = otherops[2];
14713 /* If both registers conflict, it will usually
14714 have been fixed by a splitter. */
14715 if (reg_overlap_mentioned_p (operands[0], otherops[2])
14716 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
14720 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14721 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14728 otherops[0] = operands[0];
14730 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
14735 if (CONST_INT_P (otherops[2]))
14739 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
14740 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
14742 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14748 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14754 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
14761 return "ldr%(d%)\t%0, [%1]";
14763 return "ldm%(ia%)\t%1, %M0";
14767 otherops[1] = adjust_address (operands[1], SImode, 4);
14768 /* Take care of overlapping base/data reg. */
14769 if (reg_mentioned_p (operands[0], operands[1]))
14773 output_asm_insn ("ldr%?\t%0, %1", otherops);
14774 output_asm_insn ("ldr%?\t%0, %1", operands);
14784 output_asm_insn ("ldr%?\t%0, %1", operands);
14785 output_asm_insn ("ldr%?\t%0, %1", otherops);
14795 /* Constraints should ensure this. */
14796 gcc_assert (code0 == MEM && code1 == REG);
14797 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
14799 switch (GET_CODE (XEXP (operands[0], 0)))
14805 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
14807 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14812 gcc_assert (TARGET_LDRD);
14814 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
14821 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
14823 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
14831 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
14833 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
14838 gcc_assert (TARGET_LDRD);
14840 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
14845 otherops[0] = operands[1];
14846 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
14847 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
14849 /* IWMMXT allows offsets larger than ldrd can handle,
14850 fix these up with a pair of ldr. */
14852 && CONST_INT_P (otherops[2])
14853 && (INTVAL (otherops[2]) <= -256
14854 || INTVAL (otherops[2]) >= 256))
14856 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14860 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
14861 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14870 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14871 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
14877 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14880 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
14885 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
14890 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
14891 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
14893 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
14897 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
14904 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
14911 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
14916 && (REG_P (otherops[2])
14918 || (CONST_INT_P (otherops[2])
14919 && INTVAL (otherops[2]) > -256
14920 && INTVAL (otherops[2]) < 256)))
14922 otherops[0] = operands[1];
14923 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
14925 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
14931 otherops[0] = adjust_address (operands[0], SImode, 4);
14932 otherops[1] = operands[1];
14935 output_asm_insn ("str%?\t%1, %0", operands);
14936 output_asm_insn ("str%?\t%H1, %0", otherops);
14946 /* Output a move, load or store for quad-word vectors in ARM registers. Only
14947 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
14950 output_move_quad (rtx *operands)
14952 if (REG_P (operands[0]))
14954 /* Load, or reg->reg move. */
14956 if (MEM_P (operands[1]))
14958 switch (GET_CODE (XEXP (operands[1], 0)))
14961 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14966 output_asm_insn ("adr%?\t%0, %1", operands);
14967 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
14971 gcc_unreachable ();
14979 gcc_assert (REG_P (operands[1]));
14981 dest = REGNO (operands[0]);
14982 src = REGNO (operands[1]);
14984 /* This seems pretty dumb, but hopefully GCC won't try to do it
14985 very often.  */
14986 if (dest < src)
14987 for (i = 0; i < 4; i++)
14989 ops[0] = gen_rtx_REG (SImode, dest + i);
14990 ops[1] = gen_rtx_REG (SImode, src + i);
14991 output_asm_insn ("mov%?\t%0, %1", ops);
14994 for (i = 3; i >= 0; i--)
14996 ops[0] = gen_rtx_REG (SImode, dest + i);
14997 ops[1] = gen_rtx_REG (SImode, src + i);
14998 output_asm_insn ("mov%?\t%0, %1", ops);
15004 gcc_assert (MEM_P (operands[0]));
15005 gcc_assert (REG_P (operands[1]));
15006 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
15008 switch (GET_CODE (XEXP (operands[0], 0)))
15011 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
15015 gcc_unreachable ();
15022 /* Output a VFP load or store instruction. */
15025 output_move_vfp (rtx *operands)
15027 rtx reg, mem, addr, ops[2];
15028 int load = REG_P (operands[0]);
15029 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
15030 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
15033 enum machine_mode mode;
15035 reg = operands[!load];
15036 mem = operands[load];
15038 mode = GET_MODE (reg);
15040 gcc_assert (REG_P (reg));
15041 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
15042 gcc_assert (mode == SFmode
15046 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
15047 gcc_assert (MEM_P (mem));
15049 addr = XEXP (mem, 0);
15051 switch (GET_CODE (addr))
15054 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
15055 ops[0] = XEXP (addr, 0);
15060 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
15061 ops[0] = XEXP (addr, 0);
15066 templ = "f%s%c%%?\t%%%s0, %%1%s";
15072 sprintf (buff, templ,
15073 load ? "ld" : "st",
15076 integer_p ? "\t%@ int" : "");
15077 output_asm_insn (buff, ops);
15082 /* Output a Neon double-word or quad-word load or store, or a load
15083 or store for larger structure modes.
15085 WARNING: The ordering of elements is weird in big-endian mode,
15086 because the EABI requires that vectors stored in memory appear
15087 as though they were stored by a VSTM instruction.
15088 GCC RTL defines element ordering based on in-memory order.
15089 This can be different from the architectural ordering of elements
15090 within a NEON register. The intrinsics defined in arm_neon.h use the
15091 NEON register element ordering, not the GCC RTL element ordering.
15093 For example, the in-memory ordering of a big-endian quadword
15094 vector with 16-bit elements when stored from register pair {d0,d1}
15095 will be (lowest address first, d0[N] is NEON register element N):
15097 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
15099 When necessary, quadword registers (dN, dN+1) are moved to ARM
15100 registers from rN in the order:
15102 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
15104 So that STM/LDM can be used on vectors in ARM registers, and the
15105 same memory layout will result as if VSTM/VLDM were used.
15107 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
15108 possible, which allows use of appropriate alignment tags.
15109 Note that the choice of "64" is independent of the actual vector
15110 element size; this size simply ensures that the behavior is
15111 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
15113 Due to limitations of those instructions, use of VST1.64/VLD1.64
15114 is not possible if:
15115 - the address contains PRE_DEC, or
15116 - the mode refers to more than 4 double-word registers
15118 In those cases, it would be possible to replace VSTM/VLDM by a
15119 sequence of instructions; this is not currently implemented since
15120 this is not certain to actually improve performance. */
15123 output_move_neon (rtx *operands)
15125 rtx reg, mem, addr, ops[2];
15126 int regno, nregs, load = REG_P (operands[0]);
15129 enum machine_mode mode;
15131 reg = operands[!load];
15132 mem = operands[load];
15134 mode = GET_MODE (reg);
15136 gcc_assert (REG_P (reg));
15137 regno = REGNO (reg);
15138 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
15139 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
15140 || NEON_REGNO_OK_FOR_QUAD (regno));
15141 gcc_assert (VALID_NEON_DREG_MODE (mode)
15142 || VALID_NEON_QREG_MODE (mode)
15143 || VALID_NEON_STRUCT_MODE (mode));
15144 gcc_assert (MEM_P (mem));
15146 addr = XEXP (mem, 0);
15148 /* Strip off const from addresses like (const (plus (...))). */
15149 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
15150 addr = XEXP (addr, 0);
15152 switch (GET_CODE (addr))
15155 /* We have to use vldm / vstm for too-large modes. */
15158 templ = "v%smia%%?\t%%0!, %%h1";
15159 ops[0] = XEXP (addr, 0);
15163 templ = "v%s1.64\t%%h1, %%A0";
15170 /* We have to use vldm / vstm in this case, since there is no
15171 pre-decrement form of the vld1 / vst1 instructions. */
15172 templ = "v%smdb%%?\t%%0!, %%h1";
15173 ops[0] = XEXP (addr, 0);
15178 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
15179 gcc_unreachable ();
15186 for (i = 0; i < nregs; i++)
15188 /* We're only using DImode here because it's a convenient size. */
15189 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
15190 ops[1] = adjust_address (mem, DImode, 8 * i);
15191 if (reg_overlap_mentioned_p (ops[0], mem))
15193 gcc_assert (overlap == -1);
15198 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
15199 output_asm_insn (buff, ops);
15204 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
15205 ops[1] = adjust_address (mem, SImode, 8 * overlap);
15206 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
15207 output_asm_insn (buff, ops);
15214 /* We have to use vldm / vstm for too-large modes. */
15216 templ = "v%smia%%?\t%%m0, %%h1";
15218 templ = "v%s1.64\t%%h1, %%A0";
15224 sprintf (buff, templ, load ? "ld" : "st");
15225 output_asm_insn (buff, ops);
15230 /* Compute and return the length of neon_mov<mode>, where <mode> is
15231 one of VSTRUCT modes: EI, OI, CI or XI. */
15233 arm_attr_length_move_neon (rtx insn)
15235 rtx reg, mem, addr;
15237 enum machine_mode mode;
15239 extract_insn_cached (insn);
15241 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
15243 mode = GET_MODE (recog_data.operand[0]);
15254 gcc_unreachable ();
15258 load = REG_P (recog_data.operand[0]);
15259 reg = recog_data.operand[!load];
15260 mem = recog_data.operand[load];
15262 gcc_assert (MEM_P (mem));
15264 mode = GET_MODE (reg);
15265 addr = XEXP (mem, 0);
15267 /* Strip off const from addresses like (const (plus (...))). */
15268 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
15269 addr = XEXP (addr, 0);
15271 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
15273 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
15280 /* Return nonzero if the offset in the address is an immediate.  Otherwise,
15281 return FALSE.  */
15284 arm_address_offset_is_imm (rtx insn)
15288 extract_insn_cached (insn);
15290 if (REG_P (recog_data.operand[0]))
15293 mem = recog_data.operand[0];
15295 gcc_assert (MEM_P (mem));
15297 addr = XEXP (mem, 0);
15300 || (GET_CODE (addr) == PLUS
15301 && REG_P (XEXP (addr, 0))
15302 && CONST_INT_P (XEXP (addr, 1))))
15308 /* Output an ADD r, s, #n where n may be too big for one instruction.
15309 If adding zero to one register, output nothing. */
15311 output_add_immediate (rtx *operands)
15313 HOST_WIDE_INT n = INTVAL (operands[2]);
15315 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
15318 output_multi_immediate (operands,
15319 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
15322 output_multi_immediate (operands,
15323 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
15330 /* Output a multiple immediate operation.
15331 OPERANDS is the vector of operands referred to in the output patterns.
15332 INSTR1 is the output pattern to use for the first constant.
15333 INSTR2 is the output pattern to use for subsequent constants.
15334 IMMED_OP is the index of the constant slot in OPERANDS.
15335 N is the constant value. */
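/* Worked example (illustrative): for n == 0xFFF, which is not a valid
   single ARM immediate, this emits two instructions, e.g.
   "add r0, r1, #255" followed by "add r0, r0, #3840" (0xF00), since
   each 8-bit chunk at an even rotation is encodable on its own.  */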
15336 static const char *
15337 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
15338 int immed_op, HOST_WIDE_INT n)
15340 #if HOST_BITS_PER_WIDE_INT > 32
15346 /* Quick and easy output. */
15347 operands[immed_op] = const0_rtx;
15348 output_asm_insn (instr1, operands);
15353 const char * instr = instr1;
15355 /* Note that n is never zero here (which would give no output). */
15356 for (i = 0; i < 32; i += 2)
15360 operands[immed_op] = GEN_INT (n & (255 << i));
15361 output_asm_insn (instr, operands);
15371 /* Return the name of a shifter operation. */
15372 static const char *
15373 arm_shift_nmem (enum rtx_code code)
15378 return ARM_LSL_NAME;
15394 /* Return the appropriate ARM instruction for the operation code.
15395 The returned result should not be overwritten. OP is the rtx of the
15396 operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
15397 is a shift.  */
15398 static const char *
15399 arithmetic_instr (rtx op, int shift_first_arg)
15401 switch (GET_CODE (op))
15407 return shift_first_arg ? "rsb" : "sub";
15422 return arm_shift_nmem (GET_CODE (op));
15425 gcc_unreachable ();
15429 /* Ensure valid constant shifts and return the appropriate shift mnemonic
15430 for the operation code. The returned result should not be overwritten.
15431 OP is the rtx code of the shift.
15432 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
15433 constant shift amount otherwise.  */
15434 static const char *
15435 shift_op (rtx op, HOST_WIDE_INT *amountp)
15438 enum rtx_code code = GET_CODE (op);
15443 if (!CONST_INT_P (XEXP (op, 1)))
15445 output_operand_lossage ("invalid shift operand");
15450 *amountp = 32 - INTVAL (XEXP (op, 1));
15458 mnem = arm_shift_nmem (code);
15459 if (CONST_INT_P (XEXP (op, 1)))
15461 *amountp = INTVAL (XEXP (op, 1));
15463 else if (REG_P (XEXP (op, 1)))
15470 output_operand_lossage ("invalid shift operand");
15476 /* We never have to worry about the amount being other than a
15477 power of 2, since this case can never be reloaded from a reg. */
15478 if (!CONST_INT_P (XEXP (op, 1)))
15480 output_operand_lossage ("invalid shift operand");
15484 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
15486 /* Amount must be a power of two. */
15487 if (*amountp & (*amountp - 1))
15489 output_operand_lossage ("invalid shift operand");
15493 *amountp = int_log2 (*amountp);
15494 return ARM_LSL_NAME;
15497 output_operand_lossage ("invalid shift operand");
15501 /* This is not 100% correct, but follows from the desire to merge
15502 multiplication by a power of 2 with the recognizer for a
15503 shift. >=32 is not a valid shift for "lsl", so we must try and
15504 output a shift that produces the correct arithmetical result.
15505 Using lsr #32 is identical except for the fact that the carry bit
15506 is not set correctly if we set the flags; but we never use the
15507 carry bit from such an operation, so we can ignore that. */
15508 if (code == ROTATERT)
15509 /* Rotate is just modulo 32. */
15511 else if (*amountp != (*amountp & 31))
15513 if (code == ASHIFT)
15518 /* Shifts of 0 are no-ops. */
15525 /* Obtain the shift from the POWER of two. */
15527 static HOST_WIDE_INT
15528 int_log2 (HOST_WIDE_INT power)
15530 HOST_WIDE_INT shift = 0;
15532 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
15534 gcc_assert (shift <= 31);
15541 /* Output a .ascii pseudo-op, keeping track of lengths. This is
15542 because /bin/as is horribly restrictive. The judgement about
15543 whether or not each character is 'printable' (and can be output as
15544 is) or not (and must be printed with an octal escape) must be made
15545 with reference to the *host* character set -- the situation is
15546 similar to that discussed in the comments above pp_c_char in
15547 c-pretty-print.c. */
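/* Example (illustrative): the input bytes  a " \ b  are emitted as

	.ascii	"a\"\\b"

   and a non-printable byte such as 0x07 becomes the octal escape \007.  */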
15549 #define MAX_ASCII_LEN 51
15552 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
15555 int len_so_far = 0;
15557 fputs ("\t.ascii\t\"", stream);
15559 for (i = 0; i < len; i++)
15563 if (len_so_far >= MAX_ASCII_LEN)
15565 fputs ("\"\n\t.ascii\t\"", stream);
15571 if (c == '\\' || c == '\"')
15573 putc ('\\', stream);
15581 fprintf (stream, "\\%03o", c);
15586 fputs ("\"\n", stream);
15589 /* Compute the register save mask for registers 0 through 12
15590 inclusive. This code is used by arm_compute_save_reg_mask. */
15592 static unsigned long
15593 arm_compute_save_reg0_reg12_mask (void)
15595 unsigned long func_type = arm_current_func_type ();
15596 unsigned long save_reg_mask = 0;
15599 if (IS_INTERRUPT (func_type))
15601 unsigned int max_reg;
15602 /* Interrupt functions must not corrupt any registers,
15603 even call clobbered ones. If this is a leaf function
15604 we can just examine the registers used by the RTL, but
15605 otherwise we have to assume that whatever function is
15606 called might clobber anything, and so we have to save
15607 all the call-clobbered registers as well. */
15608 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
15609 /* FIQ handlers have registers r8 - r12 banked, so
15610 we only need to check r0 - r7.  Normal ISRs only
15611 bank r14 and r15, so we must check up to r12.
15612 r13 is the stack pointer which is always preserved,
15613 so we do not need to consider it here. */
15618 for (reg = 0; reg <= max_reg; reg++)
15619 if (df_regs_ever_live_p (reg)
15620 || (! crtl->is_leaf && call_used_regs[reg]))
15621 save_reg_mask |= (1 << reg);
15623 /* Also save the pic base register if necessary. */
15625 && !TARGET_SINGLE_PIC_BASE
15626 && arm_pic_register != INVALID_REGNUM
15627 && crtl->uses_pic_offset_table)
15628 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15630 else if (IS_VOLATILE (func_type))
15632 /* For noreturn functions we historically omitted register saves
15633 altogether. However this really messes up debugging. As a
15634 compromise, save just the frame pointers.  Combined with the link
15635 register saved elsewhere, this should be sufficient to get
15636 a backtrace.  */
15637 if (frame_pointer_needed)
15638 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15639 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
15640 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15641 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
15642 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
15646 /* In the normal case we only need to save those registers
15647 which are call saved and which are used by this function. */
15648 for (reg = 0; reg <= 11; reg++)
15649 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15650 save_reg_mask |= (1 << reg);
15652 /* Handle the frame pointer as a special case. */
15653 if (frame_pointer_needed)
15654 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15656 /* If we aren't loading the PIC register,
15657 don't stack it even though it may be live. */
15659 && !TARGET_SINGLE_PIC_BASE
15660 && arm_pic_register != INVALID_REGNUM
15661 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
15662 || crtl->uses_pic_offset_table))
15663 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15665 /* The prologue will copy SP into R0, so save it. */
15666 if (IS_STACKALIGN (func_type))
15667 save_reg_mask |= 1;
15670 /* Save registers so the exception handler can modify them. */
15671 if (crtl->calls_eh_return)
15677 reg = EH_RETURN_DATA_REGNO (i);
15678 if (reg == INVALID_REGNUM)
15680 save_reg_mask |= 1 << reg;
15684 return save_reg_mask;
15688 /* Compute the number of bytes used to store the static chain register on the
15689 stack, above the stack frame. We need to know this accurately to get the
15690 alignment of the rest of the stack frame correct. */
15692 static int
arm_compute_static_chain_stack_bytes (void)
15694 unsigned long func_type = arm_current_func_type ();
15695 int static_chain_stack_bytes = 0;
15697 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
15698 && IS_NESTED (func_type)
15699 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
15700 static_chain_stack_bytes = 4;
15702 return static_chain_stack_bytes;
15706 /* Compute a bit mask of which registers need to be
15707 saved on the stack for the current function.
15708 This is used by arm_get_frame_offsets, which may add extra registers. */
15710 static unsigned long
15711 arm_compute_save_reg_mask (void)
15713 unsigned int save_reg_mask = 0;
15714 unsigned long func_type = arm_current_func_type ();
15717 if (IS_NAKED (func_type))
15718 /* This should never really happen. */
15721 /* If we are creating a stack frame, then we must save the frame pointer,
15722 IP (which will hold the old stack pointer), LR and the PC. */
15723 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15725 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
15728 | (1 << PC_REGNUM);
15730 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
15732 /* Decide if we need to save the link register.
15733 Interrupt routines have their own banked link register,
15734 so they never need to save it.
15735 Otherwise if we do not use the link register we do not need to save
15736 it. If we are pushing other registers onto the stack however, we
15737 can save an instruction in the epilogue by pushing the link register
15738 now and then popping it back into the PC. This incurs extra memory
15739 accesses though, so we only do it when optimizing for size, and only
15740 if we know that we will not need a fancy return sequence. */
15741 if (df_regs_ever_live_p (LR_REGNUM)
15742 || (save_reg_mask
15743 && optimize_size
15744 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15745 && !crtl->calls_eh_return))
15746 save_reg_mask |= 1 << LR_REGNUM;
15748 if (cfun->machine->lr_save_eliminated)
15749 save_reg_mask &= ~ (1 << LR_REGNUM);
15751 if (TARGET_REALLY_IWMMXT
15752 && ((bit_count (save_reg_mask)
15753 + ARM_NUM_INTS (crtl->args.pretend_args_size +
15754 arm_compute_static_chain_stack_bytes())
15755 ) % 2) != 0)
15756 {
15757 /* The total number of registers that are going to be pushed
15758 onto the stack is odd. We need to ensure that the stack
15759 is 64-bit aligned before we start to save iWMMXt registers,
15760 and also before we start to create locals. (A local variable
15761 might be a double or long long which we will load/store using
15762 an iWMMXt instruction). Therefore we need to push another
15763 ARM register, so that the stack will be 64-bit aligned. We
15764 try to avoid using the arg registers (r0 -r3) as they might be
15765 used to pass values in a tail call. */
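/* A worked example (illustrative): pushing {r4, r5, lr} gives an odd
count of three words, so one more free core register -- r6, say, if
available -- is added to the mask purely as alignment padding. */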
15766 for (reg = 4; reg <= 12; reg++)
15767 if ((save_reg_mask & (1 << reg)) == 0)
15768 break;
15770 if (reg <= 12)
15771 save_reg_mask |= (1 << reg);
15772 else
15773 {
15774 cfun->machine->sibcall_blocked = 1;
15775 save_reg_mask |= (1 << 3);
15776 }
15777 }
15779 /* We may need to push an additional register for use initializing the
15780 PIC base register. */
15781 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
15782 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
15783 {
15784 reg = thumb_find_work_register (1 << 4);
15785 if (!call_used_regs[reg])
15786 save_reg_mask |= (1 << reg);
15787 }
15789 return save_reg_mask;
15793 /* Compute a bit mask of which registers need to be
15794 saved on the stack for the current function. */
15795 static unsigned long
15796 thumb1_compute_save_reg_mask (void)
15797 {
15798 unsigned long mask;
15799 unsigned reg;
15801 mask = 0;
15802 for (reg = 0; reg < 12; reg ++)
15803 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15804 mask |= 1 << reg;
15806 if (flag_pic
15807 && !TARGET_SINGLE_PIC_BASE
15808 && arm_pic_register != INVALID_REGNUM
15809 && crtl->uses_pic_offset_table)
15810 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15812 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
15813 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
15814 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15816 /* LR will also be pushed if any lo regs are pushed. */
15817 if (mask & 0xff || thumb_force_lr_save ())
15818 mask |= (1 << LR_REGNUM);
15820 /* Make sure we have a low work register if we need one.
15821 We will need one if we are going to push a high register,
15822 but we are not currently intending to push a low register. */
15823 if ((mask & 0xff) == 0
15824 && ((mask & 0x0f00) || TARGET_BACKTRACE))
15825 {
15826 /* Use thumb_find_work_register to choose which register
15827 we will use. If the register is live then we will
15828 have to push it. Use LAST_LO_REGNUM as our fallback
15829 choice for the register to select. */
15830 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
15831 /* Make sure the register returned by thumb_find_work_register is
15832 not part of the return value. */
15833 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
15834 reg = LAST_LO_REGNUM;
15836 if (! call_used_regs[reg])
15837 mask |= 1 << reg;
15838 }
15840 /* The 504 below is 8 bytes less than 512 because there are two possible
15841 alignment words. We can't tell here if they will be present or not so we
15842 have to play it safe and assume that they are. */
15843 if ((CALLER_INTERWORKING_SLOT_SIZE +
15844 ROUND_UP_WORD (get_frame_size ()) +
15845 crtl->outgoing_args_size) >= 504)
15846 {
15847 /* This is the same as the code in thumb1_expand_prologue() which
15848 determines which register to use for stack decrement. */
15849 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
15850 if (mask & (1 << reg))
15851 break;
15853 if (reg > LAST_LO_REGNUM)
15855 /* Make sure we have a register available for stack decrement. */
15856 mask |= 1 << LAST_LO_REGNUM;
15857 }
15859 return mask;
15860 }
15864 /* Return the number of bytes required to save VFP registers. */
15865 static int
15866 arm_get_vfp_saved_size (void)
15867 {
15868 unsigned int regno;
15869 int count;
15870 int saved;
15872 saved = 0;
15873 /* Space for saved VFP registers. */
15874 if (TARGET_HARD_FLOAT && TARGET_VFP)
15875 {
15876 count = 0;
15877 for (regno = FIRST_VFP_REGNUM;
15878 regno < LAST_VFP_REGNUM;
15879 regno += 2)
15880 {
15881 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
15882 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
15883 {
15884 if (count > 0)
15885 {
15886 /* Workaround ARM10 VFPr1 bug. */
15887 if (count == 2 && !arm_arch6)
15888 count++;
15889 saved += count * 8;
15890 }
15891 count = 0;
15892 }
15893 else
15894 count++;
15895 }
15896 if (count > 0)
15897 {
15898 if (count == 2 && !arm_arch6)
15899 count++;
15900 saved += count * 8;
15901 }
15902 }
15904 return saved;
15905 }
15907 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
15908 everything bar the final return instruction. If simple_return is true,
15909 then do not output epilogue, because it has already been emitted in RTL. */
15910 const char *
15911 output_return_instruction (rtx operand, bool really_return, bool reverse,
15912 bool simple_return)
15913 {
15914 char conditional[10];
15915 char instr[100];
15916 unsigned reg;
15917 unsigned long live_regs_mask;
15918 unsigned long func_type;
15919 arm_stack_offsets *offsets;
15921 func_type = arm_current_func_type ();
15923 if (IS_NAKED (func_type))
15924 return "";
15926 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15927 {
15928 /* If this function was declared non-returning, and we have
15929 found a tail call, then we have to trust that the called
15930 function won't return. */
15931 if (really_return)
15932 {
15933 rtx ops[2];
15935 /* Otherwise, trap an attempted return by aborting. */
15936 ops[0] = operand;
15937 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
15938 : "abort");
15939 assemble_external_libcall (ops[1]);
15940 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
15941 }
15943 return "";
15944 }
15946 gcc_assert (!cfun->calls_alloca || really_return);
15948 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
15950 cfun->machine->return_used_this_function = 1;
15952 offsets = arm_get_frame_offsets ();
15953 live_regs_mask = offsets->saved_regs_mask;
15955 if (!simple_return && live_regs_mask)
15956 {
15957 const char * return_reg;
15959 /* If we do not have any special requirements for function exit
15960 (e.g. interworking) then we can load the return address
15961 directly into the PC. Otherwise we must load it into LR. */
15962 if (really_return
15963 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
15964 return_reg = reg_names[PC_REGNUM];
15965 else
15966 return_reg = reg_names[LR_REGNUM];
15968 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
15969 {
15970 /* There are three possible reasons for the IP register
15971 being saved. 1) a stack frame was created, in which case
15972 IP contains the old stack pointer, or 2) an ISR routine
15973 corrupted it, or 3) it was saved to align the stack on
15974 iWMMXt. In case 1, restore IP into SP, otherwise just
15975 restore IP. */
15976 if (frame_pointer_needed)
15977 {
15978 live_regs_mask &= ~ (1 << IP_REGNUM);
15979 live_regs_mask |= (1 << SP_REGNUM);
15980 }
15981 else
15982 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
15983 }
15985 /* On some ARM architectures it is faster to use LDR rather than
15986 LDM to load a single register. On other architectures, the
15987 cost is the same. In 26 bit mode, or for exception handlers,
15988 we have to use LDM to load the PC so that the CPSR is also
15989 restored. */
15990 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15991 if (live_regs_mask == (1U << reg))
15992 break;
15994 if (reg <= LAST_ARM_REGNUM
15995 && (reg != LR_REGNUM
15996 || ! really_return
15997 || ! IS_INTERRUPT (func_type)))
15998 {
15999 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
16000 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
16001 }
16002 else
16003 {
16004 char *p;
16005 int first = 1;
16007 /* Generate the load multiple instruction to restore the
16008 registers. Note we can get here, even if
16009 frame_pointer_needed is true, but only if sp already
16010 points to the base of the saved core registers. */
16011 if (live_regs_mask & (1 << SP_REGNUM))
16012 {
16013 unsigned HOST_WIDE_INT stack_adjust;
16015 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
16016 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
16018 if (stack_adjust && arm_arch5 && TARGET_ARM)
16019 if (TARGET_UNIFIED_ASM)
16020 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
16021 else
16022 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
16023 else
16024 {
16025 /* If we can't use ldmib (SA110 bug),
16026 then try to pop r3 instead. */
16027 if (stack_adjust)
16028 live_regs_mask |= 1 << 3;
16030 if (TARGET_UNIFIED_ASM)
16031 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
16032 else
16033 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
16034 }
16035 }
16036 else
16037 if (TARGET_UNIFIED_ASM)
16038 sprintf (instr, "pop%s\t{", conditional);
16039 else
16040 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
16042 p = instr + strlen (instr);
16044 for (reg = 0; reg <= SP_REGNUM; reg++)
16045 if (live_regs_mask & (1 << reg))
16046 {
16047 int l = strlen (reg_names[reg]);
16049 if (first)
16050 first = FALSE;
16051 else
16052 {
16053 memcpy (p, ", ", 2);
16054 p += 2;
16055 }
16057 memcpy (p, "%|", 2);
16058 memcpy (p + 2, reg_names[reg], l);
16059 p += l + 2;
16060 }
16062 if (live_regs_mask & (1 << LR_REGNUM))
16063 {
16064 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
16065 /* If returning from an interrupt, restore the CPSR. */
16066 if (IS_INTERRUPT (func_type))
16067 strcat (p, "^");
16068 }
16069 else
16070 strcpy (p, "}");
16071 }
16073 output_asm_insn (instr, & operand);
16075 /* See if we need to generate an extra instruction to
16076 perform the actual function return. */
16077 if (really_return
16078 && func_type != ARM_FT_INTERWORKED
16079 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
16080 {
16081 /* The return has already been handled
16082 by loading the LR into the PC. */
16083 return "";
16084 }
16085 }
16087 if (really_return)
16088 {
16089 switch ((int) ARM_FUNC_TYPE (func_type))
16090 {
16091 case ARM_FT_ISR:
16092 case ARM_FT_FIQ:
16093 /* ??? This is wrong for unified assembly syntax. */
16094 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
16095 break;
16097 case ARM_FT_INTERWORKED:
16098 sprintf (instr, "bx%s\t%%|lr", conditional);
16099 break;
16101 case ARM_FT_EXCEPTION:
16102 /* ??? This is wrong for unified assembly syntax. */
16103 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
16104 break;
16106 default:
16107 /* Use bx if it's available. */
16108 if (arm_arch5 || arm_arch4t)
16109 sprintf (instr, "bx%s\t%%|lr", conditional);
16110 else
16111 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
16112 break;
16113 }
16115 output_asm_insn (instr, & operand);
16116 }
16118 return "";
16119 }
16121 /* Write the function name into the code section, directly preceding
16122 the function prologue.
16124 Code will be output similar to this:
16125 t0
16126 .ascii "arm_poke_function_name", 0
16127 .align
16128 t1
16129 .word 0xff000000 + (t1 - t0)
16130 arm_poke_function_name
16131 mov ip, sp
16132 stmfd sp!, {fp, ip, lr, pc}
16133 sub fp, ip, #4
16135 When performing a stack backtrace, code can inspect the value
16136 of 'pc' stored at 'fp' + 0. If the trace function then looks
16137 at location pc - 12 and the top 8 bits are set, then we know
16138 that there is a function name embedded immediately preceding this
16139 location and has length ((pc[-3]) & 0xff000000).
16141 We assume that pc is declared as a pointer to an unsigned long.
16143 It is of no benefit to output the function name if we are assembling
16144 a leaf function. These function types will not contain a stack
16145 backtrace structure, therefore it is not possible to determine the
16146 function name. */
16147 void
16148 arm_poke_function_name (FILE *stream, const char *name)
16149 {
16150 unsigned long alignlength;
16151 unsigned long length;
16152 rtx x;
16154 length = strlen (name) + 1;
16155 alignlength = ROUND_UP_WORD (length);
16157 ASM_OUTPUT_ASCII (stream, name, length);
16158 ASM_OUTPUT_ALIGN (stream, 2);
16159 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
16160 assemble_aligned_integer (UNITS_PER_WORD, x);
16161 }
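/* A worked example (illustrative): for the name "arm_poke_function_name"
itself, strlen + 1 == 23, which ROUND_UP_WORD pads to 24 bytes of
.ascii data, so the marker word emitted is 0xff000000 + 24 ==
0xff000018, matching the layout described above. */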
16163 /* Place some comments into the assembler stream
16164 describing the current function. */
16165 static void
16166 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
16167 {
16168 unsigned long func_type;
16170 /* ??? Do we want to print some of the below anyway? */
16171 if (TARGET_THUMB1)
16172 return;
16174 /* Sanity check. */
16175 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
16177 func_type = arm_current_func_type ();
16179 switch ((int) ARM_FUNC_TYPE (func_type))
16180 {
16181 default:
16182 case ARM_FT_NORMAL:
16183 break;
16184 case ARM_FT_INTERWORKED:
16185 asm_fprintf (f, "\t%@ Function supports interworking.\n");
16186 break;
16187 case ARM_FT_ISR:
16188 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
16189 break;
16190 case ARM_FT_FIQ:
16191 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
16192 break;
16193 case ARM_FT_EXCEPTION:
16194 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
16195 break;
16196 }
16198 if (IS_NAKED (func_type))
16199 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
16201 if (IS_VOLATILE (func_type))
16202 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
16204 if (IS_NESTED (func_type))
16205 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
16206 if (IS_STACKALIGN (func_type))
16207 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
16209 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
16210 crtl->args.size,
16211 crtl->args.pretend_args_size, frame_size);
16213 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
16214 frame_pointer_needed,
16215 cfun->machine->uses_anonymous_args);
16217 if (cfun->machine->lr_save_eliminated)
16218 asm_fprintf (f, "\t%@ link register save eliminated.\n");
16220 if (crtl->calls_eh_return)
16221 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
16222 }
16224 static void
16226 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
16227 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
16228 {
16229 arm_stack_offsets *offsets;
16231 if (TARGET_THUMB1)
16232 {
16233 int regno;
16235 /* Emit any call-via-reg trampolines that are needed for v4t support
16236 of call_reg and call_value_reg type insns. */
16237 for (regno = 0; regno < LR_REGNUM; regno++)
16238 {
16239 rtx label = cfun->machine->call_via[regno];
16241 if (label != NULL)
16242 {
16243 switch_to_section (function_section (current_function_decl));
16244 targetm.asm_out.internal_label (asm_out_file, "L",
16245 CODE_LABEL_NUMBER (label));
16246 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
16247 }
16248 }
16250 /* ??? Probably not safe to set this here, since it assumes that a
16251 function will be emitted as assembly immediately after we generate
16252 RTL for it. This does not happen for inline functions. */
16253 cfun->machine->return_used_this_function = 0;
16254 }
16255 else /* TARGET_32BIT */
16256 {
16257 /* We need to take into account any stack-frame rounding. */
16258 offsets = arm_get_frame_offsets ();
16260 gcc_assert (!use_return_insn (FALSE, NULL)
16261 || (cfun->machine->return_used_this_function != 0)
16262 || offsets->saved_regs == offsets->outgoing_args
16263 || frame_pointer_needed);
16265 /* Reset the ARM-specific per-function variables. */
16266 after_arm_reorg = 0;
16267 }
16268 }
16270 /* Generate and emit a pattern that will be recognized as STRD pattern. If even
16271 number of registers are being pushed, multiple STRD patterns are created for
16272 all register pairs. If odd number of registers are pushed, emit a
16273 combination of STRDs and STR for the prologue saves. */
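/* A worked example (illustrative): a mask covering {r4, r5, r6, r7, lr}
(five registers) emits one STR for the lowest stack slot followed by
two STRDs; a mask covering {r4-r7} (four registers) emits two STRDs
only. */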
16274 static void
16275 thumb2_emit_strd_push (unsigned long saved_regs_mask)
16276 {
16277 int num_regs = 0;
16278 int i, j;
16279 rtx par = NULL_RTX;
16280 rtx insn = NULL_RTX;
16281 rtx dwarf = NULL_RTX;
16282 rtx tmp, reg, tmp1;
16284 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16285 if (saved_regs_mask & (1 << i))
16286 num_regs++;
16288 gcc_assert (num_regs && num_regs <= 16);
16290 /* Pre-decrement the stack pointer, based on there being num_regs 4-byte
16291 registers to push. */
16292 tmp = gen_rtx_SET (VOIDmode,
16293 stack_pointer_rtx,
16294 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
16295 RTX_FRAME_RELATED_P (tmp) = 1;
16296 insn = emit_insn (tmp);
16298 /* Create sequence for DWARF info. */
16299 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
16301 /* RTLs cannot be shared, hence create new copy for dwarf. */
16302 tmp1 = gen_rtx_SET (VOIDmode,
16303 stack_pointer_rtx,
16304 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
16305 RTX_FRAME_RELATED_P (tmp1) = 1;
16306 XVECEXP (dwarf, 0, 0) = tmp1;
16308 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
16309 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
16311 /* Var j iterates over all the registers to gather all the registers in
16312 saved_regs_mask. Var i gives index of register R_j in stack frame.
16313 A PARALLEL RTX of register-pair is created here, so that pattern for
16314 STRD can be matched. If num_regs is odd, 1st register will be pushed
16315 using STR and remaining registers will be pushed with STRD in pairs.
16316 If num_regs is even, all registers are pushed with STRD in pairs.
16317 Hence, skip first element for odd num_regs. */
16318 for (i = num_regs - 1, j = LAST_ARM_REGNUM; i >= (num_regs % 2); j--)
16319 if (saved_regs_mask & (1 << j))
16320 {
16321 /* Create RTX for store. New RTX is created for dwarf as
16322 they are not sharable. */
16323 reg = gen_rtx_REG (SImode, j);
16324 tmp = gen_rtx_SET (SImode,
16325 gen_frame_mem
16326 (SImode,
16327 plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
16328 reg);
16330 tmp1 = gen_rtx_SET (SImode,
16331 gen_frame_mem
16332 (SImode,
16333 plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
16334 reg);
16335 RTX_FRAME_RELATED_P (tmp) = 1;
16336 RTX_FRAME_RELATED_P (tmp1) = 1;
16338 if (((i - (num_regs % 2)) % 2) == 1)
16339 /* When (i - (num_regs % 2)) is odd, the RTX to be emitted is yet to
16340 be created. Hence create it first. The STRD pattern we are
16341 generating is :
16342 [ (SET (MEM (PLUS (SP) (NUM))) (reg_t1))
16343 (SET (MEM (PLUS (SP) (NUM + 4))) (reg_t2)) ]
16344 where the target registers need not be consecutive. */
16345 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
16347 /* Register R_j is added in PARALLEL RTX. If (i - (num_regs % 2)) is
16348 even, the reg_j is added as 0th element and if it is odd, reg_i is
16349 added as 1st element of STRD pattern shown above. */
16350 XVECEXP (par, 0, ((i - (num_regs % 2)) % 2)) = tmp;
16351 XVECEXP (dwarf, 0, (i + 1)) = tmp1;
16353 if (((i - (num_regs % 2)) % 2) == 0)
16354 /* When (i - (num_regs % 2)) is even, RTXs for both the registers
16355 to be loaded are generated in above given STRD pattern, and the
16356 pattern can be emitted now. */
16357 emit_insn (par);
16359 i--;
16360 }
16362 if ((num_regs % 2) == 1)
16363 {
16364 /* If odd number of registers are pushed, generate STR pattern to store
16365 lone register. */
16366 for (; (saved_regs_mask & (1 << j)) == 0; j--);
16368 tmp1 = gen_frame_mem (SImode, plus_constant (Pmode,
16369 stack_pointer_rtx, 4 * i));
16370 reg = gen_rtx_REG (SImode, j);
16371 tmp = gen_rtx_SET (SImode, tmp1, reg);
16372 RTX_FRAME_RELATED_P (tmp) = 1;
16373 emit_insn (tmp);
16376 tmp1 = gen_rtx_SET (SImode,
16377 gen_frame_mem
16378 (SImode,
16379 plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
16380 reg);
16381 RTX_FRAME_RELATED_P (tmp1) = 1;
16382 XVECEXP (dwarf, 0, (i + 1)) = tmp1;
16383 }
16385 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16386 RTX_FRAME_RELATED_P (insn) = 1;
16387 }
16390 /* Generate and emit an insn that we will recognize as a push_multi.
16391 Unfortunately, since this insn does not reflect very well the actual
16392 semantics of the operation, we need to annotate the insn for the benefit
16393 of DWARF2 frame unwind information. */
16394 static rtx
16395 emit_multi_reg_push (unsigned long mask)
16396 {
16397 int num_regs = 0;
16398 int num_dwarf_regs;
16399 int i, j;
16400 rtx par;
16401 rtx dwarf;
16402 int dwarf_par_index;
16403 rtx tmp, reg;
16405 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16406 if (mask & (1 << i))
16407 num_regs++;
16409 gcc_assert (num_regs && num_regs <= 16);
16411 /* We don't record the PC in the dwarf frame information. */
16412 num_dwarf_regs = num_regs;
16413 if (mask & (1 << PC_REGNUM))
16414 num_dwarf_regs--;
16416 /* For the body of the insn we are going to generate an UNSPEC in
16417 parallel with several USEs. This allows the insn to be recognized
16418 by the push_multi pattern in the arm.md file.
16420 The body of the insn looks something like this:
16422 (parallel [
16423 (set (mem:BLK (pre_modify:SI (reg:SI sp)
16424 (const_int:SI <num>)))
16425 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
16426 (use (reg:SI XX))
16427 (use (reg:SI YY))
16428 ...
16429 ])
16431 For the frame note however, we try to be more explicit and actually
16432 show each register being stored into the stack frame, plus a (single)
16433 decrement of the stack pointer. We do it this way in order to be
16434 friendly to the stack unwinding code, which only wants to see a single
16435 stack decrement per instruction. The RTL we generate for the note looks
16436 something like this:
16438 (sequence [
16439 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
16440 (set (mem:SI (reg:SI sp)) (reg:SI r4))
16441 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
16442 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
16443 ...
16444 ])
16446 FIXME:: In an ideal world the PRE_MODIFY would not exist and
16447 instead we'd have a parallel expression detailing all
16448 the stores to the various memory addresses so that debug
16449 information is more up-to-date. Remember however while writing
16450 this to take care of the constraints with the push instruction.
16452 Note also that this has to be taken care of for the VFP registers.
16454 For more see PR43399. */
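/* A worked example (illustrative): mask == 0x4030, i.e. {r4, r5, lr},
gives num_regs == 3 and num_dwarf_regs == 3 (PC is absent); the note
records one SP decrement of 12 followed by stores of r4, r5 and lr at
offsets 0, 4 and 8. */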
16456 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
16457 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
16458 dwarf_par_index = 1;
16460 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16462 if (mask & (1 << i))
16463 {
16464 reg = gen_rtx_REG (SImode, i);
16466 XVECEXP (par, 0, 0)
16467 = gen_rtx_SET (VOIDmode,
16468 gen_frame_mem
16469 (BLKmode,
16470 gen_rtx_PRE_MODIFY (Pmode,
16471 stack_pointer_rtx,
16472 plus_constant
16473 (Pmode, stack_pointer_rtx,
16474 -4 * num_regs))
16475 ),
16476 gen_rtx_UNSPEC (BLKmode,
16477 gen_rtvec (1, reg),
16478 UNSPEC_PUSH_MULT));
16480 if (i != PC_REGNUM)
16481 {
16482 tmp = gen_rtx_SET (VOIDmode,
16483 gen_frame_mem (SImode, stack_pointer_rtx),
16484 reg);
16485 RTX_FRAME_RELATED_P (tmp) = 1;
16486 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
16487 dwarf_par_index++;
16488 }
16490 break;
16491 }
16492 }
16494 for (j = 1, i++; j < num_regs; i++)
16496 if (mask & (1 << i))
16497 {
16498 reg = gen_rtx_REG (SImode, i);
16500 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
16502 if (i != PC_REGNUM)
16503 {
16504 tmp
16505 = gen_rtx_SET (VOIDmode,
16506 gen_frame_mem
16507 (SImode,
16508 plus_constant (Pmode, stack_pointer_rtx,
16509 4 * j)),
16510 reg);
16511 RTX_FRAME_RELATED_P (tmp) = 1;
16512 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
16513 }
16515 j++;
16516 }
16517 }
16519 par = emit_insn (par);
16521 tmp = gen_rtx_SET (VOIDmode,
16522 stack_pointer_rtx,
16523 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
16524 RTX_FRAME_RELATED_P (tmp) = 1;
16525 XVECEXP (dwarf, 0, 0) = tmp;
16527 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
16529 return par;
16530 }
16532 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
16533 SAVED_REGS_MASK shows which registers need to be restored.
16535 Unfortunately, since this insn does not reflect very well the actual
16536 semantics of the operation, we need to annotate the insn for the benefit
16537 of DWARF2 frame unwind information. */
16538 static void
16539 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
16540 {
16541 int num_regs = 0;
16542 int i, j;
16543 rtx par;
16544 rtx dwarf = NULL_RTX;
16545 rtx tmp, reg;
16546 bool return_in_pc;
16547 int offset_adj;
16548 int emit_update;
16550 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
16551 offset_adj = return_in_pc ? 1 : 0;
16552 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16553 if (saved_regs_mask & (1 << i))
16554 num_regs++;
16556 gcc_assert (num_regs && num_regs <= 16);
16558 /* If SP is in reglist, then we don't emit SP update insn. */
16559 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
16561 /* The parallel needs to hold num_regs SETs
16562 and one SET for the stack update. */
16563 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
16565 if (return_in_pc)
16566 {
16567 tmp = ret_rtx;
16568 XVECEXP (par, 0, 0) = tmp;
16569 }
16571 if (emit_update)
16572 {
16573 /* Increment the stack pointer, based on there being
16574 num_regs 4-byte registers to restore. */
16575 tmp = gen_rtx_SET (VOIDmode,
16576 stack_pointer_rtx,
16577 plus_constant (Pmode,
16578 stack_pointer_rtx,
16579 4 * num_regs));
16580 RTX_FRAME_RELATED_P (tmp) = 1;
16581 XVECEXP (par, 0, offset_adj) = tmp;
16582 }
16584 /* Now restore every reg, which may include PC. */
16585 for (j = 0, i = 0; j < num_regs; i++)
16586 if (saved_regs_mask & (1 << i))
16587 {
16588 reg = gen_rtx_REG (SImode, i);
16589 tmp = gen_rtx_SET (VOIDmode,
16590 reg,
16591 gen_frame_mem
16592 (SImode,
16593 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
16594 RTX_FRAME_RELATED_P (tmp) = 1;
16595 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
16597 /* We need to maintain a sequence for DWARF info too. As dwarf info
16598 should not have PC, skip PC. */
16599 if (i != PC_REGNUM)
16600 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
16602 j++;
16603 }
16605 if (return_in_pc)
16606 par = emit_jump_insn (par);
16607 else
16608 par = emit_insn (par);
16610 REG_NOTES (par) = dwarf;
16611 }
16613 /* Generate and emit an insn pattern that we will recognize as a pop_multi
16614 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
16616 Unfortunately, since this insn does not reflect very well the actual
16617 semantics of the operation, we need to annotate the insn for the benefit
16618 of DWARF2 frame unwind information. */
16619 static void
16620 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
16621 {
16622 int i, j;
16623 rtx par;
16624 rtx dwarf = NULL_RTX;
16625 rtx tmp, reg;
16627 gcc_assert (num_regs && num_regs <= 32);
16629 /* Workaround ARM10 VFPr1 bug. */
16630 if (num_regs == 2 && !arm_arch6)
16631 {
16632 if (first_reg == 15)
16633 first_reg--;
16635 num_regs++;
16636 }
16638 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
16639 there could be up to 32 D-registers to restore.
16640 If there are more than 16 D-registers, make two recursive calls,
16641 each of which emits one pop_multi instruction. */
16642 if (num_regs > 16)
16643 {
16644 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
16645 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
16646 return;
16647 }
16649 /* The parallel needs to hold num_regs SETs
16650 and one SET for the stack update. */
16651 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
16653 /* Increment the stack pointer, based on there being
16654 num_regs 8-byte registers to restore. */
16655 tmp = gen_rtx_SET (VOIDmode,
16656 base_reg,
16657 plus_constant (Pmode, base_reg, 8 * num_regs));
16658 RTX_FRAME_RELATED_P (tmp) = 1;
16659 XVECEXP (par, 0, 0) = tmp;
16661 /* Now show every reg that will be restored, using a SET for each. */
16662 for (j = 0, i=first_reg; j < num_regs; i += 2)
16663 {
16664 reg = gen_rtx_REG (DFmode, i);
16666 tmp = gen_rtx_SET (VOIDmode,
16667 reg,
16668 gen_frame_mem
16669 (DFmode,
16670 plus_constant (Pmode, base_reg, 8 * j)));
16671 RTX_FRAME_RELATED_P (tmp) = 1;
16672 XVECEXP (par, 0, j + 1) = tmp;
16674 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
16676 j++;
16677 }
16679 par = emit_insn (par);
16680 REG_NOTES (par) = dwarf;
16681 }
16683 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
16684 number of registers are being popped, multiple LDRD patterns are created for
16685 all register pairs. If odd number of registers are popped, last register is
16686 loaded by using LDR pattern. */
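/* A worked example (illustrative): popping {r4, r5, r6} emits one LDRD
for r4/r5 and a final LDR with post-increment for r6; popping
{r4, r5, r6, pc} emits the LDRD for r4/r5 and then falls back to a
pop-multiple-and-return for r6/pc, as handled at the end below. */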
16687 static void
16688 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
16689 {
16690 int num_regs = 0;
16691 int i, j;
16692 rtx par = NULL_RTX;
16693 rtx dwarf = NULL_RTX;
16694 rtx tmp, reg, tmp1;
16695 bool return_in_pc;
16697 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
16698 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16699 if (saved_regs_mask & (1 << i))
16700 num_regs++;
16702 gcc_assert (num_regs && num_regs <= 16);
16704 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
16705 to be popped. So, if num_regs is even, now it will become odd,
16706 and we can generate pop with PC. If num_regs is odd, it will be
16707 even now, and ldr with return can be generated for PC. */
16708 if (return_in_pc)
16709 num_regs--;
16711 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
16713 /* Var j iterates over all the registers to gather all the registers in
16714 saved_regs_mask. Var i gives index of saved registers in stack frame.
16715 A PARALLEL RTX of register-pair is created here, so that pattern for
16716 LDRD can be matched. As PC is always last register to be popped, and
16717 we have already decremented num_regs if PC, we don't have to worry
16718 about PC in this loop. */
16719 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
16720 if (saved_regs_mask & (1 << j))
16721 {
16722 /* Create RTX for memory load. */
16723 reg = gen_rtx_REG (SImode, j);
16724 tmp = gen_rtx_SET (SImode,
16725 reg,
16726 gen_frame_mem (SImode,
16727 plus_constant (Pmode,
16728 stack_pointer_rtx, 4 * i)));
16729 RTX_FRAME_RELATED_P (tmp) = 1;
16731 if ((i % 2) == 0)
16732 {
16733 /* When saved-register index (i) is even, the RTX to be emitted is
16734 yet to be created. Hence create it first. The LDRD pattern we
16735 are generating is :
16736 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
16737 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
16738 where target registers need not be consecutive. */
16739 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
16740 dwarf = NULL_RTX;
16741 }
16743 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
16744 added as 0th element and if i is odd, reg_i is added as 1st element
16745 of LDRD pattern shown above. */
16746 XVECEXP (par, 0, (i % 2)) = tmp;
16747 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
16749 if ((i % 2) == 1)
16750 {
16751 /* When saved-register index (i) is odd, RTXs for both the registers
16752 to be loaded are generated in above given LDRD pattern, and the
16753 pattern can be emitted now. */
16754 par = emit_insn (par);
16755 REG_NOTES (par) = dwarf;
16756 }
16758 i++;
16759 }
16761 /* If the number of registers pushed is odd AND return_in_pc is false OR
16762 number of registers are even AND return_in_pc is true, last register is
16763 popped using LDR. It can be PC as well. Hence, adjust the stack first and
16764 then LDR with post increment. */
16766 /* Increment the stack pointer, based on there being
16767 num_regs 4-byte registers to restore. */
16768 tmp = gen_rtx_SET (VOIDmode,
16769 stack_pointer_rtx,
16770 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
16771 RTX_FRAME_RELATED_P (tmp) = 1;
16772 emit_insn (tmp);
16774 dwarf = NULL_RTX;
16776 if (((num_regs % 2) == 1 && !return_in_pc)
16777 || ((num_regs % 2) == 0 && return_in_pc))
16778 {
16779 /* Scan for the single register to be popped. Skip until the saved
16780 register is found. */
16781 for (; (saved_regs_mask & (1 << j)) == 0; j++);
16783 /* Gen LDR with post increment here. */
16784 tmp1 = gen_rtx_MEM (SImode,
16785 gen_rtx_POST_INC (SImode,
16786 stack_pointer_rtx));
16787 set_mem_alias_set (tmp1, get_frame_alias_set ());
16789 reg = gen_rtx_REG (SImode, j);
16790 tmp = gen_rtx_SET (SImode, reg, tmp1);
16791 RTX_FRAME_RELATED_P (tmp) = 1;
16792 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
16796 /* If return_in_pc, j must be PC_REGNUM. */
16797 gcc_assert (j == PC_REGNUM);
16798 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
16799 XVECEXP (par, 0, 0) = ret_rtx;
16800 XVECEXP (par, 0, 1) = tmp;
16801 par = emit_jump_insn (par);
16802 }
16803 else
16805 par = emit_insn (tmp);
16808 REG_NOTES (par) = dwarf;
16809 }
16810 else if ((num_regs % 2) == 1 && return_in_pc)
16811 {
16812 /* There are 2 registers to be popped. So, generate the pattern
16813 pop_multiple_with_stack_update_and_return to pop in PC. */
16814 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
16815 }
16817 return;
16818 }
16820 /* Calculate the size of the return value that is passed in registers. */
16821 static unsigned
16822 arm_size_return_regs (void)
16823 {
16824 enum machine_mode mode;
16826 if (crtl->return_rtx != 0)
16827 mode = GET_MODE (crtl->return_rtx);
16829 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16831 return GET_MODE_SIZE (mode);
16832 }
16834 /* Return true if the current function needs to save/restore LR. */
16835 static bool
16836 thumb_force_lr_save (void)
16837 {
16838 return !cfun->machine->lr_save_eliminated
16839 && (!leaf_function_p ()
16840 || thumb_far_jump_used_p ()
16841 || df_regs_ever_live_p (LR_REGNUM));
16842 }
16845 /* Return true if r3 is used by any of the tail call insns in the
16846 current function. */
16847 static bool
16848 any_sibcall_uses_r3 (void)
16849 {
16850 edge_iterator ei;
16851 edge e;
16853 if (!crtl->tail_call_emit)
16854 return false;
16855 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16856 if (e->flags & EDGE_SIBCALL)
16857 {
16858 rtx call = BB_END (e->src);
16859 if (!CALL_P (call))
16860 call = prev_nonnote_nondebug_insn (call);
16861 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
16862 if (find_regno_fusage (call, USE, 3))
16863 return true;
16864 }
16865 return false;
16866 }
16869 /* Compute the distance from register FROM to register TO.
16870 These can be the arg pointer (26), the soft frame pointer (25),
16871 the stack pointer (13) or the hard frame pointer (11).
16872 In thumb mode r7 is used as the soft frame pointer, if needed.
16873 Typical stack layout looks like this:
16875 old stack pointer -> | |
16876 ----
16877 | | \
16878 | | saved arguments for
16879 | | vararg functions
16880 | | /
16881 --
16882 hard FP & arg pointer -> | | \
16883 | | stack
16884 | | frame
16885 | | /
16886 --
16887 | | \
16888 | | call saved
16889 | | registers
16890 soft frame pointer -> | | /
16891 --
16892 | | \
16893 | | local
16894 | | variables
16895 locals base pointer -> | | /
16896 --
16897 | | \
16898 | | outgoing
16899 | | arguments
16900 current stack pointer -> | | /
16901 --
16903 For a given function some or all of these stack components
16904 may not be needed, giving rise to the possibility of
16905 eliminating some of the registers.
16907 The values returned by this function must reflect the behavior
16908 of arm_expand_prologue() and arm_compute_save_reg_mask().
16910 The sign of the number returned reflects the direction of stack
16911 growth, so the values are positive for all eliminations except
16912 from the soft frame pointer to the hard frame pointer.
16914 SFP may point just inside the local variables block to ensure correct
16915 alignment. */
16918 /* Calculate stack offsets. These are used to calculate register elimination
16919 offsets and in prologue/epilogue code. Also calculates which registers
16920 should be saved. */
16922 static arm_stack_offsets *
16923 arm_get_frame_offsets (void)
16924 {
16925 struct arm_stack_offsets *offsets;
16926 unsigned long func_type;
16927 int leaf;
16928 int saved;
16929 int core_saved;
16930 HOST_WIDE_INT frame_size;
16931 int i;
16933 offsets = &cfun->machine->stack_offsets;
16935 /* We need to know if we are a leaf function. Unfortunately, it
16936 is possible to be called after start_sequence has been called,
16937 which causes get_insns to return the insns for the sequence,
16938 not the function, which will cause leaf_function_p to return
16939 the incorrect result.
16940 However, we only need
16941 to know about leaf functions once reload has completed, and the
16942 frame size cannot be changed after that time, so we can safely
16943 use the cached value. */
16945 if (reload_completed)
16946 return offsets;
16948 /* Initially this is the size of the local variables. It will be translated
16949 into an offset once we have determined the size of preceding data. */
16950 frame_size = ROUND_UP_WORD (get_frame_size ());
16952 leaf = leaf_function_p ();
16954 /* Space for variadic functions. */
16955 offsets->saved_args = crtl->args.pretend_args_size;
16957 /* In Thumb mode this is incorrect, but never used. */
16958 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
16959 arm_compute_static_chain_stack_bytes();
16961 if (TARGET_32BIT)
16962 {
16963 unsigned int regno;
16965 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
16966 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16967 saved = core_saved;
16969 /* We know that SP will be doubleword aligned on entry, and we must
16970 preserve that condition at any subroutine call. We also require the
16971 soft frame pointer to be doubleword aligned. */
16973 if (TARGET_REALLY_IWMMXT)
16974 {
16975 /* Check for the call-saved iWMMXt registers. */
16976 for (regno = FIRST_IWMMXT_REGNUM;
16977 regno <= LAST_IWMMXT_REGNUM;
16978 regno++)
16979 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
16980 saved += 8;
16981 }
16983 func_type = arm_current_func_type ();
16984 /* Space for saved VFP registers. */
16985 if (! IS_VOLATILE (func_type)
16986 && TARGET_HARD_FLOAT && TARGET_VFP)
16987 saved += arm_get_vfp_saved_size ();
16988 }
16989 else /* TARGET_THUMB1 */
16990 {
16991 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
16992 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16993 saved = core_saved;
16994 if (TARGET_BACKTRACE)
16995 saved += 16;
16996 }
16998 /* Saved registers include the stack frame. */
16999 offsets->saved_regs = offsets->saved_args + saved +
17000 arm_compute_static_chain_stack_bytes();
17001 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
17002 /* A leaf function does not need any stack alignment if it has nothing
17003 on the stack. */
17004 if (leaf && frame_size == 0
17005 /* However if it calls alloca(), we have a dynamically allocated
17006 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
17007 && ! cfun->calls_alloca)
17008 {
17009 offsets->outgoing_args = offsets->soft_frame;
17010 offsets->locals_base = offsets->soft_frame;
17011 return offsets;
17012 }
17014 /* Ensure SFP has the correct alignment. */
17015 if (ARM_DOUBLEWORD_ALIGN
17016 && (offsets->soft_frame & 7))
17017 {
17018 offsets->soft_frame += 4;
17019 /* Try to align stack by pushing an extra reg. Don't bother doing this
17020 when there is a stack frame as the alignment will be rolled into
17021 the normal stack adjustment. */
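/* A worked example (illustrative): on Thumb-2 this turns
"push {r4, lr}; sub sp, #4" into the shorter "push {r3, r4, lr}",
with the padding register simply discarded again by the epilogue. */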
17022 if (frame_size + crtl->outgoing_args_size == 0)
17023 {
17024 int reg = -1;
17026 /* If it is safe to use r3, then do so. This sometimes
17027 generates better code on Thumb-2 by avoiding the need to
17028 use 32-bit push/pop instructions. */
17029 if (! any_sibcall_uses_r3 ()
17030 && arm_size_return_regs () <= 12
17031 && (offsets->saved_regs_mask & (1 << 3)) == 0)
17032 {
17033 reg = 3;
17034 }
17035 else
17036 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
17037 {
17038 /* Avoid fixed registers; they may be changed at
17039 arbitrary times so it's unsafe to restore them
17040 during the epilogue. */
17041 if (!fixed_regs[i]
17042 && (offsets->saved_regs_mask & (1 << i)) == 0)
17043 {
17044 reg = i;
17045 break;
17046 }
17047 }
17049 if (reg != -1)
17050 {
17051 offsets->saved_regs += 4;
17052 offsets->saved_regs_mask |= (1 << reg);
17053 }
17054 }
17055 }
17057 offsets->locals_base = offsets->soft_frame + frame_size;
17058 offsets->outgoing_args = (offsets->locals_base
17059 + crtl->outgoing_args_size);
17061 if (ARM_DOUBLEWORD_ALIGN)
17062 {
17063 /* Ensure SP remains doubleword aligned. */
17064 if (offsets->outgoing_args & 7)
17065 offsets->outgoing_args += 4;
17066 gcc_assert (!(offsets->outgoing_args & 7));
17067 }
17069 return offsets;
17070 }
17073 /* Calculate the relative offsets for the different stack pointers. Positive
17074 offsets are in the direction of stack growth. */
17076 unsigned int
17077 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
17078 {
17079 arm_stack_offsets *offsets;
17081 offsets = arm_get_frame_offsets ();
17083 /* OK, now we have enough information to compute the distances.
17084 There must be an entry in these switch tables for each pair
17085 of registers in ELIMINABLE_REGS, even if some of the entries
17086 seem to be redundant or useless. */
17087 switch (from)
17088 {
17089 case ARG_POINTER_REGNUM:
17090 switch (to)
17091 {
17092 case THUMB_HARD_FRAME_POINTER_REGNUM:
17093 return 0;
17095 case FRAME_POINTER_REGNUM:
17096 /* This is the reverse of the soft frame pointer
17097 to hard frame pointer elimination below. */
17098 return offsets->soft_frame - offsets->saved_args;
17100 case ARM_HARD_FRAME_POINTER_REGNUM:
17101 /* This is only non-zero in the case where the static chain register
17102 is stored above the frame. */
17103 return offsets->frame - offsets->saved_args - 4;
17105 case STACK_POINTER_REGNUM:
17106 /* If nothing has been pushed on the stack at all
17107 then this will return -4. This *is* correct! */
17108 return offsets->outgoing_args - (offsets->saved_args + 4);
17110 default:
17111 gcc_unreachable ();
17112 }
17113 gcc_unreachable ();
17115 case FRAME_POINTER_REGNUM:
17116 switch (to)
17117 {
17118 case THUMB_HARD_FRAME_POINTER_REGNUM:
17119 return 0;
17121 case ARM_HARD_FRAME_POINTER_REGNUM:
17122 /* The hard frame pointer points to the top entry in the
17123 stack frame. The soft frame pointer to the bottom entry
17124 in the stack frame. If there is no stack frame at all,
17125 then they are identical. */
17127 return offsets->frame - offsets->soft_frame;
17129 case STACK_POINTER_REGNUM:
17130 return offsets->outgoing_args - offsets->soft_frame;
17132 default:
17133 gcc_unreachable ();
17134 }
17135 gcc_unreachable ();
17137 default:
17138 /* You cannot eliminate from the stack pointer.
17139 In theory you could eliminate from the hard frame
17140 pointer to the stack pointer, but this will never
17141 happen, since if a stack frame is not needed the
17142 hard frame pointer will never be used. */
17143 gcc_unreachable ();
17144 }
17145 }
17147 /* Given FROM and TO register numbers, say whether this elimination is
17148 allowed. Frame pointer elimination is automatically handled.
17150 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
17151 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
17152 pointer, we must eliminate FRAME_POINTER_REGNUM into
17153 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
17154 ARG_POINTER_REGNUM. */
17156 bool
17157 arm_can_eliminate (const int from, const int to)
17158 {
17159 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
17160 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
17161 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
17162 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
17163 true);
17164 }
17166 /* Emit RTL to save coprocessor registers on function entry. Returns the
17167 number of bytes pushed. */
17169 static int
17170 arm_save_coproc_regs(void)
17171 {
17172 int saved_size = 0;
17173 unsigned reg;
17174 unsigned start_reg;
17175 rtx insn;
17177 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
17178 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
17179 {
17180 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
17181 insn = gen_rtx_MEM (V2SImode, insn);
17182 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
17183 RTX_FRAME_RELATED_P (insn) = 1;
17184 saved_size += 8;
17185 }
17187 if (TARGET_HARD_FLOAT && TARGET_VFP)
17188 {
17189 start_reg = FIRST_VFP_REGNUM;
17191 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
17192 {
17193 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
17194 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
17195 {
17196 if (start_reg != reg)
17197 saved_size += vfp_emit_fstmd (start_reg,
17198 (reg - start_reg) / 2);
17199 start_reg = reg + 2;
17200 }
17201 }
17202 if (start_reg != reg)
17203 saved_size += vfp_emit_fstmd (start_reg,
17204 (reg - start_reg) / 2);
17205 }
17207 return saved_size;
17208 }
17210 /* Set the Thumb frame pointer from the stack pointer. */
17212 static void
17213 thumb_set_frame_pointer (arm_stack_offsets *offsets)
17214 {
17215 HOST_WIDE_INT amount;
17216 rtx insn, dwarf;
17218 amount = offsets->outgoing_args - offsets->locals_base;
17219 if (amount < 1024)
17220 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
17221 stack_pointer_rtx, GEN_INT (amount)));
17222 else
17223 {
17224 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
17225 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
17226 expects the first two operands to be the same. */
17227 if (TARGET_THUMB2)
17228 {
17229 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
17230 stack_pointer_rtx,
17231 hard_frame_pointer_rtx));
17232 }
17233 else
17234 {
17235 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
17236 hard_frame_pointer_rtx,
17237 stack_pointer_rtx));
17238 }
17239 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
17240 plus_constant (Pmode, stack_pointer_rtx, amount));
17241 RTX_FRAME_RELATED_P (dwarf) = 1;
17242 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
17243 }
17245 RTX_FRAME_RELATED_P (insn) = 1;
17246 }
17248 /* Generate the prologue instructions for entry into an ARM or Thumb-2
17249 function. */
17250 void
17251 arm_expand_prologue (void)
17252 {
17253 rtx amount;
17254 rtx insn;
17255 rtx ip_rtx;
17256 unsigned long live_regs_mask;
17257 unsigned long func_type;
17258 int fp_offset = 0;
17259 int saved_pretend_args = 0;
17260 int saved_regs = 0;
17261 unsigned HOST_WIDE_INT args_to_push;
17262 arm_stack_offsets *offsets;
17264 func_type = arm_current_func_type ();
17266 /* Naked functions don't have prologues. */
17267 if (IS_NAKED (func_type))
17268 return;
17270 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
17271 args_to_push = crtl->args.pretend_args_size;
17273 /* Compute which register we will have to save onto the stack. */
17274 offsets = arm_get_frame_offsets ();
17275 live_regs_mask = offsets->saved_regs_mask;
17277 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
17279 if (IS_STACKALIGN (func_type))
17280 {
17281 rtx r0, r1;
17283 /* Handle a word-aligned stack pointer. We generate the following:
17285 mov r0, sp
17286 bic r1, r0, #7
17287 mov sp, r1
17288 <save and restore r0 in normal prologue/epilogue>
17289 mov sp, r0
17290 bx r0
17292 The unwinder doesn't need to know about the stack realignment.
17293 Just tell it we saved SP in r0. */
17294 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
17296 r0 = gen_rtx_REG (SImode, 0);
17297 r1 = gen_rtx_REG (SImode, 1);
17299 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
17300 RTX_FRAME_RELATED_P (insn) = 1;
17301 add_reg_note (insn, REG_CFA_REGISTER, NULL);
17303 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
17305 /* ??? The CFA changes here, which may cause GDB to conclude that it
17306 has entered a different function. That said, the unwind info is
17307 correct, individually, before and after this instruction because
17308 we've described the save of SP, which will override the default
17309 handling of SP as restoring from the CFA. */
17310 emit_insn (gen_movsi (stack_pointer_rtx, r1));
17311 }
17313 /* For APCS frames, if IP register is clobbered
17314 when creating frame, save that register in a special
17315 way. */
17316 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
17317 {
17318 if (IS_INTERRUPT (func_type))
17320 /* Interrupt functions must not corrupt any registers.
17321 Creating a frame pointer however, corrupts the IP
17322 register, so we must push it first. */
17323 emit_multi_reg_push (1 << IP_REGNUM);
17325 /* Do not set RTX_FRAME_RELATED_P on this insn.
17326 The dwarf stack unwinding code only wants to see one
17327 stack decrement per function, and this is not it. If
17328 this instruction is labeled as being part of the frame
17329 creation sequence then dwarf2out_frame_debug_expr will
17330 die when it encounters the assignment of IP to FP
17331 later on, since the use of SP here establishes SP as
17332 the CFA register and not IP.
17334 Anyway this instruction is not really part of the stack
17335 frame creation although it is part of the prologue. */
17337 else if (IS_NESTED (func_type))
17338 {
17339 /* The Static chain register is the same as the IP register
17340 used as a scratch register during stack frame creation.
17341 To get around this need to find somewhere to store IP
17342 whilst the frame is being created. We try the following
17343 places in order:
17345 1. The last argument register.
17346 2. A slot on the stack above the frame. (This only
17347 works if the function is not a varargs function).
17348 3. Register r3, after pushing the argument registers
17349 onto the stack.
17351 Note - we only need to tell the dwarf2 backend about the SP
17352 adjustment in the second variant; the static chain register
17353 doesn't need to be unwound, as it doesn't contain a value
17354 inherited from the caller. */
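/* A worked example (illustrative): a nested varargs function with r3
live cannot use the stack slot above the frame (option 2), so it
pushes the argument registers first and then parks IP in r3
(option 3). */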
17356 if (df_regs_ever_live_p (3) == false)
17357 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
17358 else if (args_to_push == 0)
17359 {
17360 rtx dwarf;
17362 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
17363 saved_regs += 4;
17365 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
17366 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
17367 fp_offset = 4;
17369 /* Just tell the dwarf backend that we adjusted SP. */
17370 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17371 plus_constant (Pmode, stack_pointer_rtx,
17372 -fp_offset));
17373 RTX_FRAME_RELATED_P (insn) = 1;
17374 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
17375 }
17376 else
17377 {
17378 /* Store the args on the stack. */
17379 if (cfun->machine->uses_anonymous_args)
17380 insn = emit_multi_reg_push
17381 ((0xf0 >> (args_to_push / 4)) & 0xf);
17382 else
17383 insn = emit_insn
17384 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17385 GEN_INT (- args_to_push)));
17387 RTX_FRAME_RELATED_P (insn) = 1;
17389 saved_pretend_args = 1;
17390 fp_offset = args_to_push;
17391 args_to_push = 0;
17393 /* Now reuse r3 to preserve IP. */
17394 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
17395 }
17396 }
17398 insn = emit_set_insn (ip_rtx,
17399 plus_constant (Pmode, stack_pointer_rtx,
17400 fp_offset));
17401 RTX_FRAME_RELATED_P (insn) = 1;
17402 }
17404 if (args_to_push)
17405 {
17406 /* Push the argument registers, or reserve space for them. */
17407 if (cfun->machine->uses_anonymous_args)
17408 insn = emit_multi_reg_push
17409 ((0xf0 >> (args_to_push / 4)) & 0xf);
17410 else
17411 insn = emit_insn
17412 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17413 GEN_INT (- args_to_push)));
17414 RTX_FRAME_RELATED_P (insn) = 1;
17415 }
17417 /* If this is an interrupt service routine, and the link register
17418 is going to be pushed, and we're not generating extra
17419 push of IP (needed when frame is needed and frame layout if apcs),
17420 subtracting four from LR now will mean that the function return
17421 can be done with a single instruction. */
17422 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
17423 && (live_regs_mask & (1 << LR_REGNUM)) != 0
17424 && !(frame_pointer_needed && TARGET_APCS_FRAME)
17425 && TARGET_ARM)
17426 {
17427 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
17429 emit_set_insn (lr, plus_constant (SImode, lr, -4));
17430 }
17432 if (live_regs_mask)
17433 {
17434 saved_regs += bit_count (live_regs_mask) * 4;
17435 if (optimize_size && !frame_pointer_needed
17436 && saved_regs == offsets->saved_regs - offsets->saved_args)
17437 {
17438 /* If no coprocessor registers are being pushed and we don't have
17439 to worry about a frame pointer then push extra registers to
17440 create the stack frame. This is done in a way that does not
17441 alter the frame layout, so is independent of the epilogue. */
17442 int n;
17443 int frame;
17444 n = 0;
17445 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
17446 n++;
17447 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
17448 if (frame && n * 4 >= frame)
17449 {
17450 n = frame / 4;
17451 live_regs_mask |= (1 << n) - 1;
17452 saved_regs += frame;
17453 }
17454 }
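/* A worked example (illustrative): a function that pushes {r4, lr} and
needs an 8-byte frame gets r0 and r1 added to the mask instead of a
separate "sub sp, sp, #8"; the two dead argument registers become the
frame itself. */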
17456 if (current_tune->prefer_ldrd_strd
17457 && !optimize_function_for_size_p (cfun))
17458 {
17459 if (TARGET_THUMB2)
17460 {
17461 thumb2_emit_strd_push (live_regs_mask);
17462 }
17463 else
17464 {
17465 insn = emit_multi_reg_push (live_regs_mask);
17466 RTX_FRAME_RELATED_P (insn) = 1;
17467 }
17468 }
17469 else
17470 {
17471 insn = emit_multi_reg_push (live_regs_mask);
17472 RTX_FRAME_RELATED_P (insn) = 1;
17473 }
17474 }
17476 if (! IS_VOLATILE (func_type))
17477 saved_regs += arm_save_coproc_regs ();
17479 if (frame_pointer_needed && TARGET_ARM)
17480 {
17481 /* Create the new frame pointer. */
17482 if (TARGET_APCS_FRAME)
17483 {
17484 insn = GEN_INT (-(4 + args_to_push + fp_offset));
17485 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
17486 RTX_FRAME_RELATED_P (insn) = 1;
17488 if (IS_NESTED (func_type))
17489 {
17490 /* Recover the static chain register. */
17491 if (!df_regs_ever_live_p (3)
17492 || saved_pretend_args)
17493 insn = gen_rtx_REG (SImode, 3);
17494 else /* if (crtl->args.pretend_args_size == 0) */
17495 {
17496 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
17497 insn = gen_frame_mem (SImode, insn);
17498 }
17499 emit_set_insn (ip_rtx, insn);
17500 /* Add a USE to stop propagate_one_insn() from barfing. */
17501 emit_insn (gen_force_register_use (ip_rtx));
17502 }
17503 }
17504 else
17505 {
17506 insn = GEN_INT (saved_regs - 4);
17507 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
17508 stack_pointer_rtx, insn));
17509 RTX_FRAME_RELATED_P (insn) = 1;
17510 }
17511 }
17513 if (flag_stack_usage_info)
17514 current_function_static_stack_size
17515 = offsets->outgoing_args - offsets->saved_args;
17517 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
17518 {
17519 /* This add can produce multiple insns for a large constant, so we
17520 need to get tricky. */
17521 rtx last = get_last_insn ();
17523 amount = GEN_INT (offsets->saved_args + saved_regs
17524 - offsets->outgoing_args);
17526 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17527 amount));
17528 do
17529 {
17530 last = last ? NEXT_INSN (last) : get_insns ();
17531 RTX_FRAME_RELATED_P (last) = 1;
17532 }
17533 while (last != insn);
17535 /* If the frame pointer is needed, emit a special barrier that
17536 will prevent the scheduler from moving stores to the frame
17537 before the stack adjustment. */
17538 if (frame_pointer_needed)
17539 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
17540 hard_frame_pointer_rtx));
17541 }
17544 if (frame_pointer_needed && TARGET_THUMB2)
17545 thumb_set_frame_pointer (offsets);
17547 if (flag_pic && arm_pic_register != INVALID_REGNUM)
17548 {
17549 unsigned long mask;
17551 mask = live_regs_mask;
17552 mask &= THUMB2_WORK_REGS;
17553 if (!IS_NESTED (func_type))
17554 mask |= (1 << IP_REGNUM);
17555 arm_load_pic_register (mask);
17556 }
17558 /* If we are profiling, make sure no instructions are scheduled before
17559 the call to mcount. Similarly if the user has requested no
17560 scheduling in the prolog. Similarly if we want non-call exceptions
17561 using the EABI unwinder, to prevent faulting instructions from being
17562 swapped with a stack adjustment. */
17563 if (crtl->profile || !TARGET_SCHED_PROLOG
17564 || (arm_except_unwind_info (&global_options) == UI_TARGET
17565 && cfun->can_throw_non_call_exceptions))
17566 emit_insn (gen_blockage ());
17568 /* If the link register is being kept alive, with the return address in it,
17569 then make sure that it does not get reused by the ce2 pass. */
17570 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
17571 cfun->machine->lr_save_eliminated = 1;
17572 }
17574 /* Print condition code to STREAM. Helper function for arm_print_operand. */
17575 static void
17576 arm_print_condition (FILE *stream)
17577 {
17578 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
17579 {
17580 /* Branch conversion is not implemented for Thumb-2. */
17581 if (TARGET_THUMB)
17582 {
17583 output_operand_lossage ("predicated Thumb instruction");
17584 return;
17585 }
17586 if (current_insn_predicate != NULL)
17587 {
17588 output_operand_lossage
17589 ("predicated instruction in conditional sequence");
17590 return;
17591 }
17593 fputs (arm_condition_codes[arm_current_cc], stream);
17594 }
17595 else if (current_insn_predicate)
17596 {
17597 enum arm_cond_code code;
17599 if (TARGET_THUMB1)
17600 {
17601 output_operand_lossage ("predicated Thumb instruction");
17602 return;
17603 }
17605 code = get_arm_condition_code (current_insn_predicate);
17606 fputs (arm_condition_codes[code], stream);
17607 }
17608 }
17611 /* If CODE is 'd', then the X is a condition operand and the instruction
17612 should only be executed if the condition is true.
17613 if CODE is 'D', then the X is a condition operand and the instruction
17614 should only be executed if the condition is false: however, if the mode
17615 of the comparison is CCFPEmode, then always execute the instruction -- we
17616 do this because in these circumstances !GE does not necessarily imply LT;
17617 in these cases the instruction pattern will take care to make sure that
17618 an instruction containing %d will follow, thereby undoing the effects of
17619 doing this instruction unconditionally.
17620 If CODE is 'N' then X is a floating point operand that must be negated
17621 before output.
17622 If CODE is 'B' then output a bitwise inverted value of X (a const int).
17623 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
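/* For example (illustrative): "%d0" applied to an (eq ...) operand
prints "eq", while "%D0" on the same operand prints "ne". */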
17624 static void
17625 arm_print_operand (FILE *stream, rtx x, int code)
17626 {
17627 switch (code)
17628 {
17629 case '@':
17630 fputs (ASM_COMMENT_START, stream);
17631 return;
17633 case '_':
17634 fputs (user_label_prefix, stream);
17635 return;
17637 case '|':
17638 fputs (REGISTER_PREFIX, stream);
17639 return;
17641 case '?':
17642 arm_print_condition (stream);
17643 return;
17645 case '(':
17646 /* Nothing in unified syntax, otherwise the current condition code. */
17647 if (!TARGET_UNIFIED_ASM)
17648 arm_print_condition (stream);
17649 break;
17651 case ')':
17652 /* The current condition code in unified syntax, otherwise nothing. */
17653 if (TARGET_UNIFIED_ASM)
17654 arm_print_condition (stream);
17655 break;
17657 case '.':
17658 /* The current condition code for a condition code setting instruction.
17659 Preceded by 's' in unified syntax, otherwise followed by 's'. */
17660 if (TARGET_UNIFIED_ASM)
17661 {
17662 fputc('s', stream);
17663 arm_print_condition (stream);
17664 }
17665 else
17666 {
17667 arm_print_condition (stream);
17668 fputc('s', stream);
17669 }
17670 return;
17672 case '!':
17673 /* If the instruction is conditionally executed then print
17674 the current condition code, otherwise print 's'. */
17675 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
17676 if (current_insn_predicate)
17677 arm_print_condition (stream);
17678 else
17679 fputc('s', stream);
17680 break;
17681 case '#':
17682 /* %# is a "break" sequence. It doesn't output anything, but is used to
17683 separate e.g. operand numbers from following text, if that text consists
17684 of further digits which we don't want to be part of the operand
17685 number. */
17686 return;
17688 case 'N':
17689 {
17690 REAL_VALUE_TYPE r;
17692 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17693 r = real_value_negate (&r);
17694 fprintf (stream, "%s", fp_const_from_val (&r));
17695 }
17696 return;
17697 case 'c':
17698 /* An integer or symbol address without a preceding # sign. */
17700 switch (GET_CODE (x))
17701 {
17702 case CONST_INT:
17703 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17704 break;
17706 case SYMBOL_REF:
17707 output_addr_const (stream, x);
17708 break;
17710 case CONST:
17711 if (GET_CODE (XEXP (x, 0)) == PLUS
17712 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
17713 {
17714 output_addr_const (stream, x);
17715 break;
17716 }
17717 /* Fall through. */
17719 default:
17720 output_operand_lossage ("Unsupported operand for code '%c'", code);
17721 }
17722 return;
17724 /* An integer that we want to print in HEX. */
17726 switch (GET_CODE (x))
17727 {
17728 case CONST_INT:
17729 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17730 break;
17732 default:
17733 output_operand_lossage ("Unsupported operand for code '%c'", code);
17734 }
17735 return;
17737 case 'B':
17738 if (CONST_INT_P (x))
17739 {
17740 HOST_WIDE_INT val;
17741 val = ARM_SIGN_EXTEND (~INTVAL (x));
17742 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
17743 }
17744 else
17745 {
17746 putc ('~', stream);
17747 output_addr_const (stream, x);
17748 }
17749 return;
17752 /* The low 16 bits of an immediate constant. */
17753 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
17754 return;
17757 fprintf (stream, "%s", arithmetic_instr (x, 1));
17758 return;
17761 fprintf (stream, "%s", arithmetic_instr (x, 0));
17762 return;
17764 case 'S':
17765 {
17766 HOST_WIDE_INT val;
17767 const char *shift;
17769 shift = shift_op (x, &val);
17771 if (shift)
17772 {
17773 fprintf (stream, ", %s ", shift);
17774 if (val == -1)
17775 arm_print_operand (stream, XEXP (x, 1), 0);
17776 else
17777 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
17778 }
17779 }
17780 return;
17782 /* An explanation of the 'Q', 'R' and 'H' register operands:
17784 In a pair of registers containing a DI or DF value the 'Q'
17785 operand returns the register number of the register containing
17786 the least significant part of the value. The 'R' operand returns
17787 the register number of the register containing the most
17788 significant part of the value.
17790 The 'H' operand returns the higher of the two register numbers.
17791 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
17792 same as the 'Q' operand, since the most significant part of the
17793 value is held in the lower number register. The reverse is true
17794 on systems where WORDS_BIG_ENDIAN is false.
17796 The purpose of these operands is to distinguish between cases
17797 where the endian-ness of the values is important (for example
17798 when they are added together), and cases where the endian-ness
17799 is irrelevant, but the order of register operations is important.
17800 For example when loading a value from memory into a register
17801 pair, the endian-ness does not matter. Provided that the value
17802 from the lower memory address is put into the lower numbered
17803 register, and the value from the higher address is put into the
17804 higher numbered register, the load will work regardless of whether
17805 the value being loaded is big-wordian or little-wordian. The
17806 order of the two register loads can matter however, if the address
17807 of the memory location is actually held in one of the registers
17808 being overwritten by the load.
17810 The 'Q' and 'R' constraints are also available for 64-bit constants. */
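/* A worked example (illustrative, not from the original source): on a
   target where WORDS_BIG_ENDIAN is false, a DImode value held in the
   register pair r4/r5 prints as r4 for '%Q' (least significant word),
   r5 for '%R' (most significant word) and r5 for '%H' (the
   higher-numbered register); were WORDS_BIG_ENDIAN true, '%Q' and '%H'
   would both print r5. */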
17813 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
17815 rtx part = gen_lowpart (SImode, x);
17816 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17820 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17822 output_operand_lossage ("invalid operand for code '%c'", code);
17826 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
17830 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
17832 enum machine_mode mode = GET_MODE (x);
17835 if (mode == VOIDmode)
mode = DImode;
17837 part = gen_highpart_mode (SImode, mode, x);
17838 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17842 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17844 output_operand_lossage ("invalid operand for code '%c'", code);
17848 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
17852 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17854 output_operand_lossage ("invalid operand for code '%c'", code);
17858 asm_fprintf (stream, "%r", REGNO (x) + 1);
17862 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17864 output_operand_lossage ("invalid operand for code '%c'", code);
17868 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
17872 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17874 output_operand_lossage ("invalid operand for code '%c'", code);
17878 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
17882 asm_fprintf (stream, "%r",
17883 REG_P (XEXP (x, 0))
17884 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
17888 asm_fprintf (stream, "{%r-%r}",
REGNO (x),
17890 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
17893 /* Like 'M', but writing doubleword vector registers, for use by Neon insns. */
17897 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
17898 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
if (numregs == 1)
17900 asm_fprintf (stream, "{d%d}", regno);
else
17902 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
17907 /* CONST_TRUE_RTX means always -- that's the default. */
17908 if (x == const_true_rtx)
17911 if (!COMPARISON_P (x))
17913 output_operand_lossage ("invalid operand for code '%c'", code);
17917 fputs (arm_condition_codes[get_arm_condition_code (x)], stream);
17922 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
17923 want to do that. */
17924 if (x == const_true_rtx)
17926 output_operand_lossage ("instruction never executed");
17929 if (!COMPARISON_P (x))
17931 output_operand_lossage ("invalid operand for code '%c'", code);
17935 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
17936 (get_arm_condition_code (x))], stream);
17946 /* Former Maverick support, removed after GCC-4.7. */
17947 output_operand_lossage ("obsolete Maverick format code '%c'", code);
17952 if (!REG_P (x) || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
17953 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
17954 /* Bad value for wCG register number. */
17956 output_operand_lossage ("invalid operand for code '%c'", code);
17961 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
17964 /* Print an iWMMXt control register name. */
17966 if (!CONST_INT_P (x)
|| INTVAL (x) < 0
17968 || INTVAL (x) >= 16)
17969 /* Bad value for wC register number. */
17971 output_operand_lossage ("invalid operand for code '%c'", code);
17977 static const char * wc_reg_names [16] =
17979 "wCID", "wCon", "wCSSF", "wCASF",
17980 "wC4", "wC5", "wC6", "wC7",
17981 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
17982 "wC12", "wC13", "wC14", "wC15"
17985 fputs (wc_reg_names [INTVAL (x)], stream);
17989 /* Print the high single-precision register of a VFP double-precision register. */
17993 int mode = GET_MODE (x);
17996 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
17998 output_operand_lossage ("invalid operand for code '%c'", code);
18003 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
18005 output_operand_lossage ("invalid operand for code '%c'", code);
18009 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
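/* E.g. (illustrative): a DFmode value in d3, i.e. s6/s7, prints as
   "s7" here -- the high single-precision half. */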
18013 /* Print a VFP/Neon double precision or quad precision register name. */
18017 int mode = GET_MODE (x);
18018 int is_quad = (code == 'q');
18021 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
18023 output_operand_lossage ("invalid operand for code '%c'", code);
18028 if (!REG_P (x) || !IS_VFP_REGNUM (REGNO (x)))
18030 output_operand_lossage ("invalid operand for code '%c'", code);
18035 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
18036 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
18038 output_operand_lossage ("invalid operand for code '%c'", code);
18042 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
18043 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
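/* E.g. (illustrative): a DFmode value occupying s12/s13 prints as
   "d6", and a 16-byte vector based at s8 prints as "q2". */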
18047 /* These two codes print the low/high doubleword register of a Neon quad
18048 register, respectively. For pair-structure types, can also print
18049 low/high quadword registers. */
18053 int mode = GET_MODE (x);
18056 if ((GET_MODE_SIZE (mode) != 16
18057 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
18059 output_operand_lossage ("invalid operand for code '%c'", code);
18064 if (!NEON_REGNO_OK_FOR_QUAD (regno))
18066 output_operand_lossage ("invalid operand for code '%c'", code);
18070 if (GET_MODE_SIZE (mode) == 16)
18071 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
18072 + (code == 'f' ? 1 : 0));
else
18074 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
18075 + (code == 'f' ? 1 : 0));
18079 /* Print a VFPv3 floating-point constant, represented as an integer index. */
18083 int index = vfp3_const_double_index (x);
18084 gcc_assert (index != -1);
18085 fprintf (stream, "%d", index);
18089 /* Print bits representing opcode features for Neon.
18091 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
18092 and polynomials as unsigned.
18094 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
18096 Bit 2 is 1 for rounding functions, 0 otherwise. */
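/* A worked example (illustrative, not from the original source): a
   signed rounding operation passes bits = 1 | 4 = 5, so the type code
   below prints 's' and the rounding code prints "r", selecting e.g.
   "vrhadd.s8" rather than "vhadd.u8". */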
18098 /* Identify the type as 's', 'u', 'p' or 'f'. */
18101 HOST_WIDE_INT bits = INTVAL (x);
18102 fputc ("uspf"[bits & 3], stream);
18106 /* Likewise, but signed and unsigned integers are both 'i'. */
18109 HOST_WIDE_INT bits = INTVAL (x);
18110 fputc ("iipf"[bits & 3], stream);
18114 /* As for 'T', but emit 'u' instead of 'p'. */
18117 HOST_WIDE_INT bits = INTVAL (x);
18118 fputc ("usuf"[bits & 3], stream);
18122 /* Bit 2: rounding (vs none). */
18125 HOST_WIDE_INT bits = INTVAL (x);
18126 fputs ((bits & 4) != 0 ? "r" : "", stream);
18130 /* Memory operand for vld1/vst1 instruction. */
18134 bool postinc = false;
18135 unsigned align, memsize, align_bits;
18137 gcc_assert (MEM_P (x));
18138 addr = XEXP (x, 0);
18139 if (GET_CODE (addr) == POST_INC)
{
postinc = true;
18142 addr = XEXP (addr, 0);
}
18144 asm_fprintf (stream, "[%r", REGNO (addr));
18146 /* We know the alignment of this access, so we can emit a hint in the
18147 instruction (for some alignments) as an aid to the memory subsystem of the target. */
18149 align = MEM_ALIGN (x) >> 3;
18150 memsize = MEM_SIZE (x);
18152 /* Only certain alignment specifiers are supported by the hardware. */
18153 if (memsize == 32 && (align % 32) == 0)
align_bits = 256;
18155 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
align_bits = 128;
18157 else if (memsize >= 8 && (align % 8) == 0)
align_bits = 64;
else
align_bits = 0;
18162 if (align_bits != 0)
18163 asm_fprintf (stream, ":%d", align_bits);
18165 asm_fprintf (stream, "]");
if (postinc)
18168 fputs ("!", stream);
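/* For example (illustrative): an 8-byte access through r1 with 8-byte
   alignment and post-increment prints as "[r1:64]!". */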
18176 gcc_assert (MEM_P (x));
18177 addr = XEXP (x, 0);
18178 gcc_assert (REG_P (addr));
18179 asm_fprintf (stream, "[%r]", REGNO (addr));
18183 /* Translate an S register number into a D register number and element index. */
18186 int mode = GET_MODE (x);
18189 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
18191 output_operand_lossage ("invalid operand for code '%c'", code);
18196 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
18198 output_operand_lossage ("invalid operand for code '%c'", code);
18202 regno = regno - FIRST_VFP_REGNUM;
18203 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
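/* E.g. (illustrative): single-precision register s5 is the upper half
   of d2, so it prints as "d2[1]"; s4 would print as "d2[0]". */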
18208 gcc_assert (CONST_DOUBLE_P (x));
18209 fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
18212 /* Register specifier for vld1.16/vst1.16. Translate the S register
18213 number into a D register number and element index. */
18216 int mode = GET_MODE (x);
18219 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
18221 output_operand_lossage ("invalid operand for code '%c'", code);
18226 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
18228 output_operand_lossage ("invalid operand for code '%c'", code);
18232 regno = regno - FIRST_VFP_REGNUM;
18233 fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
18240 output_operand_lossage ("missing operand");
18244 switch (GET_CODE (x))
18247 asm_fprintf (stream, "%r", REGNO (x));
18251 output_memory_reference_mode = GET_MODE (x);
18252 output_address (XEXP (x, 0));
18259 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
18260 sizeof (fpstr), 0, 1);
18261 fprintf (stream, "#%s", fpstr);
18264 fprintf (stream, "#%s", fp_immediate_constant (x));
18268 gcc_assert (GET_CODE (x) != NEG);
18269 fputc ('#', stream);
18270 if (GET_CODE (x) == HIGH)
18272 fputs (":lower16:", stream);
18276 output_addr_const (stream, x);
18282 /* Target hook for printing a memory address. */
18284 arm_print_operand_address (FILE *stream, rtx x)
18288 int is_minus = GET_CODE (x) == MINUS;
18291 asm_fprintf (stream, "[%r]", REGNO (x));
18292 else if (GET_CODE (x) == PLUS || is_minus)
18294 rtx base = XEXP (x, 0);
18295 rtx index = XEXP (x, 1);
18296 HOST_WIDE_INT offset = 0;
18298 if (!REG_P (base) || (REG_P (index) && REGNO (index) == SP_REGNUM))
18300 /* Ensure that BASE is a register. */
18301 /* (one of them must be). */
18302 /* Also ensure the SP is not used as an index register. */
rtx temp = base;
base = index;
index = temp;
18307 switch (GET_CODE (index))
18310 offset = INTVAL (index);
18313 asm_fprintf (stream, "[%r, #%wd]",
18314 REGNO (base), offset);
18318 asm_fprintf (stream, "[%r, %s%r]",
18319 REGNO (base), is_minus ? "-" : "",
18329 asm_fprintf (stream, "[%r, %s%r",
18330 REGNO (base), is_minus ? "-" : "",
18331 REGNO (XEXP (index, 0)));
18332 arm_print_operand (stream, index, 'S');
18333 fputs ("]", stream);
18338 gcc_unreachable ();
18341 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
18342 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
18344 extern enum machine_mode output_memory_reference_mode;
18346 gcc_assert (REG_P (XEXP (x, 0)));
18348 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
18349 asm_fprintf (stream, "[%r, #%s%d]!",
18350 REGNO (XEXP (x, 0)),
18351 GET_CODE (x) == PRE_DEC ? "-" : "",
18352 GET_MODE_SIZE (output_memory_reference_mode));
18354 asm_fprintf (stream, "[%r], #%s%d",
18355 REGNO (XEXP (x, 0)),
18356 GET_CODE (x) == POST_DEC ? "-" : "",
18357 GET_MODE_SIZE (output_memory_reference_mode));
18359 else if (GET_CODE (x) == PRE_MODIFY)
18361 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
18362 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
18363 asm_fprintf (stream, "#%wd]!",
18364 INTVAL (XEXP (XEXP (x, 1), 1)));
18366 asm_fprintf (stream, "%r]!",
18367 REGNO (XEXP (XEXP (x, 1), 1)));
18369 else if (GET_CODE (x) == POST_MODIFY)
18371 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
18372 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
18373 asm_fprintf (stream, "#%wd",
18374 INTVAL (XEXP (XEXP (x, 1), 1)));
18376 asm_fprintf (stream, "%r",
18377 REGNO (XEXP (XEXP (x, 1), 1)));
18379 else output_addr_const (stream, x);
18384 asm_fprintf (stream, "[%r]", REGNO (x));
18385 else if (GET_CODE (x) == POST_INC)
18386 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
18387 else if (GET_CODE (x) == PLUS)
18389 gcc_assert (REG_P (XEXP (x, 0)));
18390 if (CONST_INT_P (XEXP (x, 1)))
18391 asm_fprintf (stream, "[%r, #%wd]",
18392 REGNO (XEXP (x, 0)),
18393 INTVAL (XEXP (x, 1)));
18395 asm_fprintf (stream, "[%r, %r]",
18396 REGNO (XEXP (x, 0)),
18397 REGNO (XEXP (x, 1)));
18400 output_addr_const (stream, x);
18404 /* Target hook for indicating whether a punctuation character for
18405 TARGET_PRINT_OPERAND is valid. */
18407 arm_print_operand_punct_valid_p (unsigned char code)
18409 return (code == '@' || code == '|' || code == '.'
18410 || code == '(' || code == ')' || code == '#'
18411 || (TARGET_32BIT && (code == '?'))
18412 || (TARGET_THUMB2 && (code == '!'))
18413 || (TARGET_THUMB && (code == '_')));
18416 /* Target hook for assembling integer objects. The ARM version needs to
18417 handle word-sized values specially. */
18419 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
18421 enum machine_mode mode;
18423 if (size == UNITS_PER_WORD && aligned_p)
18425 fputs ("\t.word\t", asm_out_file);
18426 output_addr_const (asm_out_file, x);
18428 /* Mark symbols as position independent. We only do this in the
18429 .text segment, not in the .data segment. */
18430 if (NEED_GOT_RELOC && flag_pic && making_const_table
18431 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
18433 /* See legitimize_pic_address for an explanation of the
18434 TARGET_VXWORKS_RTP check. */
18435 if (TARGET_VXWORKS_RTP
18436 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
18437 fputs ("(GOT)", asm_out_file);
else
18439 fputs ("(GOTOFF)", asm_out_file);
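/* For example (illustrative): a PIC constant-table reference to a
   non-local symbol "foo" is emitted as "\t.word\tfoo(GOT)", while a
   local symbol gets "(GOTOFF)" and is resolved relative to the GOT
   base. */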
18441 fputc ('\n', asm_out_file);
18445 mode = GET_MODE (x);
18447 if (arm_vector_mode_supported_p (mode))
18451 gcc_assert (GET_CODE (x) == CONST_VECTOR);
18453 units = CONST_VECTOR_NUNITS (x);
18454 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
18456 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
18457 for (i = 0; i < units; i++)
18459 rtx elt = CONST_VECTOR_ELT (x, i);
18461 assemble_integer (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
else
18464 for (i = 0; i < units; i++)
18466 rtx elt = CONST_VECTOR_ELT (x, i);
18467 REAL_VALUE_TYPE rval;
18469 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
18472 assemble_real (rval, GET_MODE_INNER (mode),
18473 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
18479 return default_assemble_integer (x, size, aligned_p);
18483 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
18487 if (!TARGET_AAPCS_BASED)
18490 (is_ctor ? default_named_section_asm_out_constructor
18491 : default_named_section_asm_out_destructor) (symbol, priority);
18495 /* Put these in the .init_array section, using a special relocation. */
18496 if (priority != DEFAULT_INIT_PRIORITY)
18499 sprintf (buf, "%s.%.5u",
18500 is_ctor ? ".init_array" : ".fini_array", priority);
18502 s = get_section (buf, SECTION_WRITE, NULL_TREE);
18509 switch_to_section (s);
18510 assemble_align (POINTER_SIZE);
18511 fputs ("\t.word\t", asm_out_file);
18512 output_addr_const (asm_out_file, symbol);
18513 fputs ("(target1)\n", asm_out_file);
18516 /* Add a function to the list of static constructors. */
18519 arm_elf_asm_constructor (rtx symbol, int priority)
18521 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
18524 /* Add a function to the list of static destructors. */
18527 arm_elf_asm_destructor (rtx symbol, int priority)
18529 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
18532 /* A finite state machine takes care of noticing whether or not instructions
18533 can be conditionally executed, thus decreasing execution time and code
18534 size by deleting branch instructions. The fsm is controlled by
18535 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
18537 /* The states of the fsm controlling condition codes are:
18538 0: normal, do nothing special
18539 1: make ASM_OUTPUT_OPCODE not output this instruction
18540 2: make ASM_OUTPUT_OPCODE not output this instruction
18541 3: make instructions conditional
18542 4: make instructions conditional
18544 State transitions (state->state by whom under condition):
18545 0 -> 1 final_prescan_insn if the `target' is a label
18546 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
18547 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
18548 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
18549 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
18550 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
18551 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
18552 (the target insn is arm_target_insn).
18554 If the jump clobbers the conditions then we use states 2 and 4.
18556 A similar thing can be done with conditional return insns.
18558 XXX In case the `target' is an unconditional branch, this conditionalising
18559 of the instructions always reduces code size, but not always execution
18560 time. But then, I want to reduce the code size to somewhere near what
18561 /bin/cc produces. */
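/* As a concrete example (illustrative, not from the original source),
   the fsm rewrites

   cmp r0, #0
   beq .L1
   add r1, r1, #1
   .L1:

   as

   cmp r0, #0
   addne r1, r1, #1

   suppressing the branch (state 1) and conditionalising the skipped
   instruction (state 3). */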
18563 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
18564 instructions. When a COND_EXEC instruction is seen the subsequent
18565 instructions are scanned so that multiple conditional instructions can be
18566 combined into a single IT block. arm_condexec_count and arm_condexec_mask
18567 specify the length and true/false mask for the IT block. These will be
18568 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
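/* For instance (illustrative): adjacent COND_EXEC insns predicated on
   EQ and NE can share one IT block, emitted as

   ite eq
   moveq r0, #1
   movne r0, #0

   in which case arm_condexec_mask is binary 01 and
   arm_condexec_masklen is 2. */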
18570 /* Returns the index of the ARM condition code string in
18571 `arm_condition_codes', or ARM_NV if the comparison is invalid.
18572 COMPARISON should be an rtx like `(eq (...) (...))'. */
18575 maybe_get_arm_condition_code (rtx comparison)
18577 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
18578 enum arm_cond_code code;
18579 enum rtx_code comp_code = GET_CODE (comparison);
18581 if (GET_MODE_CLASS (mode) != MODE_CC)
18582 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
18583 XEXP (comparison, 1));
18587 case CC_DNEmode: code = ARM_NE; goto dominance;
18588 case CC_DEQmode: code = ARM_EQ; goto dominance;
18589 case CC_DGEmode: code = ARM_GE; goto dominance;
18590 case CC_DGTmode: code = ARM_GT; goto dominance;
18591 case CC_DLEmode: code = ARM_LE; goto dominance;
18592 case CC_DLTmode: code = ARM_LT; goto dominance;
18593 case CC_DGEUmode: code = ARM_CS; goto dominance;
18594 case CC_DGTUmode: code = ARM_HI; goto dominance;
18595 case CC_DLEUmode: code = ARM_LS; goto dominance;
18596 case CC_DLTUmode: code = ARM_CC;
dominance:
18599 if (comp_code == EQ)
18600 return ARM_INVERSE_CONDITION_CODE (code);
18601 if (comp_code == NE)
return code;
return ARM_NV;
18608 case NE: return ARM_NE;
18609 case EQ: return ARM_EQ;
18610 case GE: return ARM_PL;
18611 case LT: return ARM_MI;
18612 default: return ARM_NV;
18618 case NE: return ARM_NE;
18619 case EQ: return ARM_EQ;
18620 default: return ARM_NV;
18626 case NE: return ARM_MI;
18627 case EQ: return ARM_PL;
18628 default: return ARM_NV;
18633 /* We can handle all cases except UNEQ and LTGT. */
18636 case GE: return ARM_GE;
18637 case GT: return ARM_GT;
18638 case LE: return ARM_LS;
18639 case LT: return ARM_MI;
18640 case NE: return ARM_NE;
18641 case EQ: return ARM_EQ;
18642 case ORDERED: return ARM_VC;
18643 case UNORDERED: return ARM_VS;
18644 case UNLT: return ARM_LT;
18645 case UNLE: return ARM_LE;
18646 case UNGT: return ARM_HI;
18647 case UNGE: return ARM_PL;
18648 /* UNEQ and LTGT do not have a representation. */
18649 case UNEQ: /* Fall through. */
18650 case LTGT: /* Fall through. */
18651 default: return ARM_NV;
18657 case NE: return ARM_NE;
18658 case EQ: return ARM_EQ;
18659 case GE: return ARM_LE;
18660 case GT: return ARM_LT;
18661 case LE: return ARM_GE;
18662 case LT: return ARM_GT;
18663 case GEU: return ARM_LS;
18664 case GTU: return ARM_CC;
18665 case LEU: return ARM_CS;
18666 case LTU: return ARM_HI;
18667 default: return ARM_NV;
18673 case LTU: return ARM_CS;
18674 case GEU: return ARM_CC;
18675 default: return ARM_NV;
18681 case NE: return ARM_NE;
18682 case EQ: return ARM_EQ;
18683 case GEU: return ARM_CS;
18684 case GTU: return ARM_HI;
18685 case LEU: return ARM_LS;
18686 case LTU: return ARM_CC;
18687 default: return ARM_NV;
18693 case GE: return ARM_GE;
18694 case LT: return ARM_LT;
18695 case GEU: return ARM_CS;
18696 case LTU: return ARM_CC;
18697 default: return ARM_NV;
18703 case NE: return ARM_NE;
18704 case EQ: return ARM_EQ;
18705 case GE: return ARM_GE;
18706 case GT: return ARM_GT;
18707 case LE: return ARM_LE;
18708 case LT: return ARM_LT;
18709 case GEU: return ARM_CS;
18710 case GTU: return ARM_HI;
18711 case LEU: return ARM_LS;
18712 case LTU: return ARM_CC;
18713 default: return ARM_NV;
18716 default: gcc_unreachable ();
18720 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
18721 static enum arm_cond_code
18722 get_arm_condition_code (rtx comparison)
18724 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
18725 gcc_assert (code != ARM_NV);
18729 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed instructions. */
18732 thumb2_final_prescan_insn (rtx insn)
18734 rtx first_insn = insn;
18735 rtx body = PATTERN (insn);
18737 enum arm_cond_code code;
18741 /* Remove the previous insn from the count of insns to be output. */
18742 if (arm_condexec_count)
18743 arm_condexec_count--;
18745 /* Nothing to do if we are already inside a conditional block. */
18746 if (arm_condexec_count)
18749 if (GET_CODE (body) != COND_EXEC)
18752 /* Conditional jumps are implemented directly. */
18756 predicate = COND_EXEC_TEST (body);
18757 arm_current_cc = get_arm_condition_code (predicate);
18759 n = get_attr_ce_count (insn);
18760 arm_condexec_count = 1;
18761 arm_condexec_mask = (1 << n) - 1;
18762 arm_condexec_masklen = n;
18763 /* See if subsequent instructions can be combined into the same block. */
18766 insn = next_nonnote_insn (insn);
18768 /* Jumping into the middle of an IT block is illegal, so a label or
18769 barrier terminates the block. */
18770 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
18773 body = PATTERN (insn);
18774 /* USE and CLOBBER aren't really insns, so just skip them. */
18775 if (GET_CODE (body) == USE
18776 || GET_CODE (body) == CLOBBER)
18779 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
18780 if (GET_CODE (body) != COND_EXEC)
18782 /* Allow up to 4 conditionally executed instructions in a block. */
18783 n = get_attr_ce_count (insn);
18784 if (arm_condexec_masklen + n > 4)
18787 predicate = COND_EXEC_TEST (body);
18788 code = get_arm_condition_code (predicate);
18789 mask = (1 << n) - 1;
18790 if (arm_current_cc == code)
18791 arm_condexec_mask |= (mask << arm_condexec_masklen);
18792 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
18795 arm_condexec_count++;
18796 arm_condexec_masklen += n;
18798 /* A jump must be the last instruction in a conditional block. */
18802 /* Restore recog_data (getting the attributes of other insns can
18803 destroy this array, but final.c assumes that it remains intact
18804 across this call). */
18805 extract_constrain_insn_cached (first_insn);
18809 arm_final_prescan_insn (rtx insn)
18811 /* BODY will hold the body of INSN. */
18812 rtx body = PATTERN (insn);
18814 /* This will be 1 if trying to repeat the trick, and things need to be
18815 reversed if it appears to fail. */
18818 /* If we start with a return insn, we only succeed if we find another one. */
18819 int seeking_return = 0;
18820 enum rtx_code return_code = UNKNOWN;
18822 /* START_INSN will hold the insn from where we start looking. This is the
18823 first insn after the following code_label if REVERSE is true. */
18824 rtx start_insn = insn;
18826 /* If in state 4, check if the target branch is reached, in order to
18827 change back to state 0. */
18828 if (arm_ccfsm_state == 4)
18830 if (insn == arm_target_insn)
18832 arm_target_insn = NULL;
18833 arm_ccfsm_state = 0;
18838 /* If in state 3, it is possible to repeat the trick, if this insn is an
18839 unconditional branch to a label, and immediately following this branch
18840 is the previous target label which is only used once, and the label this
18841 branch jumps to is not too far off. */
18842 if (arm_ccfsm_state == 3)
18844 if (simplejump_p (insn))
18846 start_insn = next_nonnote_insn (start_insn);
18847 if (BARRIER_P (start_insn))
18849 /* XXX Isn't this always a barrier? */
18850 start_insn = next_nonnote_insn (start_insn);
18852 if (LABEL_P (start_insn)
18853 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18854 && LABEL_NUSES (start_insn) == 1)
18859 else if (ANY_RETURN_P (body))
18861 start_insn = next_nonnote_insn (start_insn);
18862 if (BARRIER_P (start_insn))
18863 start_insn = next_nonnote_insn (start_insn);
18864 if (LABEL_P (start_insn)
18865 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18866 && LABEL_NUSES (start_insn) == 1)
18869 seeking_return = 1;
18870 return_code = GET_CODE (body);
18879 gcc_assert (!arm_ccfsm_state || reverse);
18880 if (!JUMP_P (insn))
18883 /* This jump might be paralleled with a clobber of the condition codes;
18884 the jump should always come first. */
18885 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
18886 body = XVECEXP (body, 0, 0);
18889 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
18890 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
18893 int fail = FALSE, succeed = FALSE;
18894 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
18895 int then_not_else = TRUE;
18896 rtx this_insn = start_insn, label = 0;
18898 /* Register the insn jumped to. */
18901 if (!seeking_return)
18902 label = XEXP (SET_SRC (body), 0);
18904 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
18905 label = XEXP (XEXP (SET_SRC (body), 1), 0);
18906 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
18908 label = XEXP (XEXP (SET_SRC (body), 2), 0);
18909 then_not_else = FALSE;
18911 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
18913 seeking_return = 1;
18914 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
18916 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
18918 seeking_return = 1;
18919 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
18920 then_not_else = FALSE;
18923 gcc_unreachable ();
18925 /* See how many insns this branch skips, and what kind of insns. If all
18926 insns are okay, and the label or unconditional branch to the same
18927 label is not too far away, succeed. */
18928 for (insns_skipped = 0;
18929 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
18933 this_insn = next_nonnote_insn (this_insn);
18937 switch (GET_CODE (this_insn))
18940 /* Succeed if it is the target label, otherwise fail since
18941 control falls in from somewhere else. */
18942 if (this_insn == label)
18944 arm_ccfsm_state = 1;
18952 /* Succeed if the following insn is the target label.
18954 If return insns are used then the last insn in a function
18955 will be a barrier. */
18956 this_insn = next_nonnote_insn (this_insn);
18957 if (this_insn && this_insn == label)
18959 arm_ccfsm_state = 1;
18967 /* The AAPCS says that conditional calls should not be
18968 used since they make interworking inefficient (the
18969 linker can't transform BL<cond> into BLX). That's
18970 only a problem if the machine has BLX. */
18977 /* Succeed if the following insn is the target label, or
18978 if the following two insns are a barrier and the target label. */
18980 this_insn = next_nonnote_insn (this_insn);
18981 if (this_insn && BARRIER_P (this_insn))
18982 this_insn = next_nonnote_insn (this_insn);
18984 if (this_insn && this_insn == label
18985 && insns_skipped < max_insns_skipped)
18987 arm_ccfsm_state = 1;
18995 /* If this is an unconditional branch to the same label, succeed.
18996 If it is to another label, do nothing. If it is conditional, fail. */
18998 /* XXX Probably, the tests for SET and the PC are unnecessary. */
19001 scanbody = PATTERN (this_insn);
19002 if (GET_CODE (scanbody) == SET
19003 && GET_CODE (SET_DEST (scanbody)) == PC)
19005 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
19006 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
19008 arm_ccfsm_state = 2;
19011 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
19014 /* Fail if a conditional return is undesirable (e.g. on a
19015 StrongARM), but still allow this if optimizing for size. */
19016 else if (GET_CODE (scanbody) == return_code
19017 && !use_return_insn (TRUE, NULL)
19020 else if (GET_CODE (scanbody) == return_code)
19022 arm_ccfsm_state = 2;
19025 else if (GET_CODE (scanbody) == PARALLEL)
19027 switch (get_attr_conds (this_insn))
19037 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
19042 /* Instructions using or affecting the condition codes make it fail. */
19044 scanbody = PATTERN (this_insn);
19045 if (!(GET_CODE (scanbody) == SET
19046 || GET_CODE (scanbody) == PARALLEL)
19047 || get_attr_conds (this_insn) != CONDS_NOCOND)
19057 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
19058 arm_target_label = CODE_LABEL_NUMBER (label);
19061 gcc_assert (seeking_return || arm_ccfsm_state == 2);
19063 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
19065 this_insn = next_nonnote_insn (this_insn);
19066 gcc_assert (!this_insn
19067 || (!BARRIER_P (this_insn)
19068 && !LABEL_P (this_insn)));
19072 /* Oh, dear! We ran off the end; give up. */
19073 extract_constrain_insn_cached (insn);
19074 arm_ccfsm_state = 0;
19075 arm_target_insn = NULL;
19078 arm_target_insn = this_insn;
19081 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from what it was. */
19084 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
19086 if (reverse || then_not_else)
19087 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
19090 /* Restore recog_data (getting the attributes of other insns can
19091 destroy this array, but final.c assumes that it remains intact
19092 across this call). */
19093 extract_constrain_insn_cached (insn);
19097 /* Output IT instructions. */
19099 thumb2_asm_output_opcode (FILE * stream)
19104 if (arm_condexec_mask)
19106 for (n = 0; n < arm_condexec_masklen; n++)
19107 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
buff[n] = 0;
19109 asm_fprintf (stream, "i%s\t%s\n\t", buff,
19110 arm_condition_codes[arm_current_cc]);
19111 arm_condexec_mask = 0;
19115 /* Returns true if REGNO is a valid register
19116 for holding a quantity of type MODE. */
19118 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
19120 if (GET_MODE_CLASS (mode) == MODE_CC)
19121 return (regno == CC_REGNUM
19122 || (TARGET_HARD_FLOAT && TARGET_VFP
19123 && regno == VFPCC_REGNUM));
19126 /* For the Thumb we only allow values bigger than SImode in
19127 registers 0 - 6, so that there is always a second low
19128 register available to hold the upper part of the value.
19129 We probably ought to ensure that the register is the
19130 start of an even numbered register pair. */
19131 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
19133 if (TARGET_HARD_FLOAT && TARGET_VFP
19134 && IS_VFP_REGNUM (regno))
19136 if (mode == SFmode || mode == SImode)
19137 return VFP_REGNO_OK_FOR_SINGLE (regno);
19139 if (mode == DFmode)
19140 return VFP_REGNO_OK_FOR_DOUBLE (regno);
19142 /* VFP registers can hold HFmode values, but there is no point in
19143 putting them there unless we have hardware conversion insns. */
19144 if (mode == HFmode)
19145 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
19148 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
19149 || (VALID_NEON_QREG_MODE (mode)
19150 && NEON_REGNO_OK_FOR_QUAD (regno))
19151 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
19152 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
19153 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
19154 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
19155 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
19160 if (TARGET_REALLY_IWMMXT)
19162 if (IS_IWMMXT_GR_REGNUM (regno))
19163 return mode == SImode;
19165 if (IS_IWMMXT_REGNUM (regno))
19166 return VALID_IWMMXT_REG_MODE (mode);
19169 /* We allow almost any value to be stored in the general registers.
19170 Restrict doubleword quantities to even register pairs so that we can
19171 use ldrd. Do not allow very large Neon structure opaque modes in
19172 general registers; they would use too many. */
19173 if (regno <= LAST_ARM_REGNUM)
19174 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
19175 && ARM_NUM_REGS (mode) <= 4;
19177 if (regno == FRAME_POINTER_REGNUM
19178 || regno == ARG_POINTER_REGNUM)
19179 /* We only allow integers in the fake hard registers. */
19180 return GET_MODE_CLASS (mode) == MODE_INT;
19185 /* Implement MODES_TIEABLE_P. */
19188 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
19190 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
19193 /* We specifically want to allow elements of "structure" modes to
19194 be tieable to the structure. This more general condition allows
19195 other rarer situations too. */
19197 && (VALID_NEON_DREG_MODE (mode1)
19198 || VALID_NEON_QREG_MODE (mode1)
19199 || VALID_NEON_STRUCT_MODE (mode1))
19200 && (VALID_NEON_DREG_MODE (mode2)
19201 || VALID_NEON_QREG_MODE (mode2)
19202 || VALID_NEON_STRUCT_MODE (mode2)))
19208 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
19209 not used in ARM mode. */
19212 arm_regno_class (int regno)
19216 if (regno == STACK_POINTER_REGNUM)
return STACK_REG;
19218 if (regno == CC_REGNUM)
return CC_REG;
19225 if (TARGET_THUMB2 && regno < 8)
return LO_REGS;
19228 if ( regno <= LAST_ARM_REGNUM
19229 || regno == FRAME_POINTER_REGNUM
19230 || regno == ARG_POINTER_REGNUM)
19231 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
19233 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
19234 return TARGET_THUMB2 ? CC_REG : NO_REGS;
19236 if (IS_VFP_REGNUM (regno))
19238 if (regno <= D7_VFP_REGNUM)
19239 return VFP_D0_D7_REGS;
19240 else if (regno <= LAST_LO_VFP_REGNUM)
19241 return VFP_LO_REGS;
19243 return VFP_HI_REGS;
19246 if (IS_IWMMXT_REGNUM (regno))
19247 return IWMMXT_REGS;
19249 if (IS_IWMMXT_GR_REGNUM (regno))
19250 return IWMMXT_GR_REGS;
19255 /* Handle a special case when computing the offset
19256 of an argument from the frame pointer. */
19258 arm_debugger_arg_offset (int value, rtx addr)
19262 /* We are only interested if dbxout_parms() failed to compute the offset. */
19266 /* We can only cope with the case where the address is held in a register. */
19270 /* If we are using the frame pointer to point at the argument, then
19271 an offset of 0 is correct. */
19272 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
19275 /* If we are using the stack pointer to point at the
19276 argument, then an offset of 0 is correct. */
19277 /* ??? Check this is consistent with thumb2 frame layout. */
19278 if ((TARGET_THUMB || !frame_pointer_needed)
19279 && REGNO (addr) == SP_REGNUM)
19282 /* Oh dear. The argument is pointed to by a register rather
19283 than being held in a register, or being stored at a known
19284 offset from the frame pointer. Since GDB only understands
19285 those two kinds of argument we must translate the address
19286 held in the register into an offset from the frame pointer.
19287 We do this by searching through the insns for the function
19288 looking to see where this register gets its value. If the
19289 register is initialized from the frame pointer plus an offset
19290 then we are in luck and we can continue, otherwise we give up.
19292 This code is exercised by producing debugging information
19293 for a function with arguments like this:
19295 double func (double a, double b, int c, double d) {return d;}
19297 Without this code the stab for parameter 'd' will be set to
19298 an offset of 0 from the frame pointer, rather than 8. */
19300 /* The if() statement says:
19302 If the insn is a normal instruction
19303 and if the insn is setting the value in a register
19304 and if the register being set is the register holding the address of the argument
19305 and if the address is computed by an addition
19306 that involves adding to a register
19307 which is the frame pointer
19312 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19314 if ( NONJUMP_INSN_P (insn)
19315 && GET_CODE (PATTERN (insn)) == SET
19316 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
19317 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
19318 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
19319 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
19320 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
19323 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
19332 warning (0, "unable to compute real location of stacked parameter");
19333 value = 8; /* XXX magic hack */
19353 T_MAX /* Size of enum. Keep last. */
19354 } neon_builtin_type_mode;
19356 #define TYPE_MODE_BIT(X) (1 << (X))
19358 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
19359 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
19360 | TYPE_MODE_BIT (T_DI))
19361 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
19362 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
19363 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
19365 #define v8qi_UP T_V8QI
19366 #define v4hi_UP T_V4HI
19367 #define v2si_UP T_V2SI
19368 #define v2sf_UP T_V2SF
19370 #define v16qi_UP T_V16QI
19371 #define v8hi_UP T_V8HI
19372 #define v4si_UP T_V4SI
19373 #define v4sf_UP T_V4SF
19374 #define v2di_UP T_V2DI
19379 #define UP(X) X##_UP
19413 NEON_LOADSTRUCTLANE,
19415 NEON_STORESTRUCTLANE,
19424 const neon_itype itype;
19425 const neon_builtin_type_mode mode;
19426 const enum insn_code code;
19427 unsigned int fcode;
19428 } neon_builtin_datum;
19430 #define CF(N,X) CODE_FOR_neon_##N##X
19432 #define VAR1(T, N, A) \
19433 {#N, NEON_##T, UP (A), CF (N, A), 0}
19434 #define VAR2(T, N, A, B) \
19436 {#N, NEON_##T, UP (B), CF (N, B), 0}
19437 #define VAR3(T, N, A, B, C) \
19438 VAR2 (T, N, A, B), \
19439 {#N, NEON_##T, UP (C), CF (N, C), 0}
19440 #define VAR4(T, N, A, B, C, D) \
19441 VAR3 (T, N, A, B, C), \
19442 {#N, NEON_##T, UP (D), CF (N, D), 0}
19443 #define VAR5(T, N, A, B, C, D, E) \
19444 VAR4 (T, N, A, B, C, D), \
19445 {#N, NEON_##T, UP (E), CF (N, E), 0}
19446 #define VAR6(T, N, A, B, C, D, E, F) \
19447 VAR5 (T, N, A, B, C, D, E), \
19448 {#N, NEON_##T, UP (F), CF (N, F), 0}
19449 #define VAR7(T, N, A, B, C, D, E, F, G) \
19450 VAR6 (T, N, A, B, C, D, E, F), \
19451 {#N, NEON_##T, UP (G), CF (N, G), 0}
19452 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
19453 VAR7 (T, N, A, B, C, D, E, F, G), \
19454 {#N, NEON_##T, UP (H), CF (N, H), 0}
19455 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
19456 VAR8 (T, N, A, B, C, D, E, F, G, H), \
19457 {#N, NEON_##T, UP (I), CF (N, I), 0}
19458 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
19459 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
19460 {#N, NEON_##T, UP (J), CF (N, J), 0}
19462 /* The mode entries in the following table correspond to the "key" type of the
19463 instruction variant, i.e. equivalent to that which would be specified after
19464 the assembler mnemonic, which usually refers to the last vector operand.
19465 (Signed/unsigned/polynomial types are not differentiated between though, and
19466 are all mapped onto the same mode for a given element size.) The modes
19467 listed per instruction should be the same as those defined for that
19468 instruction's pattern in neon.md. */
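/* As an example of the encoding (illustrative): VAR2 (TERNOP, vfma,
   v2sf, v4sf) below expands to the two table entries
   {"vfma", NEON_TERNOP, T_V2SF, CODE_FOR_neon_vfmav2sf, 0} and
   {"vfma", NEON_TERNOP, T_V4SF, CODE_FOR_neon_vfmav4sf, 0}
   (the trailing 0 is the as-yet-unassigned fcode field). */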
19470 static neon_builtin_datum neon_builtin_data[] =
19472 VAR10 (BINOP, vadd,
19473 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19474 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
19475 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
19476 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19477 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19478 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
19479 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19480 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19481 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
19482 VAR2 (TERNOP, vfma, v2sf, v4sf),
19483 VAR2 (TERNOP, vfms, v2sf, v4sf),
19484 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19485 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
19486 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
19487 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
19488 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
19489 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
19490 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
19491 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
19492 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
19493 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
19494 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
19495 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
19496 VAR2 (BINOP, vqdmull, v4hi, v2si),
19497 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19498 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19499 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19500 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
19501 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
19502 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
19503 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19504 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19505 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19506 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
19507 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19508 VAR10 (BINOP, vsub,
19509 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19510 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
19511 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
19512 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19513 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19514 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
19515 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19516 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19517 VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19518 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19519 VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19520 VAR2 (BINOP, vcage, v2sf, v4sf),
19521 VAR2 (BINOP, vcagt, v2sf, v4sf),
19522 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19523 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19524 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
19525 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19526 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
19527 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19528 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19529 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
19530 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19531 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19532 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
19533 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
19534 VAR2 (BINOP, vrecps, v2sf, v4sf),
19535 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
19536 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19537 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19538 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19539 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19540 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19541 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19542 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19543 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19544 VAR2 (UNOP, vcnt, v8qi, v16qi),
19545 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
19546 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
19547 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19548 /* FIXME: vget_lane supports more variants than this! */
19549 VAR10 (GETLANE, vget_lane,
19550 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19551 VAR10 (SETLANE, vset_lane,
19552 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19553 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
19554 VAR10 (DUP, vdup_n,
19555 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19556 VAR10 (DUPLANE, vdup_lane,
19557 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19558 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
19559 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
19560 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
19561 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
19562 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
19563 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
19564 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
19565 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19566 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19567 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
19568 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
19569 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19570 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
19571 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
19572 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19573 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19574 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
19575 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
19576 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19577 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
19578 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
19579 VAR10 (BINOP, vext,
19580 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19581 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19582 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
19583 VAR2 (UNOP, vrev16, v8qi, v16qi),
19584 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
19585 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
19586 VAR10 (SELECT, vbsl,
19587 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19588 VAR2 (RINT, vrintn, v2sf, v4sf),
19589 VAR2 (RINT, vrinta, v2sf, v4sf),
19590 VAR2 (RINT, vrintp, v2sf, v4sf),
19591 VAR2 (RINT, vrintm, v2sf, v4sf),
19592 VAR2 (RINT, vrintz, v2sf, v4sf),
19593 VAR2 (RINT, vrintx, v2sf, v4sf),
19594 VAR1 (VTBL, vtbl1, v8qi),
19595 VAR1 (VTBL, vtbl2, v8qi),
19596 VAR1 (VTBL, vtbl3, v8qi),
19597 VAR1 (VTBL, vtbl4, v8qi),
19598 VAR1 (VTBX, vtbx1, v8qi),
19599 VAR1 (VTBX, vtbx2, v8qi),
19600 VAR1 (VTBX, vtbx3, v8qi),
19601 VAR1 (VTBX, vtbx4, v8qi),
19602 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19603 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19604 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19605 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
19606 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
19607 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
19608 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
19609 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
19610 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
19611 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
19612 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
19613 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
19614 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
19615 VAR10 (LOAD1, vld1,
19616 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19617 VAR10 (LOAD1LANE, vld1_lane,
19618 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19619 VAR10 (LOAD1, vld1_dup,
19620 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19621 VAR10 (STORE1, vst1,
19622 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19623 VAR10 (STORE1LANE, vst1_lane,
19624 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19626 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19627 VAR7 (LOADSTRUCTLANE, vld2_lane,
19628 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19629 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
19630 VAR9 (STORESTRUCT, vst2,
19631 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19632 VAR7 (STORESTRUCTLANE, vst2_lane,
19633 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19635 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19636 VAR7 (LOADSTRUCTLANE, vld3_lane,
19637 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19638 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
19639 VAR9 (STORESTRUCT, vst3,
19640 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19641 VAR7 (STORESTRUCTLANE, vst3_lane,
19642 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19643 VAR9 (LOADSTRUCT, vld4,
19644 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19645 VAR7 (LOADSTRUCTLANE, vld4_lane,
19646 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19647 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
19648 VAR9 (STORESTRUCT, vst4,
19649 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19650 VAR7 (STORESTRUCTLANE, vst4_lane,
19651 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19652 VAR10 (LOGICBINOP, vand,
19653 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19654 VAR10 (LOGICBINOP, vorr,
19655 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19656 VAR10 (BINOP, veor,
19657 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19658 VAR10 (LOGICBINOP, vbic,
19659 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19660 VAR10 (LOGICBINOP, vorn,
19661 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
19676 /* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
19677 symbolic names defined here (which would require too much duplication).
19681 ARM_BUILTIN_GETWCGR0,
19682 ARM_BUILTIN_GETWCGR1,
19683 ARM_BUILTIN_GETWCGR2,
19684 ARM_BUILTIN_GETWCGR3,
19686 ARM_BUILTIN_SETWCGR0,
19687 ARM_BUILTIN_SETWCGR1,
19688 ARM_BUILTIN_SETWCGR2,
19689 ARM_BUILTIN_SETWCGR3,
19693 ARM_BUILTIN_WAVG2BR,
19694 ARM_BUILTIN_WAVG2HR,
19695 ARM_BUILTIN_WAVG2B,
19696 ARM_BUILTIN_WAVG2H,
19703 ARM_BUILTIN_WMACSZ,
19705 ARM_BUILTIN_WMACUZ,
19708 ARM_BUILTIN_WSADBZ,
19710 ARM_BUILTIN_WSADHZ,
19712 ARM_BUILTIN_WALIGNI,
19713 ARM_BUILTIN_WALIGNR0,
19714 ARM_BUILTIN_WALIGNR1,
19715 ARM_BUILTIN_WALIGNR2,
19716 ARM_BUILTIN_WALIGNR3,
19719 ARM_BUILTIN_TMIAPH,
19720 ARM_BUILTIN_TMIABB,
19721 ARM_BUILTIN_TMIABT,
19722 ARM_BUILTIN_TMIATB,
19723 ARM_BUILTIN_TMIATT,
19725 ARM_BUILTIN_TMOVMSKB,
19726 ARM_BUILTIN_TMOVMSKH,
19727 ARM_BUILTIN_TMOVMSKW,
19729 ARM_BUILTIN_TBCSTB,
19730 ARM_BUILTIN_TBCSTH,
19731 ARM_BUILTIN_TBCSTW,
19733 ARM_BUILTIN_WMADDS,
19734 ARM_BUILTIN_WMADDU,
19736 ARM_BUILTIN_WPACKHSS,
19737 ARM_BUILTIN_WPACKWSS,
19738 ARM_BUILTIN_WPACKDSS,
19739 ARM_BUILTIN_WPACKHUS,
19740 ARM_BUILTIN_WPACKWUS,
19741 ARM_BUILTIN_WPACKDUS,
19746 ARM_BUILTIN_WADDSSB,
19747 ARM_BUILTIN_WADDSSH,
19748 ARM_BUILTIN_WADDSSW,
19749 ARM_BUILTIN_WADDUSB,
19750 ARM_BUILTIN_WADDUSH,
19751 ARM_BUILTIN_WADDUSW,
19755 ARM_BUILTIN_WSUBSSB,
19756 ARM_BUILTIN_WSUBSSH,
19757 ARM_BUILTIN_WSUBSSW,
19758 ARM_BUILTIN_WSUBUSB,
19759 ARM_BUILTIN_WSUBUSH,
19760 ARM_BUILTIN_WSUBUSW,
19767 ARM_BUILTIN_WCMPEQB,
19768 ARM_BUILTIN_WCMPEQH,
19769 ARM_BUILTIN_WCMPEQW,
19770 ARM_BUILTIN_WCMPGTUB,
19771 ARM_BUILTIN_WCMPGTUH,
19772 ARM_BUILTIN_WCMPGTUW,
19773 ARM_BUILTIN_WCMPGTSB,
19774 ARM_BUILTIN_WCMPGTSH,
19775 ARM_BUILTIN_WCMPGTSW,
19777 ARM_BUILTIN_TEXTRMSB,
19778 ARM_BUILTIN_TEXTRMSH,
19779 ARM_BUILTIN_TEXTRMSW,
19780 ARM_BUILTIN_TEXTRMUB,
19781 ARM_BUILTIN_TEXTRMUH,
19782 ARM_BUILTIN_TEXTRMUW,
19783 ARM_BUILTIN_TINSRB,
19784 ARM_BUILTIN_TINSRH,
19785 ARM_BUILTIN_TINSRW,
19787 ARM_BUILTIN_WMAXSW,
19788 ARM_BUILTIN_WMAXSH,
19789 ARM_BUILTIN_WMAXSB,
19790 ARM_BUILTIN_WMAXUW,
19791 ARM_BUILTIN_WMAXUH,
19792 ARM_BUILTIN_WMAXUB,
19793 ARM_BUILTIN_WMINSW,
19794 ARM_BUILTIN_WMINSH,
19795 ARM_BUILTIN_WMINSB,
19796 ARM_BUILTIN_WMINUW,
19797 ARM_BUILTIN_WMINUH,
19798 ARM_BUILTIN_WMINUB,
19800 ARM_BUILTIN_WMULUM,
19801 ARM_BUILTIN_WMULSM,
19802 ARM_BUILTIN_WMULUL,
19804 ARM_BUILTIN_PSADBH,
19805 ARM_BUILTIN_WSHUFH,
19819 ARM_BUILTIN_WSLLHI,
19820 ARM_BUILTIN_WSLLWI,
19821 ARM_BUILTIN_WSLLDI,
19822 ARM_BUILTIN_WSRAHI,
19823 ARM_BUILTIN_WSRAWI,
19824 ARM_BUILTIN_WSRADI,
19825 ARM_BUILTIN_WSRLHI,
19826 ARM_BUILTIN_WSRLWI,
19827 ARM_BUILTIN_WSRLDI,
19828 ARM_BUILTIN_WRORHI,
19829 ARM_BUILTIN_WRORWI,
19830 ARM_BUILTIN_WRORDI,
19832 ARM_BUILTIN_WUNPCKIHB,
19833 ARM_BUILTIN_WUNPCKIHH,
19834 ARM_BUILTIN_WUNPCKIHW,
19835 ARM_BUILTIN_WUNPCKILB,
19836 ARM_BUILTIN_WUNPCKILH,
19837 ARM_BUILTIN_WUNPCKILW,
19839 ARM_BUILTIN_WUNPCKEHSB,
19840 ARM_BUILTIN_WUNPCKEHSH,
19841 ARM_BUILTIN_WUNPCKEHSW,
19842 ARM_BUILTIN_WUNPCKEHUB,
19843 ARM_BUILTIN_WUNPCKEHUH,
19844 ARM_BUILTIN_WUNPCKEHUW,
19845 ARM_BUILTIN_WUNPCKELSB,
19846 ARM_BUILTIN_WUNPCKELSH,
19847 ARM_BUILTIN_WUNPCKELSW,
19848 ARM_BUILTIN_WUNPCKELUB,
19849 ARM_BUILTIN_WUNPCKELUH,
19850 ARM_BUILTIN_WUNPCKELUW,
19856 ARM_BUILTIN_WADDSUBHX,
19857 ARM_BUILTIN_WSUBADDHX,
19859 ARM_BUILTIN_WABSDIFFB,
19860 ARM_BUILTIN_WABSDIFFH,
19861 ARM_BUILTIN_WABSDIFFW,
19863 ARM_BUILTIN_WADDCH,
19864 ARM_BUILTIN_WADDCW,
19867 ARM_BUILTIN_WAVG4R,
19869 ARM_BUILTIN_WMADDSX,
19870 ARM_BUILTIN_WMADDUX,
19872 ARM_BUILTIN_WMADDSN,
19873 ARM_BUILTIN_WMADDUN,
19875 ARM_BUILTIN_WMULWSM,
19876 ARM_BUILTIN_WMULWUM,
19878 ARM_BUILTIN_WMULWSMR,
19879 ARM_BUILTIN_WMULWUMR,
19881 ARM_BUILTIN_WMULWL,
19883 ARM_BUILTIN_WMULSMR,
19884 ARM_BUILTIN_WMULUMR,
19886 ARM_BUILTIN_WQMULM,
19887 ARM_BUILTIN_WQMULMR,
19889 ARM_BUILTIN_WQMULWM,
19890 ARM_BUILTIN_WQMULWMR,
19892 ARM_BUILTIN_WADDBHUSM,
19893 ARM_BUILTIN_WADDBHUSL,
19895 ARM_BUILTIN_WQMIABB,
19896 ARM_BUILTIN_WQMIABT,
19897 ARM_BUILTIN_WQMIATB,
19898 ARM_BUILTIN_WQMIATT,
19900 ARM_BUILTIN_WQMIABBN,
19901 ARM_BUILTIN_WQMIABTN,
19902 ARM_BUILTIN_WQMIATBN,
19903 ARM_BUILTIN_WQMIATTN,
19905 ARM_BUILTIN_WMIABB,
19906 ARM_BUILTIN_WMIABT,
19907 ARM_BUILTIN_WMIATB,
19908 ARM_BUILTIN_WMIATT,
19910 ARM_BUILTIN_WMIABBN,
19911 ARM_BUILTIN_WMIABTN,
19912 ARM_BUILTIN_WMIATBN,
19913 ARM_BUILTIN_WMIATTN,
19915 ARM_BUILTIN_WMIAWBB,
19916 ARM_BUILTIN_WMIAWBT,
19917 ARM_BUILTIN_WMIAWTB,
19918 ARM_BUILTIN_WMIAWTT,
19920 ARM_BUILTIN_WMIAWBBN,
19921 ARM_BUILTIN_WMIAWBTN,
19922 ARM_BUILTIN_WMIAWTBN,
19923 ARM_BUILTIN_WMIAWTTN,
19925 ARM_BUILTIN_WMERGE,
19927 ARM_BUILTIN_NEON_BASE,
19929 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
19932 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
19935 arm_init_neon_builtins (void)
19937 unsigned int i, fcode;
19940 tree neon_intQI_type_node;
19941 tree neon_intHI_type_node;
19942 tree neon_polyQI_type_node;
19943 tree neon_polyHI_type_node;
19944 tree neon_intSI_type_node;
19945 tree neon_intDI_type_node;
19946 tree neon_float_type_node;
19948 tree intQI_pointer_node;
19949 tree intHI_pointer_node;
19950 tree intSI_pointer_node;
19951 tree intDI_pointer_node;
19952 tree float_pointer_node;
19954 tree const_intQI_node;
19955 tree const_intHI_node;
19956 tree const_intSI_node;
19957 tree const_intDI_node;
19958 tree const_float_node;
19960 tree const_intQI_pointer_node;
19961 tree const_intHI_pointer_node;
19962 tree const_intSI_pointer_node;
19963 tree const_intDI_pointer_node;
19964 tree const_float_pointer_node;
19966 tree V8QI_type_node;
19967 tree V4HI_type_node;
19968 tree V2SI_type_node;
19969 tree V2SF_type_node;
19970 tree V16QI_type_node;
19971 tree V8HI_type_node;
19972 tree V4SI_type_node;
19973 tree V4SF_type_node;
19974 tree V2DI_type_node;
19976 tree intUQI_type_node;
19977 tree intUHI_type_node;
19978 tree intUSI_type_node;
19979 tree intUDI_type_node;
19981 tree intEI_type_node;
19982 tree intOI_type_node;
19983 tree intCI_type_node;
19984 tree intXI_type_node;
19986 tree V8QI_pointer_node;
19987 tree V4HI_pointer_node;
19988 tree V2SI_pointer_node;
19989 tree V2SF_pointer_node;
19990 tree V16QI_pointer_node;
19991 tree V8HI_pointer_node;
19992 tree V4SI_pointer_node;
19993 tree V4SF_pointer_node;
19994 tree V2DI_pointer_node;
19996 tree void_ftype_pv8qi_v8qi_v8qi;
19997 tree void_ftype_pv4hi_v4hi_v4hi;
19998 tree void_ftype_pv2si_v2si_v2si;
19999 tree void_ftype_pv2sf_v2sf_v2sf;
20000 tree void_ftype_pdi_di_di;
20001 tree void_ftype_pv16qi_v16qi_v16qi;
20002 tree void_ftype_pv8hi_v8hi_v8hi;
20003 tree void_ftype_pv4si_v4si_v4si;
20004 tree void_ftype_pv4sf_v4sf_v4sf;
20005 tree void_ftype_pv2di_v2di_v2di;
20007 tree reinterp_ftype_dreg[5][5];
20008 tree reinterp_ftype_qreg[5][5];
20009 tree dreg_types[5], qreg_types[5];
  /* Create distinguished type nodes for NEON vector element types,
     and pointers to values of such types, so we can detect them later.  */
  neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
  neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
  neon_float_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
  layout_type (neon_float_type_node);

  /* Define typedefs which exactly correspond to the modes we are basing vector
     types on.  If you change these names you'll need to change
     the table used by arm_mangle_type too.  */
  (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
					     "__builtin_neon_qi");
  (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
					     "__builtin_neon_hi");
  (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
					     "__builtin_neon_si");
  (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
					     "__builtin_neon_sf");
  (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
					     "__builtin_neon_di");
  (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
					     "__builtin_neon_poly8");
  (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
					     "__builtin_neon_poly16");
  intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
  intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
  intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
  intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
  float_pointer_node = build_pointer_type (neon_float_type_node);
  /* Next create constant-qualified versions of the above types.  */
  const_intQI_node = build_qualified_type (neon_intQI_type_node,
					   TYPE_QUAL_CONST);
  const_intHI_node = build_qualified_type (neon_intHI_type_node,
					   TYPE_QUAL_CONST);
  const_intSI_node = build_qualified_type (neon_intSI_type_node,
					   TYPE_QUAL_CONST);
  const_intDI_node = build_qualified_type (neon_intDI_type_node,
					   TYPE_QUAL_CONST);
  const_float_node = build_qualified_type (neon_float_type_node,
					   TYPE_QUAL_CONST);

  const_intQI_pointer_node = build_pointer_type (const_intQI_node);
  const_intHI_pointer_node = build_pointer_type (const_intHI_node);
  const_intSI_pointer_node = build_pointer_type (const_intSI_node);
  const_intDI_pointer_node = build_pointer_type (const_intDI_node);
  const_float_pointer_node = build_pointer_type (const_float_node);
  /* Now create vector types based on our NEON element types.  */
  /* 64-bit vectors.  */
  V8QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
  V4HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
  V2SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
  V2SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V2SFmode);
  /* 128-bit vectors.  */
  V16QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
  V8HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
  V4SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
  V4SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V4SFmode);
  V2DI_type_node =
    build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
  /* Unsigned integer types for various mode sizes.  */
  intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
  intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
  intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
  intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));

  (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
					     "__builtin_neon_uqi");
  (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
					     "__builtin_neon_uhi");
  (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
					     "__builtin_neon_usi");
  (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
					     "__builtin_neon_udi");

  /* Opaque integer types for structures of vectors.  */
  intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
  intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
  intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
  intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));

  (*lang_hooks.types.register_builtin_type) (intTI_type_node,
					     "__builtin_neon_ti");
  (*lang_hooks.types.register_builtin_type) (intEI_type_node,
					     "__builtin_neon_ei");
  (*lang_hooks.types.register_builtin_type) (intOI_type_node,
					     "__builtin_neon_oi");
  (*lang_hooks.types.register_builtin_type) (intCI_type_node,
					     "__builtin_neon_ci");
  (*lang_hooks.types.register_builtin_type) (intXI_type_node,
					     "__builtin_neon_xi");
  /* Pointers to vector types.  */
  V8QI_pointer_node = build_pointer_type (V8QI_type_node);
  V4HI_pointer_node = build_pointer_type (V4HI_type_node);
  V2SI_pointer_node = build_pointer_type (V2SI_type_node);
  V2SF_pointer_node = build_pointer_type (V2SF_type_node);
  V16QI_pointer_node = build_pointer_type (V16QI_type_node);
  V8HI_pointer_node = build_pointer_type (V8HI_type_node);
  V4SI_pointer_node = build_pointer_type (V4SI_type_node);
  V4SF_pointer_node = build_pointer_type (V4SF_type_node);
  V2DI_pointer_node = build_pointer_type (V2DI_type_node);

  /* Operations which return results as pairs.  */
  void_ftype_pv8qi_v8qi_v8qi =
    build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
			      V8QI_type_node, NULL);
  void_ftype_pv4hi_v4hi_v4hi =
    build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
			      V4HI_type_node, NULL);
  void_ftype_pv2si_v2si_v2si =
    build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
			      V2SI_type_node, NULL);
  void_ftype_pv2sf_v2sf_v2sf =
    build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
			      V2SF_type_node, NULL);
  void_ftype_pdi_di_di =
    build_function_type_list (void_type_node, intDI_pointer_node,
			      neon_intDI_type_node, neon_intDI_type_node, NULL);
  void_ftype_pv16qi_v16qi_v16qi =
    build_function_type_list (void_type_node, V16QI_pointer_node,
			      V16QI_type_node, V16QI_type_node, NULL);
  void_ftype_pv8hi_v8hi_v8hi =
    build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
			      V8HI_type_node, NULL);
  void_ftype_pv4si_v4si_v4si =
    build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
			      V4SI_type_node, NULL);
  void_ftype_pv4sf_v4sf_v4sf =
    build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
			      V4SF_type_node, NULL);
  void_ftype_pv2di_v2di_v2di =
    build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
			      V2DI_type_node, NULL);
  dreg_types[0] = V8QI_type_node;
  dreg_types[1] = V4HI_type_node;
  dreg_types[2] = V2SI_type_node;
  dreg_types[3] = V2SF_type_node;
  dreg_types[4] = neon_intDI_type_node;

  qreg_types[0] = V16QI_type_node;
  qreg_types[1] = V8HI_type_node;
  qreg_types[2] = V4SI_type_node;
  qreg_types[3] = V4SF_type_node;
  qreg_types[4] = V2DI_type_node;

  for (i = 0; i < 5; i++)
    {
      int j;
      for (j = 0; j < 5; j++)
	{
	  reinterp_ftype_dreg[i][j]
	    = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
	  reinterp_ftype_qreg[i][j]
	    = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
	}
    }
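  /* Illustrative example (editorial, not from the original source): after
     these loops, reinterp_ftype_dreg[2][0] is the function type
     "V2SI (*) (V8QI)", i.e. the type given to the builtin behind a
     reinterpret such as vreinterpret_s32_s8, which reuses a doubleword
     register without changing any bits.  */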
  for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
       i < ARRAY_SIZE (neon_builtin_data);
       i++, fcode++)
    {
      neon_builtin_datum *d = &neon_builtin_data[i];

      const char* const modenames[] = {
	"v8qi", "v4hi", "v2si", "v2sf", "di",
	"v16qi", "v8hi", "v4si", "v4sf", "v2di",
	"ti", "ei", "oi"
      };
      char namebuf[60];
      tree ftype = NULL;
      int is_load = 0, is_store = 0;

      gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
      switch (d->itype)
	{
	case NEON_LOAD1:
	case NEON_LOAD1LANE:
	case NEON_LOADSTRUCT:
	case NEON_LOADSTRUCTLANE:
	  is_load = 1;
	  /* Fall through.  */
	case NEON_STORE1:
	case NEON_STORE1LANE:
	case NEON_STORESTRUCT:
	case NEON_STORESTRUCTLANE:
	  if (!is_load)
	    is_store = 1;
	  /* Fall through.  */
	case NEON_UNOP:
	case NEON_RINT:
	case NEON_BINOP:
	case NEON_LOGICBINOP:
	case NEON_SHIFTINSERT:
	case NEON_TERNOP:
	case NEON_GETLANE:
	case NEON_SETLANE:
	case NEON_CREATE:
	case NEON_DUP:
	case NEON_DUPLANE:
	case NEON_SHIFTIMM:
	case NEON_SHIFTACC:
	case NEON_COMBINE:
	case NEON_SPLIT:
	case NEON_CONVERT:
	case NEON_FIXCONV:
	case NEON_LANEMUL:
	case NEON_LANEMULL:
	case NEON_LANEMULH:
	case NEON_LANEMAC:
	case NEON_SCALARMUL:
	case NEON_SCALARMULL:
	case NEON_SCALARMULH:
	case NEON_SCALARMAC:
	case NEON_SELECT:
	case NEON_VTBL:
	case NEON_VTBX:
	  {
	    int k;
	    tree return_type = void_type_node, args = void_list_node;

	    /* Build a function type directly from the insn_data for
	       this builtin.  The build_function_type() function takes
	       care of removing duplicates for us.  */
	    for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
	      {
		tree eltype;

		if (is_load && k == 1)
		  {
		    /* Neon load patterns always have the memory
		       operand in the operand 1 position.  */
		    gcc_assert (insn_data[d->code].operand[k].predicate
				== neon_struct_operand);

		    switch (d->mode)
		      {
		      case T_V8QI:
		      case T_V16QI:
			eltype = const_intQI_pointer_node;
			break;

		      case T_V4HI:
		      case T_V8HI:
			eltype = const_intHI_pointer_node;
			break;

		      case T_V2SI:
		      case T_V4SI:
			eltype = const_intSI_pointer_node;
			break;

		      case T_V2SF:
		      case T_V4SF:
			eltype = const_float_pointer_node;
			break;

		      case T_DI:
		      case T_V2DI:
			eltype = const_intDI_pointer_node;
			break;

		      default: gcc_unreachable ();
		      }
		  }
		else if (is_store && k == 0)
		  {
		    /* Similarly, Neon store patterns use operand 0 as
		       the memory location to store to.  */
		    gcc_assert (insn_data[d->code].operand[k].predicate
				== neon_struct_operand);

		    switch (d->mode)
		      {
		      case T_V8QI:
		      case T_V16QI:
			eltype = intQI_pointer_node;
			break;

		      case T_V4HI:
		      case T_V8HI:
			eltype = intHI_pointer_node;
			break;

		      case T_V2SI:
		      case T_V4SI:
			eltype = intSI_pointer_node;
			break;

		      case T_V2SF:
		      case T_V4SF:
			eltype = float_pointer_node;
			break;

		      case T_DI:
		      case T_V2DI:
			eltype = intDI_pointer_node;
			break;

		      default: gcc_unreachable ();
		      }
		  }
		else
		  {
		    switch (insn_data[d->code].operand[k].mode)
		      {
		      case VOIDmode: eltype = void_type_node; break;
		      /* Scalars.  */
		      case QImode: eltype = neon_intQI_type_node; break;
		      case HImode: eltype = neon_intHI_type_node; break;
		      case SImode: eltype = neon_intSI_type_node; break;
		      case SFmode: eltype = neon_float_type_node; break;
		      case DImode: eltype = neon_intDI_type_node; break;
		      case TImode: eltype = intTI_type_node; break;
		      case EImode: eltype = intEI_type_node; break;
		      case OImode: eltype = intOI_type_node; break;
		      case CImode: eltype = intCI_type_node; break;
		      case XImode: eltype = intXI_type_node; break;
		      /* 64-bit vectors.  */
		      case V8QImode: eltype = V8QI_type_node; break;
		      case V4HImode: eltype = V4HI_type_node; break;
		      case V2SImode: eltype = V2SI_type_node; break;
		      case V2SFmode: eltype = V2SF_type_node; break;
		      /* 128-bit vectors.  */
		      case V16QImode: eltype = V16QI_type_node; break;
		      case V8HImode: eltype = V8HI_type_node; break;
		      case V4SImode: eltype = V4SI_type_node; break;
		      case V4SFmode: eltype = V4SF_type_node; break;
		      case V2DImode: eltype = V2DI_type_node; break;
		      default: gcc_unreachable ();
		      }
		  }

		if (k == 0 && !is_store)
		  return_type = eltype;
		else
		  args = tree_cons (NULL_TREE, eltype, args);
	      }

	    ftype = build_function_type (return_type, args);
	  }
	  break;
	case NEON_RESULTPAIR:
	  {
	    switch (insn_data[d->code].operand[1].mode)
	      {
	      case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
	      case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
	      case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
	      case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
	      case DImode: ftype = void_ftype_pdi_di_di; break;
	      case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
	      case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
	      case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
	      case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
	      case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
	      default: gcc_unreachable ();
	      }
	  }
	  break;
	case NEON_REINTERP:
	  {
	    /* We iterate over 5 doubleword types, then 5 quadword
	       types.  */
	    int rhs = d->mode % 5;
	    switch (insn_data[d->code].operand[0].mode)
	      {
	      case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
	      case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
	      case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
	      case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
	      case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
	      case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
	      case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
	      case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
	      case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
	      case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
	      default: gcc_unreachable ();
	      }
	  }
	  break;

	default:
	  gcc_unreachable ();
	}
      gcc_assert (ftype != NULL);

      sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);

      decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
				   NULL_TREE);
      arm_builtin_decls[fcode] = decl;
    }
}
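/* Illustrative example (editorial, not from the original source): for a
   table entry named "vadd" in its T_V8QI variant, the sprintf above
   produces the name "__builtin_neon_vaddv8qi"; arm_neon.h is then
   expected to wrap that builtin in the user-visible vadd_s8 intrinsic.  */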
#define def_mbuiltin(MASK, NAME, TYPE, CODE)			\
  do								\
    {								\
      if ((MASK) & insn_flags)					\
	{							\
	  tree bdecl;						\
	  bdecl = add_builtin_function ((NAME), (TYPE), (CODE),	\
					BUILT_IN_MD, NULL, NULL_TREE); \
	  arm_builtin_decls[CODE] = bdecl;			\
	}							\
    }								\
  while (0)
struct builtin_description
{
  const unsigned int       mask;
  const enum insn_code     icode;
  const char * const       name;
  const enum arm_builtins  code;
  const enum rtx_code      comparison;
  const unsigned int       flag;
};
static const struct builtin_description bdesc_2arg[] =
{
#define IWMMXT_BUILTIN(code, string, builtin) \
  { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
    ARM_BUILTIN_##builtin, UNKNOWN, 0 },

#define IWMMXT2_BUILTIN(code, string, builtin) \
  { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
    ARM_BUILTIN_##builtin, UNKNOWN, 0 },
  IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
  IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
  IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
  IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
  IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
  IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
  IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
  IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
  IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
  IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
  IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
  IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
  IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
  IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
  IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
  IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
  IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
  IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
  IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
  IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
  IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
  IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
  IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
  IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
  IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
  IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
  IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
  IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
  IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
  IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
  IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
  IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
  IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
  IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
  IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
  IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
  IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
  IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
  IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
  IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
  IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
  IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
  IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
  IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
  IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
  IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
  IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
  IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
  IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
  IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
  IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
  IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
  IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
  IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
  IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
  IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
  IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
  IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
  IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
  IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
  IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
  IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
  IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
  IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
  IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
  IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
  IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
  IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
  IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
  IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
  IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
  IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
  IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
  IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
  IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
  IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
#define IWMMXT_BUILTIN2(code, builtin) \
  { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },

#define IWMMXT2_BUILTIN2(code, builtin) \
  { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },

  IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
  IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
  IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
  IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
  IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
};
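/* Illustrative expansion (editorial, not from the original source): the
   entry IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) above expands to
     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },
   so each row ties an insn pattern to a builtin name and function code.  */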
static const struct builtin_description bdesc_1arg[] =
{
  IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
  IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
  IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
  IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
  IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
  IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
  IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
  IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
  IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
  IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
  IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
  IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
};
/* Set up all the iWMMXt builtins.  This is not called if
   TARGET_IWMMXT is zero.  */

static void
arm_init_iwmmxt_builtins (void)
{
  const struct builtin_description * d;
  size_t i;

  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
  tree v8qi_ftype_v8qi_v8qi_int
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, V8QI_type_node,
				integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree v2si_ftype_v2si_int
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, integer_type_node, NULL_TREE);
  tree v2si_ftype_di_di
    = build_function_type_list (V2SI_type_node,
				long_long_integer_type_node,
				long_long_integer_type_node,
				NULL_TREE);
  tree di_ftype_di_int
    = build_function_type_list (long_long_integer_type_node,
				long_long_integer_type_node,
				integer_type_node, NULL_TREE);
  tree di_ftype_di_int_int
    = build_function_type_list (long_long_integer_type_node,
				long_long_integer_type_node,
				integer_type_node,
				integer_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node,
				V8QI_type_node, NULL_TREE);
  tree int_ftype_v4hi
    = build_function_type_list (integer_type_node,
				V4HI_type_node, NULL_TREE);
  tree int_ftype_v2si
    = build_function_type_list (integer_type_node,
				V2SI_type_node, NULL_TREE);
  tree int_ftype_v8qi_int
    = build_function_type_list (integer_type_node,
				V8QI_type_node, integer_type_node, NULL_TREE);
  tree int_ftype_v4hi_int
    = build_function_type_list (integer_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree int_ftype_v2si_int
    = build_function_type_list (integer_type_node,
				V2SI_type_node, integer_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_int_int
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, integer_type_node,
				integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int_int
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, integer_type_node,
				integer_type_node, NULL_TREE);
  tree v2si_ftype_v2si_int_int
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, integer_type_node,
				integer_type_node, NULL_TREE);
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v8qi_ftype_v4hi_v8qi
    = build_function_type_list (V8QI_type_node,
				V4HI_type_node, V8QI_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v8qi_v8qi
    = build_function_type_list (V2SI_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree di_ftype_void
    = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
  tree int_ftype_void
    = build_function_type_list (integer_type_node, NULL_TREE);
  tree di_ftype_v8qi
    = build_function_type_list (long_long_integer_type_node,
				V8QI_type_node, NULL_TREE);
  tree di_ftype_v4hi
    = build_function_type_list (long_long_integer_type_node,
				V4HI_type_node, NULL_TREE);
  tree di_ftype_v2si
    = build_function_type_list (long_long_integer_type_node,
				V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v4hi
    = build_function_type_list (V2SI_type_node,
				V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v8qi
    = build_function_type_list (V4HI_type_node,
				V8QI_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, NULL_TREE);
  tree di_ftype_di_v4hi_v4hi
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				V4HI_type_node, V4HI_type_node,
				NULL_TREE);

  tree di_ftype_v4hi_v4hi
    = build_function_type_list (long_long_unsigned_type_node,
				V4HI_type_node, V4HI_type_node,
				NULL_TREE);

  tree v2si_ftype_v2si_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, V4HI_type_node,
				V4HI_type_node, NULL_TREE);

  tree v2si_ftype_v2si_v8qi_v8qi
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, V8QI_type_node,
				V8QI_type_node, NULL_TREE);

  tree di_ftype_di_v2si_v2si
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				V2SI_type_node, V2SI_type_node,
				NULL_TREE);

  tree di_ftype_di_di_int
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				integer_type_node, NULL_TREE);
  tree void_ftype_int
    = build_function_type_list (void_type_node,
				integer_type_node, NULL_TREE);

  tree v8qi_ftype_char
    = build_function_type_list (V8QI_type_node,
				signed_char_type_node, NULL_TREE);

  tree v4hi_ftype_short
    = build_function_type_list (V4HI_type_node,
				short_integer_type_node, NULL_TREE);

  tree v2si_ftype_int
    = build_function_type_list (V2SI_type_node,
				integer_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				NULL_TREE);
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  gcc_unreachable ();
	}

      def_mbuiltin (d->mask, d->name, type, d->code);
    }
  /* Add the remaining MMX insns with somewhat more complicated types.  */
#define iwmmx_mbuiltin(NAME, TYPE, CODE)			\
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE),	\
		ARM_BUILTIN_ ## CODE)

#define iwmmx2_mbuiltin(NAME, TYPE, CODE)			\
  def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE),	\
		ARM_BUILTIN_ ## CODE)
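/* Illustrative expansion (editorial, not from the original source):
   iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO) below expands to
     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
		   ARM_BUILTIN_WZERO);
   so each builtin is only registered when insn_flags includes the
   corresponding FL_IWMMXT or FL_IWMMXT2 capability bit.  */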
20817 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
20818 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
20819 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
20820 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
20821 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
20822 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
20823 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
20824 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
20825 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
20827 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
20828 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
20829 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
20830 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
20831 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
20832 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
20834 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
20835 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
20836 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
20837 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
20838 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
20839 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
20841 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
20842 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
20843 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
20844 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
20845 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
20846 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
20848 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
20849 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
20850 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
20851 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
20852 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
20853 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
20855 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
20857 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
20858 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
20859 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
20860 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
20861 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
20862 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
20863 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
20864 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
20865 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
20866 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
20868 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
20869 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
20870 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
20871 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
20872 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
20873 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
20874 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
20875 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
20876 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
20878 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
20879 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
20880 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
20882 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
20883 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
20884 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
20886 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
20887 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
20889 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
20890 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
20891 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
20892 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
20893 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
20894 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
20896 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
20897 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
20898 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
20899 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
20900 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
20901 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
20902 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
20903 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
20904 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
20905 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
20906 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
20907 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
20909 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
20910 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
20911 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
20912 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
20914 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
20915 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
20916 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
20917 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
20918 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
20919 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
20920 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
20922 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
20923 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
20924 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
20926 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
20927 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
20928 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
20929 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
20931 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
20932 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
20933 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
20934 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
20936 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
20937 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
20938 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
20939 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
20941 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
20942 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
20943 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
20944 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
20946 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
20947 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
20948 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
20949 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
20951 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
20952 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
20953 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
20954 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
20956 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
20958 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
20959 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
20960 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
#undef iwmmx_mbuiltin
#undef iwmmx2_mbuiltin
}

static void
arm_init_fp16_builtins (void)
{
  tree fp16_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fp16_type) = 16;
  layout_type (fp16_type);
  (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
}
static void
arm_init_builtins (void)
{
  if (TARGET_REALLY_IWMMXT)
    arm_init_iwmmxt_builtins ();

  if (TARGET_NEON)
    arm_init_neon_builtins ();

  if (arm_fp16_format)
    arm_init_fp16_builtins ();
}
/* Return the ARM builtin for CODE.  */

static tree
arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= ARM_BUILTIN_MAX)
    return error_mark_node;

  return arm_builtin_decls[code];
}
/* Implement TARGET_INVALID_PARAMETER_TYPE.  */

static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}
/* Implement TARGET_INVALID_RETURN_TYPE.  */

static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}
/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions between
   __fp16 to or from double to do an intermediate conversion to float.  */

static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
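/* Illustrative example (editorial, not from the original source): given
     __fp16 h;  double d = 2.5;
   the assignment "h = d" is expanded by this hook as
     h = (__fp16) (float) d;
   i.e. double -> float -> __fp16, matching the semantics described in the
   comment above; direct float <-> __fp16 conversions are left alone.  */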
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (enum machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
			       : gen_rtx_SUBREG (DImode, x, 0)));
  return x;
}
/* Subroutine of arm_expand_builtin to take care of binop insns.  */

static rtx
arm_expand_binop_builtin (enum insn_code icode,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of arm_expand_builtin to take care of unop insns.  */

static rtx
arm_expand_unop_builtin (enum insn_code icode,
			 tree exp, rtx target, int do_load)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
typedef enum {
  NEON_ARG_COPY_TO_REG,
  NEON_ARG_CONSTANT,
  NEON_ARG_MEMORY,
  NEON_ARG_STOP
} builtin_arg;

#define NEON_MAX_BUILTIN_ARGS 5
/* EXP is a pointer argument to a Neon load or store intrinsic.  Derive
   and return an expression for the accessed memory.

   The intrinsic function operates on a block of registers that has
   mode REG_MODE.  This block contains vectors of type TYPE_MODE.  The
   function references the memory at EXP of type TYPE and in mode
   MEM_MODE; this mode may be BLKmode if no more suitable mode is
   available.  */

static tree
neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
			  enum machine_mode reg_mode,
			  neon_builtin_type_mode type_mode)
{
  HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
  tree elem_type, upper_bound, array_type;

  /* Work out the size of the register block in bytes.  */
  reg_size = GET_MODE_SIZE (reg_mode);

  /* Work out the size of each vector in bytes.  */
  gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
  vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);

  /* Work out how many vectors there are.  */
  gcc_assert (reg_size % vector_size == 0);
  nvectors = reg_size / vector_size;

  /* Work out the type of each element.  */
  gcc_assert (POINTER_TYPE_P (type));
  elem_type = TREE_TYPE (type);

  /* Work out how many elements are being loaded or stored.
     MEM_MODE == REG_MODE implies a one-to-one mapping between register
     and memory elements; anything else implies a lane load or store.  */
  if (mem_mode == reg_mode)
    nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
  else
    nelems = 1;

  /* Create a type that describes the full access.  */
  upper_bound = build_int_cst (size_type_node, nelems - 1);
  array_type = build_array_type (elem_type, build_index_type (upper_bound));

  /* Dereference EXP using that type.  */
  return fold_build2 (MEM_REF, array_type, exp,
		      build_int_cst (build_pointer_type (array_type), 0));
}
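/* Illustrative example (editorial, not from the original source): for a
   two-register structure load such as vld2_s16, REG_MODE is TImode
   (16 bytes) and TYPE_MODE is the T_V4HI variant, so vector_size == 8 and
   nvectors == 2; with MEM_MODE == REG_MODE the access covers 8 HImode
   elements and the returned tree dereferences EXP as an array of 8
   int16_t, giving the alias machinery an accurate picture of the bytes
   actually touched.  */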
/* Expand a Neon builtin.  */
static rtx
arm_expand_neon_args (rtx target, int icode, int have_retval,
		      neon_builtin_type_mode type_mode,
		      tree exp, int fcode, ...)
{
  va_list ap;
  rtx pat;
  tree arg[NEON_MAX_BUILTIN_ARGS];
  rtx op[NEON_MAX_BUILTIN_ARGS];
  tree arg_type;
  tree formals;
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
  enum machine_mode other_mode;
  int argc = 0;
  int opno;

  if (have_retval
      && (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
    target = gen_reg_rtx (tmode);

  va_start (ap, fcode);

  formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));

  for (;;)
    {
      builtin_arg thisarg = (builtin_arg) va_arg (ap, int);

      if (thisarg == NEON_ARG_STOP)
	break;
      else
	{
	  opno = argc + have_retval;
	  mode[argc] = insn_data[icode].operand[opno].mode;
	  arg[argc] = CALL_EXPR_ARG (exp, argc);
	  arg_type = TREE_VALUE (formals);
	  if (thisarg == NEON_ARG_MEMORY)
	    {
	      other_mode = insn_data[icode].operand[1 - opno].mode;
	      arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
						    mode[argc], other_mode,
						    type_mode);
	    }

	  op[argc] = expand_normal (arg[argc]);

	  switch (thisarg)
	    {
	    case NEON_ARG_COPY_TO_REG:
	      /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
	      if (!(*insn_data[icode].operand[opno].predicate)
		  (op[argc], mode[argc]))
		op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
	      break;

	    case NEON_ARG_CONSTANT:
	      /* FIXME: This error message is somewhat unhelpful.  */
	      if (!(*insn_data[icode].operand[opno].predicate)
		  (op[argc], mode[argc]))
		error ("argument must be a constant");
	      break;

	    case NEON_ARG_MEMORY:
	      gcc_assert (MEM_P (op[argc]));
	      PUT_MODE (op[argc], mode[argc]);
	      /* ??? arm_neon.h uses the same built-in functions for signed
		 and unsigned accesses, casting where necessary.  This isn't
		 alias safe.  */
	      set_mem_alias_set (op[argc], 0);
	      if (!(*insn_data[icode].operand[opno].predicate)
		  (op[argc], mode[argc]))
		op[argc] = (replace_equiv_address
			    (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
	      break;

	    case NEON_ARG_STOP:
	      gcc_unreachable ();
	    }

	  argc++;
	  formals = TREE_CHAIN (formals);
	}
    }

  va_end (ap);

  if (have_retval)
    switch (argc)
      {
      case 1:
	pat = GEN_FCN (icode) (target, op[0]);
	break;

      case 2:
	pat = GEN_FCN (icode) (target, op[0], op[1]);
	break;

      case 3:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
	break;

      case 4:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
	break;

      case 5:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
	break;

      default:
	gcc_unreachable ();
      }
  else
    switch (argc)
      {
      case 1:
	pat = GEN_FCN (icode) (op[0]);
	break;

      case 2:
	pat = GEN_FCN (icode) (op[0], op[1]);
	break;

      case 3:
	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
	break;

      case 4:
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
	break;

      case 5:
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
	break;

      default:
	gcc_unreachable ();
      }

  if (!pat)
    return 0;

  emit_insn (pat);

  return target;
}
/* Expand a Neon builtin.  These are "special" because they don't have symbolic
   constants defined per-instruction or per instruction-variant.  Instead, the
   required info is looked up in the table neon_builtin_data.  */
static rtx
arm_expand_neon_builtin (int fcode, tree exp, rtx target)
{
  neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
  neon_itype itype = d->itype;
  enum insn_code icode = d->code;
  neon_builtin_type_mode type_mode = d->mode;

  switch (itype)
    {
    case NEON_UNOP:
    case NEON_CONVERT:
    case NEON_DUPLANE:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_BINOP:
    case NEON_SETLANE:
    case NEON_SCALARMUL:
    case NEON_SCALARMULL:
    case NEON_SCALARMULH:
    case NEON_SHIFTINSERT:
    case NEON_LOGICBINOP:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_TERNOP:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_GETLANE:
    case NEON_FIXCONV:
    case NEON_SHIFTIMM:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_CREATE:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_DUP:
    case NEON_RINT:
    case NEON_SPLIT:
    case NEON_REINTERP:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_COMBINE:
    case NEON_VTBL:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_RESULTPAIR:
      return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_STOP);

    case NEON_LANEMUL:
    case NEON_LANEMULL:
    case NEON_LANEMULH:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_LANEMAC:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SHIFTACC:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SCALARMAC:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_SELECT:
    case NEON_VTBX:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_STOP);

    case NEON_LOAD1:
    case NEON_LOADSTRUCT:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_MEMORY, NEON_ARG_STOP);

    case NEON_LOAD1LANE:
    case NEON_LOADSTRUCTLANE:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_STORE1:
    case NEON_STORESTRUCT:
      return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
	NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);

    case NEON_STORE1LANE:
    case NEON_STORESTRUCTLANE:
      return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
	NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);
    }

  gcc_unreachable ();
}
/* Emit code to reinterpret one Neon type as another, without altering bits.  */
void
neon_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}

/* Emit code to place a Neon pair result in memory locations (with equal
   registers).  */
void
neon_emit_pair_result_insn (enum machine_mode mode,
			    rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
			    rtx op1, rtx op2)
{
  rtx mem = gen_rtx_MEM (mode, destaddr);
  rtx tmp1 = gen_reg_rtx (mode);
  rtx tmp2 = gen_reg_rtx (mode);

  emit_insn (intfn (tmp1, op1, op2, tmp2));

  emit_move_insn (mem, tmp1);
  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
  emit_move_insn (mem, tmp2);
}
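/* Illustrative example (editorial, not from the original source): a
   NEON_RESULTPAIR builtin producing, say, an int16x4x2_t writes two
   vectors; with mode == V4HImode this helper stores tmp1 at *destaddr
   and tmp2 at *(destaddr + 8), i.e. the two halves of the pair laid out
   back to back in memory.  */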
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
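/* Illustrative example (editorial, not from the original source): for a
   two-part copy where DEST is {d1, d2} and SRC is {d0, d1}, the
   destination's REGNO exceeds the source's, so the else branch above
   orders the moves high-to-low (d2 <- d1, then d1 <- d0); the
   overlapping d1 is therefore read before it is overwritten.  */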
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  enum machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
      rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
arm_expand_builtin (tree exp,
		    rtx target,
		    rtx subtarget ATTRIBUTE_UNUSED,
		    enum machine_mode mode ATTRIBUTE_UNUSED,
		    int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description * d;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0;
  tree arg1;
  tree arg2;
  rtx op0;
  rtx op1;
  rtx op2;
  rtx pat;
  int fcode = DECL_FUNCTION_CODE (fndecl);
  size_t i;
  enum machine_mode tmode;
  enum machine_mode mode0;
  enum machine_mode mode1;
  enum machine_mode mode2;
  int opint;
  int selector;
  int mask;

  if (fcode >= ARM_BUILTIN_NEON_BASE)
    return arm_expand_neon_builtin (fcode, exp, target);

  switch (fcode)
    {
    case ARM_BUILTIN_TEXTRMSB:
    case ARM_BUILTIN_TEXTRMUB:
    case ARM_BUILTIN_TEXTRMSH:
    case ARM_BUILTIN_TEXTRMUH:
    case ARM_BUILTIN_TEXTRMSW:
    case ARM_BUILTIN_TEXTRMUW:
      icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
	       : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
	       : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
	       : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
	       : CODE_FOR_iwmmxt_textrmw);

      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return gen_reg_rtx (tmode);
	}

      opint = INTVAL (op1);
      if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
	{
	  if (opint > 7 || opint < 0)
	    error ("the range of selector should be in 0 to 7");
	}
      else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
	{
	  if (opint > 3 || opint < 0)
	    error ("the range of selector should be in 0 to 3");
	}
      else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW.  */
	{
	  if (opint > 1 || opint < 0)
	    error ("the range of selector should be in 0 to 1");
	}

      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case ARM_BUILTIN_WALIGNI:
      /* If op2 is immediate, call waligni, else call walignr.  */
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (CONST_INT_P (op2))
	{
	  icode = CODE_FOR_iwmmxt_waligni;
	  tmode = insn_data[icode].operand[0].mode;
	  mode0 = insn_data[icode].operand[1].mode;
	  mode1 = insn_data[icode].operand[2].mode;
	  mode2 = insn_data[icode].operand[3].mode;
	  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
	    op0 = copy_to_mode_reg (mode0, op0);
	  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
	    op1 = copy_to_mode_reg (mode1, op1);
	  gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
	  selector = INTVAL (op2);
	  if (selector > 7 || selector < 0)
	    error ("the range of selector should be in 0 to 7");
	}
      else
	{
	  icode = CODE_FOR_iwmmxt_walignr;
	  tmode = insn_data[icode].operand[0].mode;
	  mode0 = insn_data[icode].operand[1].mode;
	  mode1 = insn_data[icode].operand[2].mode;
	  mode2 = insn_data[icode].operand[3].mode;
	  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
	    op0 = copy_to_mode_reg (mode0, op0);
	  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
	    op1 = copy_to_mode_reg (mode1, op1);
	  if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
	    op2 = copy_to_mode_reg (mode2, op2);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;
21728 case ARM_BUILTIN_TINSRB:
21729 case ARM_BUILTIN_TINSRH:
21730 case ARM_BUILTIN_TINSRW:
21731 case ARM_BUILTIN_WMERGE:
21732 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
21733 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
21734 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
21735 : CODE_FOR_iwmmxt_tinsrw);
21736 arg0 = CALL_EXPR_ARG (exp, 0);
21737 arg1 = CALL_EXPR_ARG (exp, 1);
21738 arg2 = CALL_EXPR_ARG (exp, 2);
21739 op0 = expand_normal (arg0);
21740 op1 = expand_normal (arg1);
21741 op2 = expand_normal (arg2);
21742 tmode = insn_data[icode].operand[0].mode;
21743 mode0 = insn_data[icode].operand[1].mode;
21744 mode1 = insn_data[icode].operand[2].mode;
21745 mode2 = insn_data[icode].operand[3].mode;
21747 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21748 op0 = copy_to_mode_reg (mode0, op0);
21749 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21750 op1 = copy_to_mode_reg (mode1, op1);
21751 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21753 error ("selector must be an immediate");
21756 if (icode == CODE_FOR_iwmmxt_wmerge)
21758 selector = INTVAL (op2);
21759 if (selector > 7 || selector < 0)
21760 error ("the range of selector should be in 0 to 7");
21762 if ((icode == CODE_FOR_iwmmxt_tinsrb)
21763 || (icode == CODE_FOR_iwmmxt_tinsrh)
21764 || (icode == CODE_FOR_iwmmxt_tinsrw))
21767 selector = INTVAL (op2);
21768 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
21769 error ("the selector must be in the range 0 to 7");
21770 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
21771 error ("the selector must be in the range 0 to 3");
21772 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
21773 error ("the selector must be in the range 0 to 1");
21774 mask <<= selector;
21775 op2 = GEN_INT (mask);
21777 if (target == 0
21778 || GET_MODE (target) != tmode
21779 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21780 target = gen_reg_rtx (tmode);
21781 pat = GEN_FCN (icode) (target, op0, op1, op2);
21782 if (! pat)
21783 return 0;
21784 emit_insn (pat);
21785 return target;
21787 case ARM_BUILTIN_SETWCGR0:
21788 case ARM_BUILTIN_SETWCGR1:
21789 case ARM_BUILTIN_SETWCGR2:
21790 case ARM_BUILTIN_SETWCGR3:
21791 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
21792 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
21793 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
21794 : CODE_FOR_iwmmxt_setwcgr3);
21795 arg0 = CALL_EXPR_ARG (exp, 0);
21796 op0 = expand_normal (arg0);
21797 mode0 = insn_data[icode].operand[0].mode;
21798 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
21799 op0 = copy_to_mode_reg (mode0, op0);
21800 pat = GEN_FCN (icode) (op0);
21801 if (! pat)
21802 return 0;
21803 emit_insn (pat);
21804 return 0;
21806 case ARM_BUILTIN_GETWCGR0:
21807 case ARM_BUILTIN_GETWCGR1:
21808 case ARM_BUILTIN_GETWCGR2:
21809 case ARM_BUILTIN_GETWCGR3:
21810 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
21811 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
21812 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
21813 : CODE_FOR_iwmmxt_getwcgr3);
21814 tmode = insn_data[icode].operand[0].mode;
21815 if (target == 0
21816 || GET_MODE (target) != tmode
21817 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
21818 target = gen_reg_rtx (tmode);
21819 pat = GEN_FCN (icode) (target);
21820 if (! pat)
21821 return 0;
21822 emit_insn (pat);
21823 return target;
21825 case ARM_BUILTIN_WSHUFH:
21826 icode = CODE_FOR_iwmmxt_wshufh;
21827 arg0 = CALL_EXPR_ARG (exp, 0);
21828 arg1 = CALL_EXPR_ARG (exp, 1);
21829 op0 = expand_normal (arg0);
21830 op1 = expand_normal (arg1);
21831 tmode = insn_data[icode].operand[0].mode;
21832 mode1 = insn_data[icode].operand[1].mode;
21833 mode2 = insn_data[icode].operand[2].mode;
21835 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21836 op0 = copy_to_mode_reg (mode1, op0);
21837 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21839 error ("mask must be an immediate");
21840 return const0_rtx;
21842 selector = INTVAL (op1);
21843 if (selector < 0 || selector > 255)
21844 error ("the mask must be in the range 0 to 255");
21845 if (target == 0
21846 || GET_MODE (target) != tmode
21847 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21848 target = gen_reg_rtx (tmode);
21849 pat = GEN_FCN (icode) (target, op0, op1);
21850 if (! pat)
21851 return 0;
21852 emit_insn (pat);
21853 return target;
21855 case ARM_BUILTIN_WMADDS:
21856 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
21857 case ARM_BUILTIN_WMADDSX:
21858 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
21859 case ARM_BUILTIN_WMADDSN:
21860 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
21861 case ARM_BUILTIN_WMADDU:
21862 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
21863 case ARM_BUILTIN_WMADDUX:
21864 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
21865 case ARM_BUILTIN_WMADDUN:
21866 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
21867 case ARM_BUILTIN_WSADBZ:
21868 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
21869 case ARM_BUILTIN_WSADHZ:
21870 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
21872 /* Several three-argument builtins. */
21873 case ARM_BUILTIN_WMACS:
21874 case ARM_BUILTIN_WMACU:
21875 case ARM_BUILTIN_TMIA:
21876 case ARM_BUILTIN_TMIAPH:
21877 case ARM_BUILTIN_TMIATT:
21878 case ARM_BUILTIN_TMIATB:
21879 case ARM_BUILTIN_TMIABT:
21880 case ARM_BUILTIN_TMIABB:
21881 case ARM_BUILTIN_WQMIABB:
21882 case ARM_BUILTIN_WQMIABT:
21883 case ARM_BUILTIN_WQMIATB:
21884 case ARM_BUILTIN_WQMIATT:
21885 case ARM_BUILTIN_WQMIABBN:
21886 case ARM_BUILTIN_WQMIABTN:
21887 case ARM_BUILTIN_WQMIATBN:
21888 case ARM_BUILTIN_WQMIATTN:
21889 case ARM_BUILTIN_WMIABB:
21890 case ARM_BUILTIN_WMIABT:
21891 case ARM_BUILTIN_WMIATB:
21892 case ARM_BUILTIN_WMIATT:
21893 case ARM_BUILTIN_WMIABBN:
21894 case ARM_BUILTIN_WMIABTN:
21895 case ARM_BUILTIN_WMIATBN:
21896 case ARM_BUILTIN_WMIATTN:
21897 case ARM_BUILTIN_WMIAWBB:
21898 case ARM_BUILTIN_WMIAWBT:
21899 case ARM_BUILTIN_WMIAWTB:
21900 case ARM_BUILTIN_WMIAWTT:
21901 case ARM_BUILTIN_WMIAWBBN:
21902 case ARM_BUILTIN_WMIAWBTN:
21903 case ARM_BUILTIN_WMIAWTBN:
21904 case ARM_BUILTIN_WMIAWTTN:
21905 case ARM_BUILTIN_WSADB:
21906 case ARM_BUILTIN_WSADH:
21907 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
21908 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
21909 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
21910 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
21911 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
21912 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
21913 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
21914 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
21915 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
21916 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
21917 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
21918 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
21919 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
21920 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
21921 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
21922 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
21923 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
21924 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
21925 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
21926 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
21927 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
21928 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
21929 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
21930 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
21931 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
21932 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
21933 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
21934 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
21935 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
21936 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
21937 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
21938 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
21939 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
21940 : CODE_FOR_iwmmxt_wsadh);
21941 arg0 = CALL_EXPR_ARG (exp, 0);
21942 arg1 = CALL_EXPR_ARG (exp, 1);
21943 arg2 = CALL_EXPR_ARG (exp, 2);
21944 op0 = expand_normal (arg0);
21945 op1 = expand_normal (arg1);
21946 op2 = expand_normal (arg2);
21947 tmode = insn_data[icode].operand[0].mode;
21948 mode0 = insn_data[icode].operand[1].mode;
21949 mode1 = insn_data[icode].operand[2].mode;
21950 mode2 = insn_data[icode].operand[3].mode;
21952 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21953 op0 = copy_to_mode_reg (mode0, op0);
21954 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21955 op1 = copy_to_mode_reg (mode1, op1);
21956 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21957 op2 = copy_to_mode_reg (mode2, op2);
21958 if (target == 0
21959 || GET_MODE (target) != tmode
21960 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21961 target = gen_reg_rtx (tmode);
21962 pat = GEN_FCN (icode) (target, op0, op1, op2);
21963 if (! pat)
21964 return 0;
21965 emit_insn (pat);
21966 return target;
21968 case ARM_BUILTIN_WZERO:
21969 target = gen_reg_rtx (DImode);
21970 emit_insn (gen_iwmmxt_clrdi (target));
21971 return target;
21973 case ARM_BUILTIN_WSRLHI:
21974 case ARM_BUILTIN_WSRLWI:
21975 case ARM_BUILTIN_WSRLDI:
21976 case ARM_BUILTIN_WSLLHI:
21977 case ARM_BUILTIN_WSLLWI:
21978 case ARM_BUILTIN_WSLLDI:
21979 case ARM_BUILTIN_WSRAHI:
21980 case ARM_BUILTIN_WSRAWI:
21981 case ARM_BUILTIN_WSRADI:
21982 case ARM_BUILTIN_WRORHI:
21983 case ARM_BUILTIN_WRORWI:
21984 case ARM_BUILTIN_WRORDI:
21985 case ARM_BUILTIN_WSRLH:
21986 case ARM_BUILTIN_WSRLW:
21987 case ARM_BUILTIN_WSRLD:
21988 case ARM_BUILTIN_WSLLH:
21989 case ARM_BUILTIN_WSLLW:
21990 case ARM_BUILTIN_WSLLD:
21991 case ARM_BUILTIN_WSRAH:
21992 case ARM_BUILTIN_WSRAW:
21993 case ARM_BUILTIN_WSRAD:
21994 case ARM_BUILTIN_WRORH:
21995 case ARM_BUILTIN_WRORW:
21996 case ARM_BUILTIN_WRORD:
21997 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
21998 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
21999 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
22000 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
22001 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
22002 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
22003 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
22004 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
22005 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
22006 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
22007 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
22008 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
22009 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
22010 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
22011 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
22012 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
22013 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
22014 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
22015 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
22016 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
22017 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
22018 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
22019 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
22020 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
22021 : CODE_FOR_nothing);
22022 arg1 = CALL_EXPR_ARG (exp, 1);
22023 op1 = expand_normal (arg1);
22024 if (GET_MODE (op1) == VOIDmode)
22026 imm = INTVAL (op1);
22027 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
22028 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
22029 && (imm < 0 || imm > 32))
22031 if (fcode == ARM_BUILTIN_WRORHI)
22032 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_rori_pi16 in your code");
22033 else if (fcode == ARM_BUILTIN_WRORWI)
22034 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_rori_pi32 in your code");
22035 else if (fcode == ARM_BUILTIN_WRORH)
22036 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_ror_pi16 in your code");
22037 else
22038 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_ror_pi32 in your code");
22040 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
22041 && (imm < 0 || imm > 64))
22043 if (fcode == ARM_BUILTIN_WRORDI)
22044 error ("the count must be in the range 0 to 64; please check the intrinsic _mm_rori_si64 in your code");
22045 else
22046 error ("the count must be in the range 0 to 64; please check the intrinsic _mm_ror_si64 in your code");
22049 else if (imm < 0)
22050 if (fcode == ARM_BUILTIN_WSRLHI)
22051 error ("the count must be non-negative; please check the intrinsic _mm_srli_pi16 in your code");
22052 else if (fcode == ARM_BUILTIN_WSRLWI)
22053 error ("the count must be non-negative; please check the intrinsic _mm_srli_pi32 in your code");
22054 else if (fcode == ARM_BUILTIN_WSRLDI)
22055 error ("the count must be non-negative; please check the intrinsic _mm_srli_si64 in your code");
22056 else if (fcode == ARM_BUILTIN_WSLLHI)
22057 error ("the count must be non-negative; please check the intrinsic _mm_slli_pi16 in your code");
22058 else if (fcode == ARM_BUILTIN_WSLLWI)
22059 error ("the count must be non-negative; please check the intrinsic _mm_slli_pi32 in your code");
22060 else if (fcode == ARM_BUILTIN_WSLLDI)
22061 error ("the count must be non-negative; please check the intrinsic _mm_slli_si64 in your code");
22062 else if (fcode == ARM_BUILTIN_WSRAHI)
22063 error ("the count must be non-negative; please check the intrinsic _mm_srai_pi16 in your code");
22064 else if (fcode == ARM_BUILTIN_WSRAWI)
22065 error ("the count must be non-negative; please check the intrinsic _mm_srai_pi32 in your code");
22066 else if (fcode == ARM_BUILTIN_WSRADI)
22067 error ("the count must be non-negative; please check the intrinsic _mm_srai_si64 in your code");
22068 else if (fcode == ARM_BUILTIN_WSRLH)
22069 error ("the count must be non-negative; please check the intrinsic _mm_srl_pi16 in your code");
22070 else if (fcode == ARM_BUILTIN_WSRLW)
22071 error ("the count must be non-negative; please check the intrinsic _mm_srl_pi32 in your code");
22072 else if (fcode == ARM_BUILTIN_WSRLD)
22073 error ("the count must be non-negative; please check the intrinsic _mm_srl_si64 in your code");
22074 else if (fcode == ARM_BUILTIN_WSLLH)
22075 error ("the count must be non-negative; please check the intrinsic _mm_sll_pi16 in your code");
22076 else if (fcode == ARM_BUILTIN_WSLLW)
22077 error ("the count must be non-negative; please check the intrinsic _mm_sll_pi32 in your code");
22078 else if (fcode == ARM_BUILTIN_WSLLD)
22079 error ("the count must be non-negative; please check the intrinsic _mm_sll_si64 in your code");
22080 else if (fcode == ARM_BUILTIN_WSRAH)
22081 error ("the count must be non-negative; please check the intrinsic _mm_sra_pi16 in your code");
22082 else if (fcode == ARM_BUILTIN_WSRAW)
22083 error ("the count must be non-negative; please check the intrinsic _mm_sra_pi32 in your code");
22084 else
22085 error ("the count must be non-negative; please check the intrinsic _mm_sra_si64 in your code");
22088 return arm_expand_binop_builtin (icode, exp, target);
22094 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
22095 if (d->code == (const enum arm_builtins) fcode)
22096 return arm_expand_binop_builtin (d->icode, exp, target);
22098 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
22099 if (d->code == (const enum arm_builtins) fcode)
22100 return arm_expand_unop_builtin (d->icode, exp, target, 0);
22102 /* @@@ Should really do something sensible here. */
22103 return NULL_RTX;
22106 /* Return the number (counting from 0) of
22107 the least significant set bit in MASK. */
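/* For example, number_of_first_bit_set (0x28) is 3: 0x28 is 0b101000,
   whose lowest set bit is bit 3.  */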
22110 number_of_first_bit_set (unsigned mask)
22112 return ctz_hwi (mask);
22115 /* Like emit_multi_reg_push, but allowing for a different set of
22116 registers to be described as saved. MASK is the set of registers
22117 to be saved; REAL_REGS is the set of registers to be described as
22118 saved. If REAL_REGS is 0, only describe the stack adjustment. */
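/* For example, when the prologue uses a low register as a stand-in for
   a high register, MASK names the low register that is actually stored,
   while REAL_REGS names the high register whose save the unwind info
   should describe.  */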
22121 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
22123 unsigned long regno;
22124 rtx par[10], tmp, reg, insn;
22125 int i, j;
22127 /* Build the parallel of the registers actually being stored. */
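/* Note: each "mask &= mask - 1" step below clears the lowest set bit
   of MASK, so I ends up counting the registers being pushed.  */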
22128 for (i = 0; mask; ++i, mask &= mask - 1)
22130 regno = ctz_hwi (mask);
22131 reg = gen_rtx_REG (SImode, regno);
22133 if (i == 0)
22134 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
22135 else
22136 tmp = gen_rtx_USE (VOIDmode, reg);
22137 par[i] = tmp;
22141 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
22142 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22143 tmp = gen_frame_mem (BLKmode, tmp);
22144 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
22145 par[0] = tmp;
22147 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
22148 insn = emit_insn (tmp);
22150 /* Always build the stack adjustment note for unwind info. */
22151 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
22152 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
22153 par[0] = tmp;
22155 /* Build the parallel of the registers recorded as saved for unwind. */
22156 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
22158 regno = ctz_hwi (real_regs);
22159 reg = gen_rtx_REG (SImode, regno);
22161 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
22162 tmp = gen_frame_mem (SImode, tmp);
22163 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
22164 RTX_FRAME_RELATED_P (tmp) = 1;
22165 par[j + 1] = tmp;
22168 if (j == 0)
22169 tmp = par[0];
22170 else
22172 RTX_FRAME_RELATED_P (par[0]) = 1;
22173 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
22176 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
22178 return insn;
22181 /* Emit code to push or pop registers to or from the stack. F is the
22182 assembly file. MASK is the registers to pop. */
22184 thumb_pop (FILE *f, unsigned long mask)
22187 int lo_mask = mask & 0xFF;
22188 int pushed_words = 0;
22192 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
22194 /* Special case. Do not generate a POP PC statement here, do it in
22195 thumb_exit. */
22196 thumb_exit (f, -1);
22200 fprintf (f, "\tpop\t{");
22202 /* Look at the low registers first. */
22203 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
22204 if (lo_mask & 1)
22207 asm_fprintf (f, "%r", regno);
22209 if ((lo_mask & ~1) != 0)
22210 fprintf (f, ", ");
22212 pushed_words++;
22216 if (mask & (1 << PC_REGNUM))
22218 /* Catch popping the PC. */
22219 if (TARGET_INTERWORK || TARGET_BACKTRACE
22220 || crtl->calls_eh_return)
22222 /* The PC is never popped directly; instead
22223 it is popped into r3 and then BX is used. */
22224 fprintf (f, "}\n");
22226 thumb_exit (f, -1);
22227 return;
22235 asm_fprintf (f, "%r", PC_REGNUM);
22239 fprintf (f, "}\n");
22242 /* Generate code to return from a thumb function.
22243 If 'reg_containing_return_addr' is -1, then the return address is
22244 actually on the stack, at the stack pointer. */
22246 thumb_exit (FILE *f, int reg_containing_return_addr)
22248 unsigned regs_available_for_popping;
22249 unsigned regs_to_pop;
22250 int pops_needed;
22251 unsigned available;
22252 unsigned required;
22253 enum machine_mode mode;
22254 int size;
22255 int restore_a4 = FALSE;
22257 /* Compute the registers we need to pop. */
22258 regs_to_pop = 0;
22259 pops_needed = 0;
22261 if (reg_containing_return_addr == -1)
22263 regs_to_pop |= 1 << LR_REGNUM;
22264 ++ pops_needed;
22267 if (TARGET_BACKTRACE)
22269 /* Restore the (ARM) frame pointer and stack pointer. */
22270 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
22271 pops_needed += 2;
22274 /* If there is nothing to pop then just emit the BX instruction and
22275 return. */
22276 if (pops_needed == 0)
22278 if (crtl->calls_eh_return)
22279 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
22281 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
22282 return;
22284 /* Otherwise if we are not supporting interworking and we have not created
22285 a backtrace structure and the function was not entered in ARM mode then
22286 just pop the return address straight into the PC. */
22287 else if (!TARGET_INTERWORK
22288 && !TARGET_BACKTRACE
22289 && !is_called_in_ARM_mode (current_function_decl)
22290 && !crtl->calls_eh_return)
22292 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
22293 return;
22296 /* Find out how many of the (return) argument registers we can corrupt. */
22297 regs_available_for_popping = 0;
22299 /* If returning via __builtin_eh_return, the bottom three registers
22300 all contain information needed for the return. */
22301 if (crtl->calls_eh_return)
22302 size = 12;
22303 else
22305 /* If we can deduce the registers used from the function's
22306 return value. This is more reliable than examining
22307 df_regs_ever_live_p () because that will be set if the register is
22308 ever used in the function, not just if the register is used
22309 to hold a return value. */
22311 if (crtl->return_rtx != 0)
22312 mode = GET_MODE (crtl->return_rtx);
22313 else
22314 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22316 size = GET_MODE_SIZE (mode);
22320 /* In a void function we can use any argument register.
22321 In a function that returns a structure on the stack
22322 we can use the second and third argument registers. */
22323 if (mode == VOIDmode)
22324 regs_available_for_popping =
22325 (1 << ARG_REGISTER (1))
22326 | (1 << ARG_REGISTER (2))
22327 | (1 << ARG_REGISTER (3));
22329 regs_available_for_popping =
22330 (1 << ARG_REGISTER (2))
22331 | (1 << ARG_REGISTER (3));
22333 else if (size <= 4)
22334 regs_available_for_popping =
22335 (1 << ARG_REGISTER (2))
22336 | (1 << ARG_REGISTER (3));
22337 else if (size <= 8)
22338 regs_available_for_popping =
22339 (1 << ARG_REGISTER (3));
22342 /* Match registers to be popped with registers into which we pop them. */
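/* Note: X & -X isolates the lowest set bit of X, so each iteration of
   the loop below retires one popped register and one popping register;
   e.g. for X = 0x0c, X & -X is 0x04.  */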
22343 for (available = regs_available_for_popping,
22344 required = regs_to_pop;
22345 required != 0 && available != 0;
22346 available &= ~(available & - available),
22347 required &= ~(required & - required))
22348 -- pops_needed;
22350 /* If we have any popping registers left over, remove them. */
22351 if (available > 0)
22352 regs_available_for_popping &= ~available;
22354 /* Otherwise if we need another popping register we can use
22355 the fourth argument register. */
22356 else if (pops_needed)
22358 /* If we have not found any free argument registers and
22359 reg a4 contains the return address, we must move it. */
22360 if (regs_available_for_popping == 0
22361 && reg_containing_return_addr == LAST_ARG_REGNUM)
22363 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
22364 reg_containing_return_addr = LR_REGNUM;
22366 else if (size > 12)
22368 /* Register a4 is being used to hold part of the return value,
22369 but we have dire need of a free, low register. */
22371 restore_a4 = TRUE;
22372 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
22375 if (reg_containing_return_addr != LAST_ARG_REGNUM)
22377 /* The fourth argument register is available. */
22378 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
22384 /* Pop as many registers as we can. */
22385 thumb_pop (f, regs_available_for_popping);
22387 /* Process the registers we popped. */
22388 if (reg_containing_return_addr == -1)
22390 /* The return address was popped into the lowest numbered register. */
22391 regs_to_pop &= ~(1 << LR_REGNUM);
22393 reg_containing_return_addr =
22394 number_of_first_bit_set (regs_available_for_popping);
22396 /* Remove this register from the mask of available registers, so that
22397 the return address will not be corrupted by further pops. */
22398 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
22401 /* If we popped other registers then handle them here. */
22402 if (regs_available_for_popping)
22406 /* Work out which register currently contains the frame pointer. */
22407 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
22409 /* Move it into the correct place. */
22410 asm_fprintf (f, "\tmov\t%r, %r\n",
22411 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
22413 /* (Temporarily) remove it from the mask of popped registers. */
22414 regs_available_for_popping &= ~(1 << frame_pointer);
22415 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
22417 if (regs_available_for_popping)
22421 /* We popped the stack pointer as well,
22422 find the register that contains it. */
22423 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
22425 /* Move it into the stack register. */
22426 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
22428 /* At this point we have popped all necessary registers, so
22429 do not worry about restoring regs_available_for_popping
22430 to its correct value:
22432 assert (pops_needed == 0)
22433 assert (regs_available_for_popping == (1 << frame_pointer))
22434 assert (regs_to_pop == (1 << STACK_POINTER)) */
22438 /* Since we have just moved the popped value into the frame
22439 pointer, the popping register is available for reuse, and
22440 we know that we still have the stack pointer left to pop. */
22441 regs_available_for_popping |= (1 << frame_pointer);
22445 /* If we still have registers left on the stack, but we no longer have
22446 any registers into which we can pop them, then we must move the return
22447 address into the link register and make available the register that
22448 we used to hold the return address. */
22449 if (regs_available_for_popping == 0 && pops_needed > 0)
22451 regs_available_for_popping |= 1 << reg_containing_return_addr;
22453 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
22454 reg_containing_return_addr);
22456 reg_containing_return_addr = LR_REGNUM;
22459 /* If we have registers left on the stack then pop some more.
22460 We know that at most we will want to pop FP and SP. */
22461 if (pops_needed > 0)
22466 thumb_pop (f, regs_available_for_popping);
22468 /* We have popped either FP or SP.
22469 Move whichever one it is into the correct register. */
22470 popped_into = number_of_first_bit_set (regs_available_for_popping);
22471 move_to = number_of_first_bit_set (regs_to_pop);
22473 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
22475 regs_to_pop &= ~(1 << move_to);
22480 /* If we still have not popped everything then we must have only
22481 had one register available to us and we are now popping the SP. */
22482 if (pops_needed > 0)
22486 thumb_pop (f, regs_available_for_popping);
22488 popped_into = number_of_first_bit_set (regs_available_for_popping);
22490 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
22491 /*
22492 assert (regs_to_pop == (1 << STACK_POINTER))
22493 assert (pops_needed == 1)
22494 */
22497 /* If necessary restore the a4 register. */
22498 if (restore_a4)
22500 if (reg_containing_return_addr != LR_REGNUM)
22502 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
22503 reg_containing_return_addr = LR_REGNUM;
22506 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
22509 if (crtl->calls_eh_return)
22510 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
22512 /* Return to caller. */
22513 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
22516 /* Scan INSN just before assembler is output for it.
22517 For Thumb-1, we track the status of the condition codes; this
22518 information is used in the cbranchsi4_insn pattern. */
22520 thumb1_final_prescan_insn (rtx insn)
22522 if (flag_print_asm_name)
22523 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
22524 INSN_ADDRESSES (INSN_UID (insn)));
22525 /* Don't overwrite the previous setter when we get to a cbranch. */
22526 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
22528 enum attr_conds conds;
22530 if (cfun->machine->thumb1_cc_insn)
22532 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
22533 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
22534 CC_STATUS_INIT;
22536 conds = get_attr_conds (insn);
22537 if (conds == CONDS_SET)
22539 rtx set = single_set (insn);
22540 cfun->machine->thumb1_cc_insn = insn;
22541 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
22542 cfun->machine->thumb1_cc_op1 = const0_rtx;
22543 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
22544 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
22546 rtx src1 = XEXP (SET_SRC (set), 1);
22547 if (src1 == const0_rtx)
22548 cfun->machine->thumb1_cc_mode = CCmode;
22550 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
22552 /* Record the src register operand instead of dest because
22553 cprop_hardreg pass propagates src. */
22554 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
22557 else if (conds != CONDS_NOCOND)
22558 cfun->machine->thumb1_cc_insn = NULL_RTX;
22563 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
22565 unsigned HOST_WIDE_INT mask = 0xff;
22568 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
22569 if (val == 0) /* XXX */
22570 return 0;
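/* The loop below accepts any value that is an 8-bit field shifted left
   by 0 to 24 bits; e.g. 0x00ff0000 (0xff << 16) succeeds, while 0x101
   spans nine bits and fails.  */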
22572 for (i = 0; i < 25; i++)
22573 if ((val & (mask << i)) == val)
22574 return 1;
22576 return 0;
22579 /* Returns nonzero if the current function contains,
22580 or might contain a far jump. */
22582 thumb_far_jump_used_p (void)
22586 /* This test is only important for leaf functions. */
22587 /* assert (!leaf_function_p ()); */
22589 /* If we have already decided that far jumps may be used,
22590 do not bother checking again, and always return true even if
22591 it turns out that they are not being used. Once we have made
22592 the decision that far jumps are present (and that hence the link
22593 register will be pushed onto the stack) we cannot go back on it. */
22594 if (cfun->machine->far_jump_used)
22595 return 1;
22597 /* If this function is not being called from the prologue/epilogue
22598 generation code then it must be being called from the
22599 INITIAL_ELIMINATION_OFFSET macro. */
22600 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
22602 /* In this case we know that we are being asked about the elimination
22603 of the arg pointer register. If that register is not being used,
22604 then there are no arguments on the stack, and we do not have to
22605 worry that a far jump might force the prologue to push the link
22606 register, changing the stack offsets. In this case we can just
22607 return false, since the presence of far jumps in the function will
22608 not affect stack offsets.
22610 If the arg pointer is live (or if it was live, but has now been
22611 eliminated and so set to dead) then we do have to test to see if
22612 the function might contain a far jump. This test can lead to some
22613 false negatives, since before reload is completed, the length of
22614 branch instructions is not known, so gcc defaults to returning their
22615 longest length, which in turn sets the far jump attribute to true.
22617 A false negative will not result in bad code being generated, but it
22618 will result in a needless push and pop of the link register. We
22619 hope that this does not occur too often.
22621 If we need doubleword stack alignment this could affect the other
22622 elimination offsets so we can't risk getting it wrong. */
22623 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
22624 cfun->machine->arg_pointer_live = 1;
22625 else if (!cfun->machine->arg_pointer_live)
22626 return 0;
22629 /* Check to see if the function contains a branch
22630 insn with the far jump attribute set. */
22631 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22633 if (JUMP_P (insn)
22634 /* Ignore tablejump patterns. */
22635 && GET_CODE (PATTERN (insn)) != ADDR_VEC
22636 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
22637 && get_attr_far_jump (insn) == FAR_JUMP_YES
22638 )
22640 /* Record the fact that we have decided that
22641 the function does use far jumps. */
22642 cfun->machine->far_jump_used = 1;
22643 return 1;
22647 return 0;
22650 /* Return nonzero if FUNC must be entered in ARM mode. */
22652 is_called_in_ARM_mode (tree func)
22654 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
22656 /* Ignore the problem about functions whose address is taken. */
22657 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
22658 return true;
22661 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
22667 /* Given the stack offsets and register mask in OFFSETS, decide how
22668 many additional registers to push instead of subtracting a constant
22669 from SP. For epilogues the principle is the same except we use pop.
22670 FOR_PROLOGUE indicates which we're generating. */
22672 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
22674 HOST_WIDE_INT amount;
22675 unsigned long live_regs_mask = offsets->saved_regs_mask;
22676 /* Extract a mask of the ones we can give to the Thumb's push/pop
22677 instructions. */
22678 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
22679 /* Then count how many other high registers will need to be pushed. */
22680 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
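/* Bits 8-11 of LIVE_REGS_MASK are the high registers r8-r11, which
   Thumb-1 push/pop cannot store directly.  */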
22681 int n_free, reg_base, size;
22683 if (!for_prologue && frame_pointer_needed)
22684 amount = offsets->locals_base - offsets->saved_regs;
22686 amount = offsets->outgoing_args - offsets->saved_regs;
22688 /* If the stack frame size is 512 exactly, we can save one load
22689 instruction, which should make this a win even when optimizing
22690 for speed. */
22691 if (!optimize_size && amount != 512)
22692 return 0;
22694 /* Can't do this if there are high registers to push. */
22695 if (high_regs_pushed != 0)
22696 return 0;
22698 /* Shouldn't do it in the prologue if no registers would normally
22699 be pushed at all. In the epilogue, also allow it if we'll have
22700 a pop insn for the PC. */
22701 if (l_mask == 0
22702 && (for_prologue
22703 || TARGET_BACKTRACE
22704 || (live_regs_mask & 1 << LR_REGNUM) == 0
22705 || TARGET_INTERWORK
22706 || crtl->args.pretend_args_size != 0))
22707 return 0;
22709 /* Don't do this if thumb_expand_prologue wants to emit instructions
22710 between the push and the stack frame allocation. */
22711 if (for_prologue
22712 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
22713 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
22714 return 0;
22716 reg_base = 0;
22717 n_free = 0;
22718 if (!for_prologue)
22720 size = arm_size_return_regs ();
22721 reg_base = ARM_NUM_INTS (size);
22722 live_regs_mask >>= reg_base;
22725 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
22726 && (for_prologue || call_used_regs[reg_base + n_free]))
22728 live_regs_mask >>= 1;
22729 n_free++;
22732 if (n_free == 0)
22733 return 0;
22734 gcc_assert (amount / 4 * 4 == amount);
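/* A worked example: with AMOUNT == 516 and N_FREE == 2, the check below
   sees 516 - 8 < 512 and returns (516 - 508) / 4 == 2, so pushing two
   extra registers leaves a decrement that fits a single instruction.  */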
22736 if (amount >= 512 && (amount - n_free * 4) < 512)
22737 return (amount - 508) / 4;
22738 if (amount <= n_free * 4)
22739 return amount / 4;
22740 return n_free;
22743 /* The bits which aren't usefully expanded as rtl. */
22744 const char *
22745 thumb1_unexpanded_epilogue (void)
22747 arm_stack_offsets *offsets;
22748 int regno;
22749 unsigned long live_regs_mask = 0;
22750 int high_regs_pushed = 0;
22752 int had_to_push_lr;
22753 int size;
22755 if (cfun->machine->return_used_this_function != 0)
22756 return "";
22758 if (IS_NAKED (arm_current_func_type ()))
22759 return "";
22761 offsets = arm_get_frame_offsets ();
22762 live_regs_mask = offsets->saved_regs_mask;
22763 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
22765 /* If we can deduce the registers used from the function's return value.
22766 This is more reliable than examining df_regs_ever_live_p () because that
22767 will be set if the register is ever used in the function, not just if
22768 the register is used to hold a return value. */
22769 size = arm_size_return_regs ();
22771 extra_pop = thumb1_extra_regs_pushed (offsets, false);
22772 if (extra_pop > 0)
22774 unsigned long extra_mask = (1 << extra_pop) - 1;
22775 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
22778 /* The prolog may have pushed some high registers to use as
22779 work registers. e.g. the testsuite file:
22780 gcc/testsuite/gcc.c-torture/execute/complex-2.c
22781 compiles to produce:
22782 push {r4, r5, r6, r7, lr}
22783 mov r7, r9
22784 mov r6, r8
22785 push {r6, r7}
22786 as part of the prolog. We have to undo that pushing here. */
22788 if (high_regs_pushed)
22790 unsigned long mask = live_regs_mask & 0xff;
22791 int next_hi_reg;
22793 /* The available low registers depend on the size of the value we are
22794 returning. */
22795 if (size <= 12)
22796 mask |= 1 << 3;
22797 if (size <= 8)
22798 mask |= 1 << 2;
22800 if (mask == 0)
22801 /* Oh dear! We have no low registers into which we can pop
22802 high registers! */
22803 internal_error
22804 ("no low registers available for popping high registers");
22806 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
22807 if (live_regs_mask & (1 << next_hi_reg))
22808 break;
22810 while (high_regs_pushed)
22812 /* Find lo register(s) into which the high register(s) can
22813 be popped. */
22814 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
22816 if (mask & (1 << regno))
22817 high_regs_pushed--;
22818 if (high_regs_pushed == 0)
22819 break;
22822 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
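/* E.g. if the loop above stopped at regno == 2, the mask above keeps
   only bits 0-2 ((2 << 2) - 1 == 0x07), so the moves below read back
   exactly the low registers that were just popped into.  */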
22824 /* Pop the values into the low register(s). */
22825 thumb_pop (asm_out_file, mask);
22827 /* Move the value(s) into the high registers. */
22828 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
22830 if (mask & (1 << regno))
22832 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
22833 regno);
22835 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
22836 if (live_regs_mask & (1 << next_hi_reg))
22837 break;
22841 live_regs_mask &= ~0x0f00;
22844 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
22845 live_regs_mask &= 0xff;
22847 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
22849 /* Pop the return address into the PC. */
22850 if (had_to_push_lr)
22851 live_regs_mask |= 1 << PC_REGNUM;
22853 /* Either no argument registers were pushed or a backtrace
22854 structure was created which includes an adjusted stack
22855 pointer, so just pop everything. */
22856 if (live_regs_mask)
22857 thumb_pop (asm_out_file, live_regs_mask);
22859 /* We have either just popped the return address into the
22860 PC or it was kept in LR for the entire function.
22861 Note that thumb_pop has already called thumb_exit if the
22862 PC was in the list. */
22863 if (!had_to_push_lr)
22864 thumb_exit (asm_out_file, LR_REGNUM);
22868 /* Pop everything but the return address. */
22869 if (live_regs_mask)
22870 thumb_pop (asm_out_file, live_regs_mask);
22872 if (had_to_push_lr)
22874 if (size > 12)
22876 /* We have no free low regs, so save one. */
22877 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
22878 LAST_ARG_REGNUM);
22881 /* Get the return address into a temporary register. */
22882 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
22884 if (size > 12)
22886 /* Move the return address to lr. */
22887 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
22888 LAST_ARG_REGNUM);
22889 /* Restore the low register. */
22890 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
22891 IP_REGNUM);
22892 regno = LR_REGNUM;
22894 else
22895 regno = LAST_ARG_REGNUM;
22897 else
22898 regno = LR_REGNUM;
22900 /* Remove the argument registers that were pushed onto the stack. */
22901 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
22902 SP_REGNUM, SP_REGNUM,
22903 crtl->args.pretend_args_size);
22905 thumb_exit (asm_out_file, regno);
22908 return "";
22911 /* Functions to save and restore machine-specific function data. */
22912 static struct machine_function *
22913 arm_init_machine_status (void)
22915 struct machine_function *machine;
22916 machine = ggc_alloc_cleared_machine_function ();
22918 #if ARM_FT_UNKNOWN != 0
22919 machine->func_type = ARM_FT_UNKNOWN;
22920 #endif
22922 return machine;
22924 /* Return an RTX indicating where the return address to the
22925 calling function can be found. */
22927 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
22929 if (count != 0)
22930 return NULL_RTX;
22932 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
22935 /* Do anything needed before RTL is emitted for each function. */
22937 arm_init_expanders (void)
22939 /* Arrange to initialize and mark the machine per-function status. */
22940 init_machine_status = arm_init_machine_status;
22942 /* This is to stop the combine pass optimizing away the alignment
22943 adjustment of va_arg. */
22944 /* ??? It is claimed that this should not be necessary. */
22945 if (cfun)
22946 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
22950 /* Like arm_compute_initial_elimination_offset. Simpler because there
22951 isn't an ABI specified frame pointer for Thumb. Instead, we set it
22952 to point at the base of the local variables after static stack
22953 space for a function has been allocated. */
22956 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
22958 arm_stack_offsets *offsets;
22960 offsets = arm_get_frame_offsets ();
22962 switch (from)
22964 case ARG_POINTER_REGNUM:
22965 switch (to)
22967 case STACK_POINTER_REGNUM:
22968 return offsets->outgoing_args - offsets->saved_args;
22970 case FRAME_POINTER_REGNUM:
22971 return offsets->soft_frame - offsets->saved_args;
22973 case ARM_HARD_FRAME_POINTER_REGNUM:
22974 return offsets->saved_regs - offsets->saved_args;
22976 case THUMB_HARD_FRAME_POINTER_REGNUM:
22977 return offsets->locals_base - offsets->saved_args;
22980 gcc_unreachable ();
22984 case FRAME_POINTER_REGNUM:
22985 switch (to)
22987 case STACK_POINTER_REGNUM:
22988 return offsets->outgoing_args - offsets->soft_frame;
22990 case ARM_HARD_FRAME_POINTER_REGNUM:
22991 return offsets->saved_regs - offsets->soft_frame;
22993 case THUMB_HARD_FRAME_POINTER_REGNUM:
22994 return offsets->locals_base - offsets->soft_frame;
22997 gcc_unreachable ();
23002 gcc_unreachable ();
23006 /* Generate the function's prologue. */
23009 thumb1_expand_prologue (void)
23011 rtx insn;
23013 HOST_WIDE_INT amount;
23014 arm_stack_offsets *offsets;
23015 unsigned long func_type;
23016 int regno;
23017 unsigned long live_regs_mask;
23018 unsigned long l_mask;
23019 unsigned high_regs_pushed = 0;
23021 func_type = arm_current_func_type ();
23023 /* Naked functions don't have prologues. */
23024 if (IS_NAKED (func_type))
23025 return;
23027 if (IS_INTERRUPT (func_type))
23029 error ("interrupt service routines cannot be coded in Thumb mode");
23030 return;
23033 if (is_called_in_ARM_mode (current_function_decl))
23034 emit_insn (gen_prologue_thumb1_interwork ());
23036 offsets = arm_get_frame_offsets ();
23037 live_regs_mask = offsets->saved_regs_mask;
23039 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
23040 l_mask = live_regs_mask & 0x40ff;
23041 /* Then count how many other high registers will need to be pushed. */
23042 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23044 if (crtl->args.pretend_args_size)
23046 rtx x = GEN_INT (-crtl->args.pretend_args_size);
23048 if (cfun->machine->uses_anonymous_args)
23050 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
23051 unsigned long mask;
23053 mask = 1ul << (LAST_ARG_REGNUM + 1);
23054 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
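/* For example, with 8 bytes of pretend args NUM_PUSHES is 2 and MASK
   becomes (1 << 4) - (1 << 2) == 0x0c, i.e. {r2, r3}.  */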
23056 insn = thumb1_emit_multi_reg_push (mask, 0);
23059 else
23060 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
23061 stack_pointer_rtx, x));
23063 RTX_FRAME_RELATED_P (insn) = 1;
23066 if (TARGET_BACKTRACE)
23068 HOST_WIDE_INT offset = 0;
23069 unsigned work_register;
23070 rtx work_reg, x, arm_hfp_rtx;
23072 /* We have been asked to create a stack backtrace structure.
23073 The code looks like this:
23077 0 sub SP, #16 Reserve space for 4 registers.
23078 2 push {R7} Push low registers.
23079 4 add R7, SP, #20 Get the stack pointer before the push.
23080 6 str R7, [SP, #8] Store the stack pointer
23081 (before reserving the space).
23082 8 mov R7, PC Get hold of the start of this code + 12.
23083 10 str R7, [SP, #16] Store it.
23084 12 mov R7, FP Get hold of the current frame pointer.
23085 14 str R7, [SP, #4] Store it.
23086 16 mov R7, LR Get hold of the current return address.
23087 18 str R7, [SP, #12] Store it.
23088 20 add R7, SP, #16 Point at the start of the
23089 backtrace structure.
23090 22 mov FP, R7 Put this value into the frame pointer. */
23092 work_register = thumb_find_work_register (live_regs_mask);
23093 work_reg = gen_rtx_REG (SImode, work_register);
23094 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
23096 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
23097 stack_pointer_rtx, GEN_INT (-16)));
23098 RTX_FRAME_RELATED_P (insn) = 1;
23100 if (l_mask)
23102 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
23103 RTX_FRAME_RELATED_P (insn) = 1;
23105 offset = bit_count (l_mask) * UNITS_PER_WORD;
23108 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
23109 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
23111 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
23112 x = gen_frame_mem (SImode, x);
23113 emit_move_insn (x, work_reg);
23115 /* Make sure that the instruction fetching the PC is in the right place
23116 to calculate "start of backtrace creation code + 12". */
23117 /* ??? The stores using the common WORK_REG ought to be enough to
23118 prevent the scheduler from doing anything weird. Failing that
23119 we could always move all of the following into an UNSPEC_VOLATILE. */
23120 if (l_mask)
23122 x = gen_rtx_REG (SImode, PC_REGNUM);
23123 emit_move_insn (work_reg, x);
23125 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
23126 x = gen_frame_mem (SImode, x);
23127 emit_move_insn (x, work_reg);
23129 emit_move_insn (work_reg, arm_hfp_rtx);
23131 x = plus_constant (Pmode, stack_pointer_rtx, offset);
23132 x = gen_frame_mem (SImode, x);
23133 emit_move_insn (x, work_reg);
23135 else
23137 emit_move_insn (work_reg, arm_hfp_rtx);
23139 x = plus_constant (Pmode, stack_pointer_rtx, offset);
23140 x = gen_frame_mem (SImode, x);
23141 emit_move_insn (x, work_reg);
23143 x = gen_rtx_REG (SImode, PC_REGNUM);
23144 emit_move_insn (work_reg, x);
23146 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
23147 x = gen_frame_mem (SImode, x);
23148 emit_move_insn (x, work_reg);
23151 x = gen_rtx_REG (SImode, LR_REGNUM);
23152 emit_move_insn (work_reg, x);
23154 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
23155 x = gen_frame_mem (SImode, x);
23156 emit_move_insn (x, work_reg);
23158 x = GEN_INT (offset + 12);
23159 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
23161 emit_move_insn (arm_hfp_rtx, work_reg);
23163 /* Optimization: If we are not pushing any low registers but we are going
23164 to push some high registers then delay our first push. This will just
23165 be a push of LR and we can combine it with the push of the first high
23166 register. */
23167 else if ((l_mask & 0xff) != 0
23168 || (high_regs_pushed == 0 && l_mask))
23170 unsigned long mask = l_mask;
23171 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
23172 insn = thumb1_emit_multi_reg_push (mask, mask);
23173 RTX_FRAME_RELATED_P (insn) = 1;
23176 if (high_regs_pushed)
23178 unsigned pushable_regs;
23179 unsigned next_hi_reg;
23180 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
23181 : crtl->args.info.nregs;
23182 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
23184 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
23185 if (live_regs_mask & (1 << next_hi_reg))
23186 break;
23188 /* Here we need to mask out registers used for passing arguments, even
23189 if they could otherwise be pushed. This avoids using them to stash the
23190 high registers, which could clobber incoming arguments. */
23191 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
23193 if (pushable_regs == 0)
23194 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
23196 while (high_regs_pushed > 0)
23198 unsigned long real_regs_mask = 0;
23200 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
23202 if (pushable_regs & (1 << regno))
23204 emit_move_insn (gen_rtx_REG (SImode, regno),
23205 gen_rtx_REG (SImode, next_hi_reg));
23207 high_regs_pushed --;
23208 real_regs_mask |= (1 << next_hi_reg);
23210 if (high_regs_pushed)
23212 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
23213 next_hi_reg --)
23214 if (live_regs_mask & (1 << next_hi_reg))
23215 break;
23217 else
23219 pushable_regs &= ~((1 << regno) - 1);
23220 break;
23225 /* If we had to find a work register and we have not yet
23226 saved the LR then add it to the list of regs to push. */
23227 if (l_mask == (1 << LR_REGNUM))
23229 pushable_regs |= l_mask;
23230 real_regs_mask |= l_mask;
23231 l_mask = 0;
23234 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
23235 RTX_FRAME_RELATED_P (insn) = 1;
23239 /* Load the pic register before setting the frame pointer,
23240 so we can use r7 as a temporary work register. */
23241 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23242 arm_load_pic_register (live_regs_mask);
23244 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
23245 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
23246 stack_pointer_rtx);
23248 if (flag_stack_usage_info)
23249 current_function_static_stack_size
23250 = offsets->outgoing_args - offsets->saved_args;
23252 amount = offsets->outgoing_args - offsets->saved_regs;
23253 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
23255 if (amount)
23257 if (amount < 512)
23258 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23259 GEN_INT (- amount)));
23260 RTX_FRAME_RELATED_P (insn) = 1;
23262 else
23265 rtx reg, dwarf;
23266 /* The stack decrement is too big for an immediate value in a single
23267 insn. In theory we could issue multiple subtracts, but after
23268 three of them it becomes more space efficient to place the full
23269 value in the constant pool and load into a register. (Also the
23270 ARM debugger really likes to see only one stack decrement per
23271 function). So instead we look for a scratch register into which
23272 we can load the decrement, and then we subtract this from the
23273 stack pointer. Unfortunately on the thumb the only available
23274 scratch registers are the argument registers, and we cannot use
23275 these as they may hold arguments to the function. Instead we
23276 attempt to locate a call preserved register which is used by this
23277 function. If we can find one, then we know that it will have
23278 been pushed at the start of the prologue and so we can corrupt
23279 it now. */
23280 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
23281 if (live_regs_mask & (1 << regno))
23282 break;
23284 gcc_assert (regno <= LAST_LO_REGNUM);
23286 reg = gen_rtx_REG (SImode, regno);
23288 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
23290 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
23291 stack_pointer_rtx, reg));
23293 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
23294 plus_constant (Pmode, stack_pointer_rtx,
23295 - amount));
23296 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23297 RTX_FRAME_RELATED_P (insn) = 1;
23301 if (frame_pointer_needed)
23302 thumb_set_frame_pointer (offsets);
23304 /* If we are profiling, make sure no instructions are scheduled before
23305 the call to mcount. Similarly if the user has requested no
23306 scheduling in the prolog. Similarly if we want non-call exceptions
23307 using the EABI unwinder, to prevent faulting instructions from being
23308 swapped with a stack adjustment. */
23309 if (crtl->profile || !TARGET_SCHED_PROLOG
23310 || (arm_except_unwind_info (&global_options) == UI_TARGET
23311 && cfun->can_throw_non_call_exceptions))
23312 emit_insn (gen_blockage ());
23314 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
23315 if (live_regs_mask & 0xff)
23316 cfun->machine->lr_save_eliminated = 0;
23319 /* Generate the pattern *pop_multiple_with_stack_update_and_return when a
23320 single POP instruction can be generated. LR should be replaced by PC. All
23321 the checks required are already done by USE_RETURN_INSN (). Hence,
23322 all we really need to check here is whether a single register or
23323 multiple registers are to be returned. */
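/* For example, a function that saved {r4, r5, lr} can return with the
   single instruction "pop {r4, r5, pc}".  */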
23325 thumb2_expand_return (void)
23327 int i, num_regs;
23328 unsigned long saved_regs_mask;
23329 arm_stack_offsets *offsets;
23331 offsets = arm_get_frame_offsets ();
23332 saved_regs_mask = offsets->saved_regs_mask;
23334 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
23335 if (saved_regs_mask & (1 << i))
23336 num_regs++;
23338 if (saved_regs_mask)
23340 if (num_regs == 1)
23342 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
23343 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
23344 rtx addr = gen_rtx_MEM (SImode,
23345 gen_rtx_POST_INC (SImode,
23346 stack_pointer_rtx));
23347 set_mem_alias_set (addr, get_frame_alias_set ());
23348 XVECEXP (par, 0, 0) = ret_rtx;
23349 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
23350 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
23351 emit_jump_insn (par);
23353 else
23355 saved_regs_mask &= ~ (1 << LR_REGNUM);
23356 saved_regs_mask |= (1 << PC_REGNUM);
23357 arm_emit_multi_reg_pop (saved_regs_mask);
23360 else
23362 emit_jump_insn (simple_return_rtx);
23367 thumb1_expand_epilogue (void)
23369 HOST_WIDE_INT amount;
23370 arm_stack_offsets *offsets;
23371 int regno;
23373 /* Naked functions don't have epilogues. */
23374 if (IS_NAKED (arm_current_func_type ()))
23375 return;
23377 offsets = arm_get_frame_offsets ();
23378 amount = offsets->outgoing_args - offsets->saved_regs;
23380 if (frame_pointer_needed)
23382 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
23383 amount = offsets->locals_base - offsets->saved_regs;
23385 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
23387 gcc_assert (amount >= 0);
23388 if (amount)
23390 emit_insn (gen_blockage ());
23392 if (amount < 512)
23393 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23394 GEN_INT (amount)));
23395 else
23397 /* r3 is always free in the epilogue. */
23398 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
23400 emit_insn (gen_movsi (reg, GEN_INT (amount)));
23401 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
23405 /* Emit a USE (stack_pointer_rtx), so that
23406 the stack adjustment will not be deleted. */
23407 emit_insn (gen_force_register_use (stack_pointer_rtx));
23409 if (crtl->profile || !TARGET_SCHED_PROLOG)
23410 emit_insn (gen_blockage ());
23412 /* Emit a clobber for each insn that will be restored in the epilogue,
23413 so that flow2 will get register lifetimes correct. */
23414 for (regno = 0; regno < 13; regno++)
23415 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
23416 emit_clobber (gen_rtx_REG (SImode, regno));
23418 if (! df_regs_ever_live_p (LR_REGNUM))
23419 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
23422 /* Epilogue code for APCS frame. */
23424 arm_expand_epilogue_apcs_frame (bool really_return)
23426 unsigned long func_type;
23427 unsigned long saved_regs_mask;
23428 int num_regs = 0;
23429 int i;
23430 int floats_from_frame = 0;
23431 arm_stack_offsets *offsets;
23433 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
23434 func_type = arm_current_func_type ();
23436 /* Get frame offsets for ARM. */
23437 offsets = arm_get_frame_offsets ();
23438 saved_regs_mask = offsets->saved_regs_mask;
23440 /* Find the offset of the floating-point save area in the frame. */
23441 floats_from_frame = offsets->saved_args - offsets->frame;
23443 /* Compute how many core registers are saved and how far away the floats are. */
23444 for (i = 0; i <= LAST_ARM_REGNUM; i++)
23445 if (saved_regs_mask & (1 << i))
23447 num_regs++;
23448 floats_from_frame += 4;
23451 if (TARGET_HARD_FLOAT && TARGET_VFP)
23453 int start_reg;
23455 /* The offset is from IP_REGNUM. */
23456 int saved_size = arm_get_vfp_saved_size ();
23457 if (saved_size > 0)
23459 floats_from_frame += saved_size;
23460 emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
23461 hard_frame_pointer_rtx,
23462 GEN_INT (-floats_from_frame)));
23465 /* Generate VFP register multi-pop. */
23466 start_reg = FIRST_VFP_REGNUM;
23468 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
23469 /* Look for a case where a reg does not need restoring. */
23470 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
23471 && (!df_regs_ever_live_p (i + 1)
23472 || call_used_regs[i + 1]))
23474 if (start_reg != i)
23475 arm_emit_vfp_multi_reg_pop (start_reg,
23476 (i - start_reg) / 2,
23477 gen_rtx_REG (SImode,
23478 IP_REGNUM));
23480 start_reg = i + 2;
23482 /* Restore the remaining regs that we have discovered (or possibly
23483 even all of them, if the conditional in the for loop never
23484 fired). */
23485 if (start_reg != i)
23486 arm_emit_vfp_multi_reg_pop (start_reg,
23487 (i - start_reg) / 2,
23488 gen_rtx_REG (SImode, IP_REGNUM));
23491 if (TARGET_IWMMXT)
23493 /* The frame pointer is guaranteed to be non-double-word aligned, as
23494 it is set to double-word-aligned old_stack_pointer - 4. */
23495 rtx insn;
23496 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
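/* LRM_COUNT is always odd (NUM_REGS + 1 or + 2, whichever is odd), so
   hard_frame_pointer_rtx - LRM_COUNT * 4 below stays doubleword
   aligned, which the 64-bit iWMMXt loads require given the frame
   pointer alignment described above.  */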
23498 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
23499 if (df_regs_ever_live_p (i) && !call_used_regs[i])
23501 rtx addr = gen_frame_mem (V2SImode,
23502 plus_constant (Pmode, hard_frame_pointer_rtx,
23503 - lrm_count * 4));
23504 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
23505 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
23506 gen_rtx_REG (V2SImode, i),
23507 NULL_RTX);
23509 lrm_count += 2;
23512 /* saved_regs_mask should contain IP, which held the old stack pointer
23513 at the time the frame was created. Since SP and IP are adjacent
23514 registers, we can restore its value directly into SP. */
23515 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
23516 saved_regs_mask &= ~(1 << IP_REGNUM);
23517 saved_regs_mask |= (1 << SP_REGNUM);
23519 /* There are two registers left in saved_regs_mask - LR and PC. We
23520 only need to restore LR (the return address), but to
23521 save time we can load it directly into PC, unless we need a
23522 special function exit sequence, or we are not really returning. */
23523 if (really_return
23524 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
23525 && !crtl->calls_eh_return)
23526 /* Delete LR from the register mask, so that LR on
23527 the stack is loaded into the PC in the register mask. */
23528 saved_regs_mask &= ~(1 << LR_REGNUM);
23529 else
23530 saved_regs_mask &= ~(1 << PC_REGNUM);
23532 num_regs = bit_count (saved_regs_mask);
23533 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
23535 /* Unwind the stack to just below the saved registers. */
23536 emit_insn (gen_addsi3 (stack_pointer_rtx,
23537 hard_frame_pointer_rtx,
23538 GEN_INT (- 4 * num_regs)));
23541 arm_emit_multi_reg_pop (saved_regs_mask);
23543 if (IS_INTERRUPT (func_type))
23545 /* Interrupt handlers will have pushed the
23546 IP onto the stack, so restore it now. */
23548 rtx addr = gen_rtx_MEM (SImode,
23549 gen_rtx_POST_INC (SImode,
23550 stack_pointer_rtx));
23551 set_mem_alias_set (addr, get_frame_alias_set ());
23552 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
23553 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
23554 gen_rtx_REG (SImode, IP_REGNUM),
23558 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
23559 return;
23561 if (crtl->calls_eh_return)
23562 emit_insn (gen_addsi3 (stack_pointer_rtx,
23563 stack_pointer_rtx,
23564 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
23566 if (IS_STACKALIGN (func_type))
23567 /* Restore the original stack pointer. Before prologue, the stack was
23568 realigned and the original stack pointer saved in r0. For details,
23569 see comment in arm_expand_prologue. */
23570 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
23572 emit_jump_insn (simple_return_rtx);
23575 /* Generate RTL to represent an ARM epilogue. REALLY_RETURN is true if the
23576 function is not a sibcall. */
23578 arm_expand_epilogue (bool really_return)
23580 unsigned long func_type;
23581 unsigned long saved_regs_mask;
23582 int num_regs = 0;
23583 int i;
23584 int amount;
23585 arm_stack_offsets *offsets;
23587 func_type = arm_current_func_type ();
23589 /* Naked functions don't have epilogues. Hence, generate return pattern, and
23590 let output_return_instruction take care of instruction emission if any. */
23591 if (IS_NAKED (func_type)
23592 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
23594 if (really_return)
23595 emit_jump_insn (simple_return_rtx);
23596 return;
23599 /* If we are throwing an exception, then we really must be doing a
23600 return, so we can't tail-call. */
23601 gcc_assert (!crtl->calls_eh_return || really_return);
23603 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23605 arm_expand_epilogue_apcs_frame (really_return);
23606 return;
23609 /* Get frame offsets for ARM. */
23610 offsets = arm_get_frame_offsets ();
23611 saved_regs_mask = offsets->saved_regs_mask;
23612 num_regs = bit_count (saved_regs_mask);
23614 if (frame_pointer_needed)
23616 /* Restore stack pointer if necessary. */
23618 if (TARGET_ARM)
23619 /* In ARM mode, the frame pointer points to the first saved register.
23620 Restore the stack pointer to the last saved register. */
23621 amount = offsets->frame - offsets->saved_regs;
23623 /* Force out any pending memory operations that reference stacked data
23624 before stack de-allocation occurs. */
23625 emit_insn (gen_blockage ());
23626 emit_insn (gen_addsi3 (stack_pointer_rtx,
23627 hard_frame_pointer_rtx,
23628 GEN_INT (amount)));
23630 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
23631 deleted. */
23632 emit_insn (gen_force_register_use (stack_pointer_rtx));
23634 else
23636 /* In Thumb-2 mode, the frame pointer points to the last saved
23637 register. */
23638 amount = offsets->locals_base - offsets->saved_regs;
23640 emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23641 hard_frame_pointer_rtx,
23642 GEN_INT (amount)));
23644 /* Force out any pending memory operations that reference stacked data
23645 before stack de-allocation occurs. */
23646 emit_insn (gen_blockage ());
23647 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
23648 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
23649 deleted. */
23650 emit_insn (gen_force_register_use (stack_pointer_rtx));
23653 else
23655 /* Pop off outgoing args and local frame to adjust stack pointer to
23656 last saved register. */
23657 amount = offsets->outgoing_args - offsets->saved_regs;
23658 if (amount)
23660 /* Force out any pending memory operations that reference stacked data
23661 before stack de-allocation occurs. */
23662 emit_insn (gen_blockage ());
23663 emit_insn (gen_addsi3 (stack_pointer_rtx,
23664 stack_pointer_rtx,
23665 GEN_INT (amount)));
23666 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
23667 not deleted. */
23668 emit_insn (gen_force_register_use (stack_pointer_rtx));
23672 if (TARGET_HARD_FLOAT && TARGET_VFP)
23674 /* Generate VFP register multi-pop. */
23675 int end_reg = LAST_VFP_REGNUM + 1;
23677 /* Scan the registers in reverse order. We need to match
23678 any groupings made in the prologue and generate matching
23679 vldm operations. The need to match groups is because,
23680 unlike pop, vldm can only do consecutive regs. */
23681 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
23682 /* Look for a case where a reg does not need restoring. */
23683 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
23684 && (!df_regs_ever_live_p (i + 1)
23685 || call_used_regs[i + 1]))
23687 /* Restore the regs discovered so far (from reg+2 to
23688 end_reg). */
23689 if (end_reg > i + 2)
23690 arm_emit_vfp_multi_reg_pop (i + 2,
23691 (end_reg - (i + 2)) / 2,
23692 stack_pointer_rtx);
23696 /* Restore the remaining regs that we have discovered (or possibly
23697 even all of them, if the conditional in the for loop never
23698 fired). */
23699 if (end_reg > i + 2)
23700 arm_emit_vfp_multi_reg_pop (i + 2,
23701 (end_reg - (i + 2)) / 2,
23702 stack_pointer_rtx);
23705 if (TARGET_IWMMXT)
23706 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
23707 if (df_regs_ever_live_p (i) && !call_used_regs[i])
23710 rtx addr = gen_rtx_MEM (V2SImode,
23711 gen_rtx_POST_INC (SImode,
23712 stack_pointer_rtx));
23713 set_mem_alias_set (addr, get_frame_alias_set ());
23714 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
23715 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
23716 gen_rtx_REG (V2SImode, i),
23717 NULL_RTX);
23720 if (saved_regs_mask)
23723 bool return_in_pc = false;
23725 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
23726 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
23727 && !IS_STACKALIGN (func_type)
23728 && really_return
23729 && crtl->args.pretend_args_size == 0
23730 && saved_regs_mask & (1 << LR_REGNUM)
23731 && !crtl->calls_eh_return)
23733 saved_regs_mask &= ~(1 << LR_REGNUM);
23734 saved_regs_mask |= (1 << PC_REGNUM);
23735 return_in_pc = true;
23738 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
23740 for (i = 0; i <= LAST_ARM_REGNUM; i++)
23741 if (saved_regs_mask & (1 << i))
23743 rtx addr = gen_rtx_MEM (SImode,
23744 gen_rtx_POST_INC (SImode,
23745 stack_pointer_rtx));
23746 set_mem_alias_set (addr, get_frame_alias_set ());
23748 if (i == PC_REGNUM)
23750 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
23751 XVECEXP (insn, 0, 0) = ret_rtx;
23752 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
23753 gen_rtx_REG (SImode, i),
23754 addr);
23755 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
23756 insn = emit_jump_insn (insn);
23760 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
23761 addr));
23762 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
23763 gen_rtx_REG (SImode, i),
23764 NULL_RTX);
23770 if (current_tune->prefer_ldrd_strd
23771 && !optimize_function_for_size_p (cfun))
23773 if (TARGET_THUMB2)
23774 thumb2_emit_ldrd_pop (saved_regs_mask);
23775 else
23776 arm_emit_multi_reg_pop (saved_regs_mask);
23778 else
23779 arm_emit_multi_reg_pop (saved_regs_mask);
23782 if (return_in_pc == true)
23786 if (crtl->args.pretend_args_size)
23787 emit_insn (gen_addsi3 (stack_pointer_rtx,
23788 stack_pointer_rtx,
23789 GEN_INT (crtl->args.pretend_args_size)));
23791 if (!really_return)
23792 return;
23794 if (crtl->calls_eh_return)
23795 emit_insn (gen_addsi3 (stack_pointer_rtx,
23796 stack_pointer_rtx,
23797 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
23799 if (IS_STACKALIGN (func_type))
23800 /* Restore the original stack pointer. Before prologue, the stack was
23801 realigned and the original stack pointer saved in r0. For details,
23802 see comment in arm_expand_prologue. */
23803 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
23805 emit_jump_insn (simple_return_rtx);
23808 /* Implementation of insn prologue_thumb1_interwork. This is the first
23809 "instruction" of a function called in ARM mode. Swap to thumb mode. */
23812 thumb1_output_interwork (void)
23815 FILE *f = asm_out_file;
23817 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
23818 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
23819 == SYMBOL_REF);
23820 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
23822 /* Generate code sequence to switch us into Thumb mode. */
23823 /* The .code 32 directive has already been emitted by
23824 ASM_DECLARE_FUNCTION_NAME. */
23825 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
23826 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
23828 /* Generate a label, so that the debugger will notice the
23829 change in instruction sets. This label is also used by
23830 the assembler to bypass the ARM code when this function
23831 is called from a Thumb encoded function elsewhere in the
23832 same file. Hence the definition of STUB_NAME here must
23833 agree with the definition in gas/config/tc-arm.c. */
23835 #define STUB_NAME ".real_start_of"
23837 fprintf (f, "\t.code\t16\n");
23838 #ifdef ARM_PE
23839 if (arm_dllexport_name_p (name))
23840 name = arm_strip_name_encoding (name);
23841 #endif
23842 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
23843 fprintf (f, "\t.thumb_func\n");
23844 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
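/* For illustration, with <name> standing for the function's assembler
   name, the sequence above emits roughly:

       orr     ip, pc, #1
       bx      ip
       .code   16
       .globl  .real_start_of<name>
       .thumb_func
   .real_start_of<name>:

   The orr computes the address of the Thumb code that follows (pc reads
   as the orr's address plus 8) with the low bit set, so the bx enters
   Thumb state at the stub label.  */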
23849 /* Handle the case of a double word load into a low register from
23850 a computed memory address. The computed address may involve a
23851 register which is overwritten by the load. */
23853 thumb_load_double_from_address (rtx *operands)
23861 gcc_assert (REG_P (operands[0]));
23862 gcc_assert (MEM_P (operands[1]));
23864 /* Get the memory address. */
23865 addr = XEXP (operands[1], 0);
23867 /* Work out how the memory address is computed. */
23868 switch (GET_CODE (addr))
23870 case REG:
23871 operands[2] = adjust_address (operands[1], SImode, 4);
23873 if (REGNO (operands[0]) == REGNO (addr))
23875 output_asm_insn ("ldr\t%H0, %2", operands);
23876 output_asm_insn ("ldr\t%0, %1", operands);
23880 output_asm_insn ("ldr\t%0, %1", operands);
23881 output_asm_insn ("ldr\t%H0, %2", operands);
23883 break;
23885 case CONST:
23886 /* Compute <address> + 4 for the high order load. */
23887 operands[2] = adjust_address (operands[1], SImode, 4);
23889 output_asm_insn ("ldr\t%0, %1", operands);
23890 output_asm_insn ("ldr\t%H0, %2", operands);
23891 break;
23893 case PLUS:
23894 arg1 = XEXP (addr, 0);
23895 arg2 = XEXP (addr, 1);
23897 if (CONSTANT_P (arg1))
23898 base = arg2, offset = arg1;
23899 else
23900 base = arg1, offset = arg2;
23902 gcc_assert (REG_P (base));
23904 /* Catch the case of <address> = <reg> + <reg> */
23905 if (REG_P (offset))
23907 int reg_offset = REGNO (offset);
23908 int reg_base = REGNO (base);
23909 int reg_dest = REGNO (operands[0]);
23911 /* Add the base and offset registers together into the
23912 higher destination register. */
23913 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
23914 reg_dest + 1, reg_base, reg_offset);
23916 /* Load the lower destination register from the address in
23917 the higher destination register. */
23918 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
23919 reg_dest, reg_dest + 1);
23921 /* Load the higher destination register from its own address
23922 plus 4. */
23923 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
23924 reg_dest + 1, reg_dest + 1);
23926 else
23928 /* Compute <address> + 4 for the high order load. */
23929 operands[2] = adjust_address (operands[1], SImode, 4);
23931 /* If the computed address is held in the low order register
23932 then load the high order register first, otherwise always
23933 load the low order register first. */
23934 if (REGNO (operands[0]) == REGNO (base))
23936 output_asm_insn ("ldr\t%H0, %2", operands);
23937 output_asm_insn ("ldr\t%0, %1", operands);
23941 output_asm_insn ("ldr\t%0, %1", operands);
23942 output_asm_insn ("ldr\t%H0, %2", operands);
23945 break;
23947 case LABEL_REF:
23948 /* With no registers to worry about we can just load the value
23949 directly. */
23950 operands[2] = adjust_address (operands[1], SImode, 4);
23952 output_asm_insn ("ldr\t%H0, %2", operands);
23953 output_asm_insn ("ldr\t%0, %1", operands);
23954 break;
23956 default:
23957 gcc_unreachable ();
23960 return "";
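/* Example of the overlap handling above, with registers chosen for
   illustration: if operands[0] is r0 and the address is [r0, #8], the
   base register doubles as the low half of the destination, so the
   high word is loaded first:

       ldr r1, [r0, #12]   @ high word while the base is still intact
       ldr r0, [r0, #8]    @ low word last, clobbering the base

   When there is no overlap, the low word is loaded first instead.  */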
23964 thumb_output_move_mem_multiple (int n, rtx *operands)
23966 rtx tmp;
23968 switch (n)
23970 case 2:
23971 if (REGNO (operands[4]) > REGNO (operands[5]))
23973 tmp = operands[4];
23974 operands[4] = operands[5];
23975 operands[5] = tmp;
23977 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
23978 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
23979 break;
23981 case 3:
23982 if (REGNO (operands[4]) > REGNO (operands[5]))
23984 tmp = operands[4];
23985 operands[4] = operands[5];
23986 operands[5] = tmp;
23988 if (REGNO (operands[5]) > REGNO (operands[6]))
23990 tmp = operands[5];
23991 operands[5] = operands[6];
23992 operands[6] = tmp;
23994 if (REGNO (operands[4]) > REGNO (operands[5]))
23996 tmp = operands[4];
23997 operands[4] = operands[5];
23998 operands[5] = tmp;
24001 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
24002 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
24003 break;
24005 default:
24006 gcc_unreachable ();
24009 return "";
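/* Sketch of the output for a two-word move, assuming the scratch
   registers happened to be allocated as r4 and r3: the swap above
   reorders them so the ldm/stm register lists are ascending, giving

       ldmia r1!, {r3, r4}
       stmia r0!, {r3, r4}  */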
24012 /* Output a call-via instruction for thumb state. */
24014 thumb_call_via_reg (rtx reg)
24016 int regno = REGNO (reg);
24019 gcc_assert (regno < LR_REGNUM);
24021 /* If we are in the normal text section we can use a single instance
24022 per compilation unit. If we are doing function sections, then we need
24023 an entry per section, since we can't rely on reachability. */
24024 if (in_section == text_section)
24026 thumb_call_reg_needed = 1;
24028 if (thumb_call_via_label[regno] == NULL)
24029 thumb_call_via_label[regno] = gen_label_rtx ();
24030 labelp = thumb_call_via_label + regno;
24032 else
24034 if (cfun->machine->call_via[regno] == NULL)
24035 cfun->machine->call_via[regno] = gen_label_rtx ();
24036 labelp = cfun->machine->call_via + regno;
24039 output_asm_insn ("bl\t%a0", labelp);
24043 /* Routines for generating rtl. */
24045 thumb_expand_movmemqi (rtx *operands)
24047 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
24048 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
24049 HOST_WIDE_INT len = INTVAL (operands[2]);
24050 HOST_WIDE_INT offset = 0;
24052 while (len >= 12)
24054 emit_insn (gen_movmem12b (out, in, out, in));
24055 len -= 12;
24058 if (len >= 8)
24060 emit_insn (gen_movmem8b (out, in, out, in));
24061 len -= 8;
24064 if (len >= 4)
24066 rtx reg = gen_reg_rtx (SImode);
24067 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
24068 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
24069 offset = 4;
24070 len -= 4;
24073 if (len >= 2)
24075 rtx reg = gen_reg_rtx (HImode);
24076 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
24077 plus_constant (Pmode, in,
24078 offset))));
24079 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
24080 offset)),
24081 reg));
24082 offset += 2;
24083 len -= 2;
24086 if (len >= 1)
24088 rtx reg = gen_reg_rtx (QImode);
24089 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
24090 plus_constant (Pmode, in,
24091 offset))));
24092 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
24093 offset)),
24094 reg));
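/* Worked example, assuming the length checks sketched above: a 23-byte
   copy expands to one 12-byte block move, one 8-byte block move, then a
   halfword copy and a byte copy for the remaining 3 bytes.  */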
24099 thumb_reload_out_hi (rtx *operands)
24101 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
24104 /* Handle reading a half-word from memory during reload. */
24106 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
24108 gcc_unreachable ();
24111 /* Return the length of a function name prefix
24112 that starts with the character 'c'. */
24114 arm_get_strip_length (int c)
24116 switch (c)
24118 ARM_NAME_ENCODING_LENGTHS
24119 default: return 0;
24123 /* Return a pointer to a function's name with any
24124 and all prefix encodings stripped from it. */
24126 arm_strip_name_encoding (const char *name)
24130 while ((skip = arm_get_strip_length (* name)))
24131 name += skip;
24133 return name;
24136 /* If there is a '*' anywhere in the name's prefix, then
24137 emit the stripped name verbatim, otherwise prepend an
24138 underscore if leading underscores are being used. */
24140 arm_asm_output_labelref (FILE *stream, const char *name)
24145 while ((skip = arm_get_strip_length (* name)))
24147 verbatim |= (*name == '*');
24148 name += skip;
24151 if (verbatim)
24152 fputs (name, stream);
24153 else
24154 asm_fprintf (stream, "%U%s", name);
24157 /* This function is used to emit an EABI tag and its associated value.
24158 We emit the numerical value of the tag in case the assembler does not
24159 support textual tags. (E.g. gas prior to 2.20.) If requested we include
24160 the tag name in a comment so that anyone reading the assembler output
24161 will know which tag is being set.
24163 This function is not static because arm-c.c needs it too. */
24166 arm_emit_eabi_attribute (const char *name, int num, int val)
24168 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
24169 if (flag_verbose_asm || flag_debug_asm)
24170 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
24171 asm_fprintf (asm_out_file, "\n");
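/* For instance, arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 1)
   prints ".eabi_attribute 26, 1", followed by "@ Tag_ABI_enum_size"
   when -fverbose-asm or -dA is in effect.  */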
24175 arm_file_start (void)
24179 if (TARGET_UNIFIED_ASM)
24180 asm_fprintf (asm_out_file, "\t.syntax unified\n");
24184 const char *fpu_name;
24185 if (arm_selected_arch)
24186 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
24187 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
24188 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
24189 else
24190 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
24192 if (TARGET_SOFT_FLOAT)
24194 fpu_name = "softvfp";
24196 else
24198 fpu_name = arm_fpu_desc->name;
24199 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
24201 if (TARGET_HARD_FLOAT)
24202 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
24203 if (TARGET_HARD_FLOAT_ABI)
24204 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
24207 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
24209 /* Some of these attributes only apply when the corresponding features
24210 are used. However we don't have any easy way of figuring this out.
24211 Conservatively record the setting that would have been used. */
24213 if (flag_rounding_math)
24214 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
24216 if (!flag_unsafe_math_optimizations)
24218 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
24219 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
24221 if (flag_signaling_nans)
24222 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
24224 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
24225 flag_finite_math_only ? 1 : 3);
24227 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
24228 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
24229 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
24230 flag_short_enums ? 1 : 2);
24232 /* Tag_ABI_optimization_goals. */
24233 if (optimize_size)
24234 val = 4;
24235 else if (optimize >= 2)
24236 val = 2;
24237 else if (optimize)
24238 val = 1;
24239 else
24240 val = 6;
24241 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
24243 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
24246 if (arm_fp16_format)
24247 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
24248 (int) arm_fp16_format);
24250 if (arm_lang_output_object_attributes_hook)
24251 arm_lang_output_object_attributes_hook();
24254 default_file_start ();
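/* Taken together, a hard-float NEON compilation (an illustrative
   configuration) would begin its assembly output roughly as

       .syntax unified
       .cpu cortex-a9
       .eabi_attribute 27, 3
       .eabi_attribute 28, 1
       .fpu neon
       .eabi_attribute 20, 1
       ...

   before default_file_start emits the generic preamble.  */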
24258 arm_file_end (void)
24262 if (NEED_INDICATE_EXEC_STACK)
24263 /* Add .note.GNU-stack. */
24264 file_end_indicate_exec_stack ();
24266 if (! thumb_call_reg_needed)
24267 return;
24269 switch_to_section (text_section);
24270 asm_fprintf (asm_out_file, "\t.code 16\n");
24271 ASM_OUTPUT_ALIGN (asm_out_file, 1);
24273 for (regno = 0; regno < LR_REGNUM; regno++)
24275 rtx label = thumb_call_via_label[regno];
24277 if (label != 0)
24279 targetm.asm_out.internal_label (asm_out_file, "L",
24280 CODE_LABEL_NUMBER (label));
24281 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
24287 /* Symbols in the text segment can be accessed without indirecting via the
24288 constant pool; it may take an extra binary operation, but this is still
24289 faster than indirecting via memory. Don't do this when not optimizing,
24290 since we won't be calculating all of the offsets necessary to do this
24291 simplification. */
24294 arm_encode_section_info (tree decl, rtx rtl, int first)
24296 if (optimize > 0 && TREE_CONSTANT (decl))
24297 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
24299 default_encode_section_info (decl, rtl, first);
24301 #endif /* !ARM_PE */
24304 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
24306 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
24307 && !strcmp (prefix, "L"))
24309 arm_ccfsm_state = 0;
24310 arm_target_insn = NULL;
24312 default_internal_label (stream, prefix, labelno);
24315 /* Output code to add DELTA to the first argument, and then jump
24316 to FUNCTION. Used for C++ multiple inheritance. */
24318 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
24319 HOST_WIDE_INT delta,
24320 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
24321 tree function)
24323 static int thunk_label = 0;
24326 int mi_delta = delta;
24327 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
24329 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
24330 ? 1 : 0);
24331 if (mi_delta < 0)
24332 mi_delta = - mi_delta;
24334 final_start_function (emit_barrier (), file, 1);
24336 if (TARGET_THUMB1)
24338 int labelno = thunk_label++;
24339 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
24340 /* Thunks are entered in ARM mode when available. */
24341 if (TARGET_THUMB1_ONLY)
24343 /* push r3 so we can use it as a temporary. */
24344 /* TODO: Omit this save if r3 is not used. */
24345 fputs ("\tpush {r3}\n", file);
24346 fputs ("\tldr\tr3, ", file);
24350 fputs ("\tldr\tr12, ", file);
24352 assemble_name (file, label);
24353 fputc ('\n', file);
24355 if (flag_pic)
24356 /* If we are generating PIC, the ldr instruction below loads
24357 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
24358 the address of the add + 8, so we have:
24360 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
24361 = target + 1.
24363 Note that we have "+ 1" because some versions of GNU ld
24364 don't set the low bit of the result for R_ARM_REL32
24365 relocations against thumb function symbols.
24366 On ARMv6M this is +4, not +8. */
24367 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
24368 assemble_name (file, labelpc);
24369 fputs (":\n", file);
24370 if (TARGET_THUMB1_ONLY)
24372 /* This is 2 insns after the start of the thunk, so we know it
24373 is 4-byte aligned. */
24374 fputs ("\tadd\tr3, pc, r3\n", file);
24375 fputs ("\tmov r12, r3\n", file);
24378 fputs ("\tadd\tr12, pc, r12\n", file);
24380 else if (TARGET_THUMB1_ONLY)
24381 fputs ("\tmov r12, r3\n", file);
24383 if (TARGET_THUMB1_ONLY)
24385 if (mi_delta > 255)
24387 fputs ("\tldr\tr3, ", file);
24388 assemble_name (file, label);
24389 fputs ("+4\n", file);
24390 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
24391 mi_op, this_regno, this_regno);
24393 else if (mi_delta != 0)
24395 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
24396 mi_op, this_regno, this_regno,
24402 /* TODO: Use movw/movt for large constants when available. */
24403 while (mi_delta != 0)
24405 if ((mi_delta & (3 << shift)) == 0)
24406 shift += 2;
24407 else
24409 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
24410 mi_op, this_regno, this_regno,
24411 mi_delta & (0xff << shift));
24412 mi_delta &= ~(0xff << shift);
24413 shift += 8;
24417 if (TARGET_THUMB1)
24419 if (TARGET_THUMB1_ONLY)
24420 fputs ("\tpop\t{r3}\n", file);
24422 fprintf (file, "\tbx\tr12\n");
24423 ASM_OUTPUT_ALIGN (file, 2);
24424 assemble_name (file, label);
24425 fputs (":\n", file);
24428 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
24429 rtx tem = XEXP (DECL_RTL (function), 0);
24430 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
24431 tem = gen_rtx_MINUS (GET_MODE (tem),
24432 tem,
24433 gen_rtx_SYMBOL_REF (Pmode,
24434 ggc_strdup (labelpc)));
24435 assemble_integer (tem, 4, BITS_PER_WORD, 1);
24438 /* Output ".word .LTHUNKn". */
24439 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
24441 if (TARGET_THUMB1_ONLY && mi_delta > 255)
24442 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
24446 fputs ("\tb\t", file);
24447 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
24448 if (NEED_PLT_RELOC)
24449 fputs ("(PLT)", file);
24450 fputc ('\n', file);
24453 final_end_function ();
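/* In the simplest configuration (ARM state, no PIC, and a hypothetical
   delta of 8) the thunk body produced above reduces to

       add r0, r0, #8
       b   <function>       @ with "(PLT)" appended when NEED_PLT_RELOC

   i.e. adjust the this pointer, then tail-call the real method.  */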
24457 arm_emit_vector_const (FILE *file, rtx x)
24460 const char * pattern;
24462 gcc_assert (GET_CODE (x) == CONST_VECTOR);
24464 switch (GET_MODE (x))
24466 case V2SImode: pattern = "%08x"; break;
24467 case V4HImode: pattern = "%04x"; break;
24468 case V8QImode: pattern = "%02x"; break;
24469 default: gcc_unreachable ();
24472 fprintf (file, "0x");
24473 for (i = CONST_VECTOR_NUNITS (x); i--;)
24475 rtx element;
24477 element = CONST_VECTOR_ELT (x, i);
24478 fprintf (file, pattern, INTVAL (element));
24481 return 1;
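/* E.g. a V4HImode vector holding 1, 2, 3, 4 is printed as
   "0x0004000300020001": elements are emitted from the highest index
   downwards, each in the 4-hex-digit field selected above.  */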
24484 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
24485 HFmode constant pool entries are actually loaded with ldr. */
24487 arm_emit_fp16_const (rtx c)
24489 REAL_VALUE_TYPE r;
24490 unsigned long bits;
24492 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
24493 bits = real_to_target (NULL, &r, HFmode);
24494 if (WORDS_BIG_ENDIAN)
24495 assemble_zeros (2);
24496 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
24497 if (!WORDS_BIG_ENDIAN)
24498 assemble_zeros (2);
24502 arm_output_load_gr (rtx *operands)
24509 if (!MEM_P (operands [1])
24510 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
24511 || !REG_P (reg = XEXP (sum, 0))
24512 || !CONST_INT_P (offset = XEXP (sum, 1))
24513 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
24514 return "wldrw%?\t%0, %1";
24516 /* Fix up an out-of-range load of a GR register. */
24517 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
24518 wcgr = operands[0];
24520 output_asm_insn ("ldr%?\t%0, %1", operands);
24522 operands[0] = wcgr;
24524 output_asm_insn ("tmcr%?\t%0, %1", operands);
24525 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
24530 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
24532 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
24533 named arg and all anonymous args onto the stack.
24534 XXX I know the prologue shouldn't be pushing registers, but it is faster
24535 that way. */
24538 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
24539 enum machine_mode mode,
24540 tree type,
24541 int *pretend_size,
24542 int second_time ATTRIBUTE_UNUSED)
24544 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
24545 int nregs;
24547 cfun->machine->uses_anonymous_args = 1;
24548 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
24550 nregs = pcum->aapcs_ncrn;
24551 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
24552 nregs++;
24554 else
24555 nregs = pcum->nregs;
24557 if (nregs < NUM_ARG_REGS)
24558 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
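/* Worked example: for "int f (int a, ...)" under the AAPCS, the named
   argument consumes one core register, so nregs is 1 and *pretend_size
   becomes (4 - 1) * 4 = 12, making the prologue push r1-r3 to keep the
   anonymous arguments contiguous on the stack.  */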
24561 /* Return nonzero if the CONSUMER instruction (a store) does not need
24562 PRODUCER's value to calculate the address. */
24565 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
24567 rtx value = PATTERN (producer);
24568 rtx addr = PATTERN (consumer);
24570 if (GET_CODE (value) == COND_EXEC)
24571 value = COND_EXEC_CODE (value);
24572 if (GET_CODE (value) == PARALLEL)
24573 value = XVECEXP (value, 0, 0);
24574 value = XEXP (value, 0);
24575 if (GET_CODE (addr) == COND_EXEC)
24576 addr = COND_EXEC_CODE (addr);
24577 if (GET_CODE (addr) == PARALLEL)
24578 addr = XVECEXP (addr, 0, 0);
24579 addr = XEXP (addr, 0);
24581 return !reg_overlap_mentioned_p (value, addr);
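/* Example: with a producer "r1 = r2 + r3", the consumer "str r4, [r1]"
   needs r1 to form its address, so the function returns zero; for
   "str r1, [r4]" the dependence is only on the stored value, so it
   returns nonzero.  */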
24584 /* Return nonzero if the CONSUMER instruction (a store) does need
24585 PRODUCER's value to calculate the address. */
24588 arm_early_store_addr_dep (rtx producer, rtx consumer)
24590 return !arm_no_early_store_addr_dep (producer, consumer);
24593 /* Return nonzero if the CONSUMER instruction (a load) does need
24594 PRODUCER's value to calculate the address. */
24597 arm_early_load_addr_dep (rtx producer, rtx consumer)
24599 rtx value = PATTERN (producer);
24600 rtx addr = PATTERN (consumer);
24602 if (GET_CODE (value) == COND_EXEC)
24603 value = COND_EXEC_CODE (value);
24604 if (GET_CODE (value) == PARALLEL)
24605 value = XVECEXP (value, 0, 0);
24606 value = XEXP (value, 0);
24607 if (GET_CODE (addr) == COND_EXEC)
24608 addr = COND_EXEC_CODE (addr);
24609 if (GET_CODE (addr) == PARALLEL)
24611 if (GET_CODE (XVECEXP (addr, 0, 0)) == RETURN)
24612 addr = XVECEXP (addr, 0, 1);
24613 else
24614 addr = XVECEXP (addr, 0, 0);
24616 addr = XEXP (addr, 1);
24618 return reg_overlap_mentioned_p (value, addr);
24621 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
24622 have an early register shift value or amount dependency on the
24623 result of PRODUCER. */
24626 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
24628 rtx value = PATTERN (producer);
24629 rtx op = PATTERN (consumer);
24630 rtx early_op;
24632 if (GET_CODE (value) == COND_EXEC)
24633 value = COND_EXEC_CODE (value);
24634 if (GET_CODE (value) == PARALLEL)
24635 value = XVECEXP (value, 0, 0);
24636 value = XEXP (value, 0);
24637 if (GET_CODE (op) == COND_EXEC)
24638 op = COND_EXEC_CODE (op);
24639 if (GET_CODE (op) == PARALLEL)
24640 op = XVECEXP (op, 0, 0);
24641 op = XEXP (op, 1);
24643 early_op = XEXP (op, 0);
24644 /* This is either an actual independent shift, or a shift applied to
24645 the first operand of another operation. We want the whole shift
24646 operation. */
24647 if (REG_P (early_op))
24648 early_op = op;
24650 return !reg_overlap_mentioned_p (value, early_op);
24653 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
24654 have an early register shift value dependency on the result of
24655 PRODUCER. */
24658 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
24660 rtx value = PATTERN (producer);
24661 rtx op = PATTERN (consumer);
24662 rtx early_op;
24664 if (GET_CODE (value) == COND_EXEC)
24665 value = COND_EXEC_CODE (value);
24666 if (GET_CODE (value) == PARALLEL)
24667 value = XVECEXP (value, 0, 0);
24668 value = XEXP (value, 0);
24669 if (GET_CODE (op) == COND_EXEC)
24670 op = COND_EXEC_CODE (op);
24671 if (GET_CODE (op) == PARALLEL)
24672 op = XVECEXP (op, 0, 0);
24673 op = XEXP (op, 1);
24675 early_op = XEXP (op, 0);
24677 /* This is either an actual independent shift, or a shift applied to
24678 the first operand of another operation. We want the value being
24679 shifted, in either case. */
24680 if (!REG_P (early_op))
24681 early_op = XEXP (early_op, 0);
24683 return !reg_overlap_mentioned_p (value, early_op);
24686 /* Return nonzero if the CONSUMER (a mul or mac op) does not
24688 have an early register mult dependency on the result of
24689 PRODUCER. */
24691 arm_no_early_mul_dep (rtx producer, rtx consumer)
24693 rtx value = PATTERN (producer);
24694 rtx op = PATTERN (consumer);
24696 if (GET_CODE (value) == COND_EXEC)
24697 value = COND_EXEC_CODE (value);
24698 if (GET_CODE (value) == PARALLEL)
24699 value = XVECEXP (value, 0, 0);
24700 value = XEXP (value, 0);
24701 if (GET_CODE (op) == COND_EXEC)
24702 op = COND_EXEC_CODE (op);
24703 if (GET_CODE (op) == PARALLEL)
24704 op = XVECEXP (op, 0, 0);
24705 op = XEXP (op, 1);
24707 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
24709 if (GET_CODE (XEXP (op, 0)) == MULT)
24710 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
24711 else
24712 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
24715 return 0;
24718 /* We can't rely on the caller doing the proper promotion when
24719 using APCS or ATPCS. */
24722 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
24724 return !TARGET_AAPCS_BASED;
24727 static enum machine_mode
24728 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
24729 enum machine_mode mode,
24730 int *punsignedp ATTRIBUTE_UNUSED,
24731 const_tree fntype ATTRIBUTE_UNUSED,
24732 int for_return ATTRIBUTE_UNUSED)
24734 if (GET_MODE_CLASS (mode) == MODE_INT
24735 && GET_MODE_SIZE (mode) < 4)
24736 return SImode;
24738 return mode;
24741 /* AAPCS based ABIs use short enums by default. */
24744 arm_default_short_enums (void)
24746 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
24750 /* AAPCS requires that anonymous bitfields affect structure alignment. */
24753 arm_align_anon_bitfield (void)
24755 return TARGET_AAPCS_BASED;
24759 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
24762 arm_cxx_guard_type (void)
24764 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
24767 /* Return non-zero iff the consumer (a multiply-accumulate or a
24768 multiply-subtract instruction) has an accumulator dependency on the
24769 result of the producer and no other dependency on that result. It
24770 does not check whether the producer is a multiply-accumulate instruction. */
24772 arm_mac_accumulator_is_result (rtx producer, rtx consumer)
24774 rtx result;
24775 rtx op0, op1, acc;
24777 producer = PATTERN (producer);
24778 consumer = PATTERN (consumer);
24780 if (GET_CODE (producer) == COND_EXEC)
24781 producer = COND_EXEC_CODE (producer);
24782 if (GET_CODE (consumer) == COND_EXEC)
24783 consumer = COND_EXEC_CODE (consumer);
24785 if (GET_CODE (producer) != SET)
24786 return 0;
24788 result = XEXP (producer, 0);
24790 if (GET_CODE (consumer) != SET)
24791 return 0;
24793 /* Check that the consumer is of the form
24794 (set (...) (plus (mult ...) (...)))
24796 (set (...) (minus (...) (mult ...))). */
24797 if (GET_CODE (XEXP (consumer, 1)) == PLUS)
24799 if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT)
24800 return 0;
24802 op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0);
24803 op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1);
24804 acc = XEXP (XEXP (consumer, 1), 1);
24806 else if (GET_CODE (XEXP (consumer, 1)) == MINUS)
24808 if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT)
24809 return 0;
24811 op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0);
24812 op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1);
24813 acc = XEXP (XEXP (consumer, 1), 0);
24815 else
24816 return 0;
24818 return (reg_overlap_mentioned_p (result, acc)
24819 && !reg_overlap_mentioned_p (result, op0)
24820 && !reg_overlap_mentioned_p (result, op1));
24823 /* Return non-zero if the consumer (a multiply-accumulate instruction)
24824 has an accumulator dependency on the result of the producer (a
24825 multiplication instruction) and no other dependency on that result. */
24827 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
24829 rtx mul = PATTERN (producer);
24830 rtx mac = PATTERN (consumer);
24831 rtx mul_result;
24832 rtx mac_op0, mac_op1, mac_acc;
24834 if (GET_CODE (mul) == COND_EXEC)
24835 mul = COND_EXEC_CODE (mul);
24836 if (GET_CODE (mac) == COND_EXEC)
24837 mac = COND_EXEC_CODE (mac);
24839 /* Check that mul is of the form (set (...) (mult ...))
24840 and mla is of the form (set (...) (plus (mult ...) (...))). */
24841 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
24842 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
24843 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
24844 return 0;
24846 mul_result = XEXP (mul, 0);
24847 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
24848 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
24849 mac_acc = XEXP (XEXP (mac, 1), 1);
24851 return (reg_overlap_mentioned_p (mul_result, mac_acc)
24852 && !reg_overlap_mentioned_p (mul_result, mac_op0)
24853 && !reg_overlap_mentioned_p (mul_result, mac_op1));
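/* Example: after "mul r1, r2, r3", the consumer "mla r4, r5, r6, r1"
   depends on r1 only through the accumulator, so the check succeeds;
   it fails for "mla r4, r1, r6, r7", where r1 feeds a multiply
   operand instead.  */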
24857 /* The EABI says test the least significant bit of a guard variable. */
24860 arm_cxx_guard_mask_bit (void)
24862 return TARGET_AAPCS_BASED;
24866 /* The EABI specifies that all array cookies are 8 bytes long. */
24869 arm_get_cookie_size (tree type)
24871 tree size;
24873 if (!TARGET_AAPCS_BASED)
24874 return default_cxx_get_cookie_size (type);
24876 size = build_int_cst (sizetype, 8);
24877 return size;
24881 /* The EABI says that array cookies should also contain the element size. */
24884 arm_cookie_has_size (void)
24886 return TARGET_AAPCS_BASED;
24890 /* The EABI says constructors and destructors should return a pointer to
24891 the object constructed/destroyed. */
24894 arm_cxx_cdtor_returns_this (void)
24896 return TARGET_AAPCS_BASED;
24899 /* The EABI says that an inline function may never be the key
24900 method. */
24903 arm_cxx_key_method_may_be_inline (void)
24905 return !TARGET_AAPCS_BASED;
24909 arm_cxx_determine_class_data_visibility (tree decl)
24911 if (!TARGET_AAPCS_BASED
24912 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
24915 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
24916 be exported. However, on systems without dynamic vague linkage,
24917 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
24918 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
24919 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
24920 else
24921 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
24922 DECL_VISIBILITY_SPECIFIED (decl) = 1;
24926 arm_cxx_class_data_always_comdat (void)
24928 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
24929 vague linkage if the class has no key function. */
24930 return !TARGET_AAPCS_BASED;
24934 /* The EABI says __aeabi_atexit should be used to register static
24938 arm_cxx_use_aeabi_atexit (void)
24940 return TARGET_AAPCS_BASED;
24945 arm_set_return_address (rtx source, rtx scratch)
24947 arm_stack_offsets *offsets;
24948 HOST_WIDE_INT delta;
24949 rtx addr;
24950 unsigned long saved_regs;
24952 offsets = arm_get_frame_offsets ();
24953 saved_regs = offsets->saved_regs_mask;
24955 if ((saved_regs & (1 << LR_REGNUM)) == 0)
24956 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
24957 else
24959 if (frame_pointer_needed)
24960 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
24963 /* LR will be the first saved register. */
24964 delta = offsets->outgoing_args - (offsets->frame + 4);
24966 if (delta >= 4096)
24969 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
24970 GEN_INT (delta & ~4095)));
24971 addr = scratch;
24972 delta &= 4095;
24974 else
24975 addr = stack_pointer_rtx;
24977 addr = plus_constant (Pmode, addr, delta);
24979 emit_move_insn (gen_frame_mem (Pmode, addr), source);
24985 thumb_set_return_address (rtx source, rtx scratch)
24987 arm_stack_offsets *offsets;
24988 HOST_WIDE_INT delta;
24989 HOST_WIDE_INT limit;
24990 int reg;
24991 rtx addr;
24992 unsigned long mask;
24996 offsets = arm_get_frame_offsets ();
24997 mask = offsets->saved_regs_mask;
24998 if (mask & (1 << LR_REGNUM))
25001 /* Find the saved regs. */
25002 if (frame_pointer_needed)
25004 delta = offsets->soft_frame - offsets->saved_args;
25005 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
25009 else
25011 delta = offsets->outgoing_args - offsets->saved_args;
25012 reg = SP_REGNUM;
25014 /* Allow for the stack frame. */
25015 if (TARGET_THUMB1 && TARGET_BACKTRACE)
25016 delta -= 16;
25017 /* The link register is always the first saved register. */
25018 delta -= 4;
25020 /* Construct the address. */
25021 addr = gen_rtx_REG (SImode, reg);
25022 if (delta > limit)
25024 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
25025 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
25026 addr = scratch;
25028 else
25029 addr = plus_constant (Pmode, addr, delta);
25031 emit_move_insn (gen_frame_mem (Pmode, addr), source);
25033 else
25034 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
25037 /* Implements target hook vector_mode_supported_p. */
25039 arm_vector_mode_supported_p (enum machine_mode mode)
25041 /* Neon also supports V2SImode, etc. listed in the clause below. */
25042 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
25043 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
25044 return true;
25046 if ((TARGET_NEON || TARGET_IWMMXT)
25047 && ((mode == V2SImode)
25048 || (mode == V4HImode)
25049 || (mode == V8QImode)))
25050 return true;
25052 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
25053 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
25054 || mode == V2HAmode))
25055 return true;
25057 return false;
25060 /* Implements target hook array_mode_supported_p. */
25063 arm_array_mode_supported_p (enum machine_mode mode,
25064 unsigned HOST_WIDE_INT nelems)
25066 if (TARGET_NEON
25067 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
25068 && (nelems >= 2 && nelems <= 4))
25069 return true;
25071 return false;
25074 /* Use the option -mvectorize-with-neon-double to override the use of quadword
25075 registers when autovectorizing for Neon, at least until multiple vector
25076 widths are supported properly by the middle-end. */
25078 static enum machine_mode
25079 arm_preferred_simd_mode (enum machine_mode mode)
25085 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
25087 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
25089 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
25091 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
25093 if (!TARGET_NEON_VECTORIZE_DOUBLE)
25100 if (TARGET_REALLY_IWMMXT)
25116 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
25118 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
25119 using r0-r4 for function arguments and r7 for the stack frame, leaving
25120 too little for doubleword arithmetic. For Thumb-2 all the
25121 potentially problematic instructions accept high registers so this is not
25122 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
25123 that require many low registers. */
25125 arm_class_likely_spilled_p (reg_class_t rclass)
25127 if ((TARGET_THUMB1 && rclass == LO_REGS)
25128 || rclass == CC_REG)
25129 return true;
25131 return false;
25134 /* Implements target hook small_register_classes_for_mode_p. */
25136 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
25138 return TARGET_THUMB1;
25141 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
25142 ARM insns and therefore guarantee that the shift count is modulo 256.
25143 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
25144 guarantee no particular behavior for out-of-range counts. */
25146 static unsigned HOST_WIDE_INT
25147 arm_shift_truncation_mask (enum machine_mode mode)
25149 return mode == SImode ? 255 : 0;
25153 /* Map internal gcc register numbers to DWARF2 register numbers. */
25156 arm_dbx_register_number (unsigned int regno)
25158 if (regno < 16)
25159 return regno;
25161 if (IS_VFP_REGNUM (regno))
25163 /* See comment in arm_dwarf_register_span. */
25164 if (VFP_REGNO_OK_FOR_SINGLE (regno))
25165 return 64 + regno - FIRST_VFP_REGNUM;
25166 else
25167 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
25170 if (IS_IWMMXT_GR_REGNUM (regno))
25171 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
25173 if (IS_IWMMXT_REGNUM (regno))
25174 return 112 + regno - FIRST_IWMMXT_REGNUM;
25176 gcc_unreachable ();
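/* Under this mapping s0-s31 are described as DWARF registers 64-95,
   and d0-d31 as 256-287; e.g. d16, which has no single-precision
   aliases, becomes DWARF register 256 + 16 = 272.  */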
25179 /* Dwarf models VFPv3 registers as 32 64-bit registers.
25180 GCC models them as 64 32-bit registers, so we need to describe this to
25181 the DWARF generation code. Other registers can use the default. */
25183 arm_dwarf_register_span (rtx rtl)
25190 regno = REGNO (rtl);
25191 if (!IS_VFP_REGNUM (regno))
25192 return NULL_RTX;
25194 /* XXX FIXME: The EABI defines two VFP register ranges:
25195 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
25196 256-287: D0-D31
25197 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
25198 corresponding D register. Until GDB supports this, we shall use the
25199 legacy encodings. We also use these encodings for D0-D15 for
25200 compatibility with older debuggers. */
25201 if (VFP_REGNO_OK_FOR_SINGLE (regno))
25202 return NULL_RTX;
25204 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
25205 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
25206 regno = (regno - FIRST_VFP_REGNUM) / 2;
25207 for (i = 0; i < nregs; i++)
25208 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
25210 return p;
25213 #if ARM_UNWIND_INFO
25214 /* Emit unwind directives for a store-multiple instruction or stack pointer
25215 push during alignment.
25216 These should only ever be generated by the function prologue code, so
25217 expect them to have a particular form. */
25220 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
25223 HOST_WIDE_INT offset;
25224 HOST_WIDE_INT nregs;
25230 e = XVECEXP (p, 0, 0);
25231 if (GET_CODE (e) != SET)
25234 /* First insn will adjust the stack pointer. */
25235 if (GET_CODE (e) != SET
25236 || !REG_P (XEXP (e, 0))
25237 || REGNO (XEXP (e, 0)) != SP_REGNUM
25238 || GET_CODE (XEXP (e, 1)) != PLUS)
25241 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
25242 nregs = XVECLEN (p, 0) - 1;
25244 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
25245 if (reg < 16)
25247 /* The function prologue may also push pc but not annotate it, as it
25248 is never restored. We turn this into a stack pointer adjustment. */
25249 if (nregs * 4 == offset - 4)
25251 fprintf (asm_out_file, "\t.pad #4\n");
25255 fprintf (asm_out_file, "\t.save {");
25257 else if (IS_VFP_REGNUM (reg))
25260 fprintf (asm_out_file, "\t.vsave {");
25263 /* Unknown register type. */
25266 /* If the stack increment doesn't match the size of the saved registers,
25267 something has gone horribly wrong. */
25268 if (offset != nregs * reg_size)
25273 /* The remaining insns will describe the stores. */
25274 for (i = 1; i <= nregs; i++)
25276 /* Expect (set (mem <addr>) (reg)).
25277 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
25278 e = XVECEXP (p, 0, i);
25279 if (GET_CODE (e) != SET
25280 || !MEM_P (XEXP (e, 0))
25281 || !REG_P (XEXP (e, 1)))
25284 reg = REGNO (XEXP (e, 1));
25289 fprintf (asm_out_file, ", ");
25290 /* We can't use %r for vfp because we need to use the
25291 double precision register names. */
25292 if (IS_VFP_REGNUM (reg))
25293 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
25295 asm_fprintf (asm_out_file, "%r", reg);
25297 #ifdef ENABLE_CHECKING
25298 /* Check that the addresses are consecutive. */
25299 e = XEXP (XEXP (e, 0), 0);
25300 if (GET_CODE (e) == PLUS)
25302 offset += reg_size;
25303 if (!REG_P (XEXP (e, 0))
25304 || REGNO (XEXP (e, 0)) != SP_REGNUM
25305 || !CONST_INT_P (XEXP (e, 1))
25306 || offset != INTVAL (XEXP (e, 1)))
25311 || REGNO (e) != SP_REGNUM)
25315 fprintf (asm_out_file, "}\n");
25318 /* Emit unwind directives for a SET. */
25321 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
25329 switch (GET_CODE (e0))
25332 /* Pushing a single register. */
25333 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
25334 || !REG_P (XEXP (XEXP (e0, 0), 0))
25335 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
25338 asm_fprintf (asm_out_file, "\t.save ");
25339 if (IS_VFP_REGNUM (REGNO (e1)))
25340 asm_fprintf(asm_out_file, "{d%d}\n",
25341 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
25343 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
25347 if (REGNO (e0) == SP_REGNUM)
25349 /* A stack increment. */
25350 if (GET_CODE (e1) != PLUS
25351 || !REG_P (XEXP (e1, 0))
25352 || REGNO (XEXP (e1, 0)) != SP_REGNUM
25353 || !CONST_INT_P (XEXP (e1, 1)))
25356 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
25357 -INTVAL (XEXP (e1, 1)));
25359 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
25361 HOST_WIDE_INT offset;
25363 if (GET_CODE (e1) == PLUS)
25365 if (!REG_P (XEXP (e1, 0))
25366 || !CONST_INT_P (XEXP (e1, 1)))
25368 reg = REGNO (XEXP (e1, 0));
25369 offset = INTVAL (XEXP (e1, 1));
25370 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
25371 HARD_FRAME_POINTER_REGNUM, reg,
25372 offset);
25374 else if (REG_P (e1))
25377 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
25378 HARD_FRAME_POINTER_REGNUM, reg);
25383 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
25385 /* Move from sp to reg. */
25386 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
25388 else if (GET_CODE (e1) == PLUS
25389 && REG_P (XEXP (e1, 0))
25390 && REGNO (XEXP (e1, 0)) == SP_REGNUM
25391 && CONST_INT_P (XEXP (e1, 1)))
25393 /* Set reg to offset from sp. */
25394 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
25395 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
25407 /* Emit unwind directives for the given insn. */
25410 arm_unwind_emit (FILE * asm_out_file, rtx insn)
25413 bool handled_one = false;
25415 if (arm_except_unwind_info (&global_options) != UI_TARGET)
25416 return;
25418 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
25419 && (TREE_NOTHROW (current_function_decl)
25420 || crtl->all_throwers_are_sibcalls))
25421 return;
25423 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
25424 return;
25426 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
25428 pat = XEXP (note, 0);
25429 switch (REG_NOTE_KIND (note))
25431 case REG_FRAME_RELATED_EXPR:
25434 case REG_CFA_REGISTER:
25437 pat = PATTERN (insn);
25438 if (GET_CODE (pat) == PARALLEL)
25439 pat = XVECEXP (pat, 0, 0);
25442 /* Only emitted for IS_STACKALIGN re-alignment. */
25447 src = SET_SRC (pat);
25448 dest = SET_DEST (pat);
25450 gcc_assert (src == stack_pointer_rtx);
25451 reg = REGNO (dest);
25452 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
25453 reg + 0x90, reg);
25455 handled_one = true;
25458 case REG_CFA_DEF_CFA:
25459 case REG_CFA_EXPRESSION:
25460 case REG_CFA_ADJUST_CFA:
25461 case REG_CFA_OFFSET:
25462 /* ??? Only handling here what we actually emit. */
25463 gcc_unreachable ();
25468 if (handled_one)
25469 return;
25471 pat = PATTERN (insn);
25474 switch (GET_CODE (pat))
25476 case SET:
25477 arm_unwind_emit_set (asm_out_file, pat);
25478 break;
25480 case SEQUENCE:
25481 /* Store multiple. */
25482 arm_unwind_emit_sequence (asm_out_file, pat);
25483 break;
25491 /* Output a reference from a function exception table to the type_info
25492 object X. The EABI specifies that the symbol should be relocated by
25493 an R_ARM_TARGET2 relocation. */
25496 arm_output_ttype (rtx x)
25498 fputs ("\t.word\t", asm_out_file);
25499 output_addr_const (asm_out_file, x);
25500 /* Use special relocations for symbol references. */
25501 if (!CONST_INT_P (x))
25502 fputs ("(TARGET2)", asm_out_file);
25503 fputc ('\n', asm_out_file);
25505 return TRUE;
25508 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
25511 arm_asm_emit_except_personality (rtx personality)
25513 fputs ("\t.personality\t", asm_out_file);
25514 output_addr_const (asm_out_file, personality);
25515 fputc ('\n', asm_out_file);
25518 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
25521 arm_asm_init_sections (void)
25523 exception_section = get_unnamed_section (0, output_section_asm_op,
25524 "\t.handlerdata");
25526 #endif /* ARM_UNWIND_INFO */
25528 /* Output unwind directives for the start/end of a function. */
25531 arm_output_fn_unwind (FILE * f, bool prologue)
25533 if (arm_except_unwind_info (&global_options) != UI_TARGET)
25534 return;
25536 if (prologue)
25537 fputs ("\t.fnstart\n", f);
25538 else
25540 /* If this function will never be unwound, then mark it as such.
25541 The same condition is used in arm_unwind_emit to suppress
25542 the frame annotations. */
25543 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
25544 && (TREE_NOTHROW (current_function_decl)
25545 || crtl->all_throwers_are_sibcalls))
25546 fputs("\t.cantunwind\n", f);
25548 fputs ("\t.fnend\n", f);
25553 arm_emit_tls_decoration (FILE *fp, rtx x)
25555 enum tls_reloc reloc;
25558 val = XVECEXP (x, 0, 0);
25559 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
25561 output_addr_const (fp, val);
25563 switch (reloc)
25565 case TLS_GD32:
25566 fputs ("(tlsgd)", fp);
25567 break;
25568 case TLS_LDM32:
25569 fputs ("(tlsldm)", fp);
25570 break;
25571 case TLS_LDO32:
25572 fputs ("(tlsldo)", fp);
25573 break;
25574 case TLS_IE32:
25575 fputs ("(gottpoff)", fp);
25576 break;
25577 case TLS_LE32:
25578 fputs ("(tpoff)", fp);
25579 break;
25580 case TLS_DESCSEQ:
25581 fputs ("(tlsdesc)", fp);
25582 break;
25583 default:
25584 gcc_unreachable ();
25593 fputs (" + (. - ", fp);
25594 output_addr_const (fp, XVECEXP (x, 0, 2));
25595 /* For DESCSEQ the 3rd operand encodes thumbness, and is added rather than subtracted. */
25596 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
25597 output_addr_const (fp, XVECEXP (x, 0, 3));
25598 fputs (")", fp);
25601 return TRUE;
25607 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
25610 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
25612 gcc_assert (size == 4);
25613 fputs ("\t.word\t", file);
25614 output_addr_const (file, x);
25615 fputs ("(tlsldo)", file);
25618 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
25621 arm_output_addr_const_extra (FILE *fp, rtx x)
25623 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
25624 return arm_emit_tls_decoration (fp, x);
25625 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
25628 int labelno = INTVAL (XVECEXP (x, 0, 0));
25630 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
25631 assemble_name_raw (fp, label);
25633 return TRUE;
25635 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
25637 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
25638 if (GOT_PCREL)
25639 fputs ("+.", fp);
25640 fputs ("-(", fp);
25641 output_addr_const (fp, XVECEXP (x, 0, 0));
25642 fputc (')', fp);
25643 return TRUE;
25645 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
25647 output_addr_const (fp, XVECEXP (x, 0, 0));
25651 output_addr_const (fp, XVECEXP (x, 0, 1));
25655 else if (GET_CODE (x) == CONST_VECTOR)
25656 return arm_emit_vector_const (fp, x);
25661 /* Output assembly for a shift instruction.
25662 SET_FLAGS determines how the instruction modifies the condition codes.
25663 0 - Do not set condition codes.
25664 1 - Set condition codes.
25665 2 - Use smallest instruction. */
25667 arm_output_shift (rtx *operands, int set_flags)
25670 static const char flag_chars[3] = {'?', '.', '!'};
25675 c = flag_chars[set_flags];
25676 if (TARGET_UNIFIED_ASM)
25678 shift = shift_op(operands[3], &val);
25679 if (shift != NULL)
25681 if (val != -1)
25682 operands[2] = GEN_INT(val);
25683 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
25686 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
25689 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
25690 output_asm_insn (pattern, operands);
25691 return "";
25694 /* Output assembly for a WMMX immediate shift instruction. */
25696 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
25698 int shift = INTVAL (operands[2]);
25700 enum machine_mode opmode = GET_MODE (operands[0]);
25702 gcc_assert (shift >= 0);
25704 /* Immediate shifts are limited to 63 for the D qualifier, 31 for the W
25705 qualifier and 15 for the H qualifier; larger counts are handled here. */
25706 if (((opmode == V4HImode) && (shift > 15))
25707 || ((opmode == V2SImode) && (shift > 31))
25708 || ((opmode == DImode) && (shift > 63)))
25712 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
25713 output_asm_insn (templ, operands);
25714 if (opmode == DImode)
25716 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
25717 output_asm_insn (templ, operands);
25721 else
25722 /* The destination register will contain all zeros. */
25723 sprintf (templ, "wzero\t%%0");
25724 output_asm_insn (templ, operands);
25726 return "";
25729 if ((opmode == DImode) && (shift > 32))
25731 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
25732 output_asm_insn (templ, operands);
25733 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
25734 output_asm_insn (templ, operands);
25736 return "";
25738 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
25739 output_asm_insn (templ, operands);
25740 return "";
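/* E.g. a DImode (64-bit) shift by 40 is emitted as a shift by #32
   followed by a shift of the partial result by #8, matching the
   splitting logic above.  */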
25744 /* Output assembly for a WMMX tinsr instruction. */
25746 arm_output_iwmmxt_tinsr (rtx *operands)
25748 int mask = INTVAL (operands[3]);
25751 int units = mode_nunits[GET_MODE (operands[0])];
25752 gcc_assert ((mask & (mask - 1)) == 0);
25753 for (i = 0; i < units; ++i)
25755 if ((mask & 0x01) == 1)
25757 break;
25759 mask >>= 1;
25761 gcc_assert (i < units);
25763 switch (GET_MODE (operands[0]))
25766 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
25769 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
25772 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
25775 gcc_unreachable ();
25778 output_asm_insn (templ, operands);
25780 return "";
25783 /* Output a Thumb-1 casesi dispatch sequence. */
25785 thumb1_output_casesi (rtx *operands)
25787 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
25789 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
25791 switch (GET_MODE (diff_vec))
25793 case QImode:
25794 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
25795 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
25796 case HImode:
25797 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
25798 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
25799 case SImode:
25800 return "bl\t%___gnu_thumb1_case_si";
25801 default:
25802 gcc_unreachable ();
25806 /* Output a Thumb-2 casesi instruction. */
25808 thumb2_output_casesi (rtx *operands)
25810 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
25812 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
25814 output_asm_insn ("cmp\t%0, %1", operands);
25815 output_asm_insn ("bhi\t%l3", operands);
25816 switch (GET_MODE (diff_vec))
25819 return "tbb\t[%|pc, %0]";
25821 return "tbh\t[%|pc, %0, lsl #1]";
25825 output_asm_insn ("adr\t%4, %l2", operands);
25826 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
25827 output_asm_insn ("add\t%4, %4, %5", operands);
25832 output_asm_insn ("adr\t%4, %l2", operands);
25833 return "ldr\t%|pc, [%4, %0, lsl #2]";
25836 gcc_unreachable ();
25840 /* Most ARM cores are single issue, but some newer ones can dual issue.
25841 The scheduler descriptions rely on this being correct. */
25843 arm_issue_rate (void)
25867 /* A table and a function to perform ARM-specific name mangling for
25868 NEON vector types in order to conform to the AAPCS (see "Procedure
25869 Call Standard for the ARM Architecture", Appendix A). To qualify
25870 for emission with the mangled names defined in that document, a
25871 vector type must not only be of the correct mode but also be
25872 composed of NEON vector element types (e.g. __builtin_neon_qi). */
25875 enum machine_mode mode;
25876 const char *element_type_name;
25877 const char *aapcs_name;
25878 } arm_mangle_map_entry;
25880 static arm_mangle_map_entry arm_mangle_map[] = {
25881 /* 64-bit containerized types. */
25882 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
25883 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
25884 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
25885 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
25886 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
25887 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
25888 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
25889 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
25890 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
25891 /* 128-bit containerized types. */
25892 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
25893 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
25894 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
25895 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
25896 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
25897 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
25898 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
25899 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
25900 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
25901 { VOIDmode, NULL, NULL }
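/* So, for example, the arm_neon.h type int16x4_t (V4HImode with
   element type __builtin_neon_hi) is mangled as "16__simd64_int16_t",
   matching the AAPCS names in the table above.  */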
25905 arm_mangle_type (const_tree type)
25907 arm_mangle_map_entry *pos = arm_mangle_map;
25909 /* The ARM ABI documents (10th October 2008) say that "__va_list"
25910 has to be mangled as if it is in the "std" namespace. */
25911 if (TARGET_AAPCS_BASED
25912 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
25913 return "St9__va_list";
25915 /* Half-precision float. */
25916 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
25917 return "Dh";
25919 if (TREE_CODE (type) != VECTOR_TYPE)
25920 return NULL;
25922 /* Check the mode of the vector type, and the name of the vector
25923 element type, against the table. */
25924 while (pos->mode != VOIDmode)
25926 tree elt_type = TREE_TYPE (type);
25928 if (pos->mode == TYPE_MODE (type)
25929 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
25930 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
25931 pos->element_type_name))
25932 return pos->aapcs_name;
25934 pos++;
25937 /* Use the default mangling for unrecognized (possibly user-defined)
25938 vector types. */
25939 return NULL;
25942 /* Order of allocation of core registers for Thumb: this allocation is
25943 written over the corresponding initial entries of the array
25944 initialized with REG_ALLOC_ORDER. We allocate all low registers
25945 first. Saving and restoring a low register is usually cheaper than
25946 using a call-clobbered high register. */
25948 static const int thumb_core_reg_alloc_order[] =
25950 3, 2, 1, 0, 4, 5, 6, 7,
25951 14, 12, 8, 9, 10, 11
25954 /* Adjust register allocation order when compiling for Thumb. */
25957 arm_order_regs_for_local_alloc (void)
25959 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
25960 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
25961 if (TARGET_THUMB)
25962 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
25963 sizeof (thumb_core_reg_alloc_order));
25966 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
25969 arm_frame_pointer_required (void)
25971 return (cfun->has_nonlocal_label
25972 || SUBTARGET_FRAME_POINTER_REQUIRED
25973 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
25976 /* Thumb-1 is the only target that lacks conditional execution, so
25977 return true if the target is not Thumb-1. */
25979 arm_have_conditional_execution (void)
25981 return !TARGET_THUMB1;
25984 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
25985 static HOST_WIDE_INT
25986 arm_vector_alignment (const_tree type)
25988 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
25990 if (TARGET_AAPCS_BASED)
25991 align = MIN (align, 64);
25993 return align;
25996 static unsigned int
25997 arm_autovectorize_vector_sizes (void)
25999 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}
static bool
arm_builtin_support_vector_misalignment (enum machine_mode mode,
					 const_tree type, int misalignment,
					 bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
	return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
	 so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
	return true;

      /* Return true if the misalignment is a multiple of the natural alignment
	 of the vector's element type.  This is probably always going to be
	 true in practice, since we've already established that this isn't a
	 packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
	 to use the HI regs, because of the overhead of
	 stacking them.  */
      for (regno = FIRST_HI_REGNUM;
	   regno <= LAST_HI_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
    {
      /* VFPv3 registers are disabled when earlier VFP
	 versions are selected due to the definition of
	 LAST_VFP_REGNUM.  */
      for (regno = FIRST_VFP_REGNUM;
	   regno <= LAST_VFP_REGNUM; ++regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
	    || regno >= FIRST_VFP_REGNUM + 32;
	}
    }

  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
	 and wCG1 as call-preserved registers.  The 2002/11/21
	 revision changed this so that all wCG registers are
	 scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
	   regno <= LAST_IWMMXT_GR_REGNUM; ++regno)
	fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
	 the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
	   regno <= LAST_IWMMXT_REGNUM; ++regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
	}
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10] = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }

  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERAL_REGS.  During the register rename pass, we prefer
     LO_REGS, so that code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */

int
arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}
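
/* For illustration: in Thumb-2, "push {r4-r7, lr}" fits the 16-bit
   encoding (low registers plus LR), so the length computed above is 2;
   pushing any other high register, e.g. "push {r4, r8}", forces the
   32-bit encoding and a length of 4.  */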
/* Compute the number of instructions emitted by output_move_double.  */

int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];

  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}
int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
  if (exact_real_inverse (DFmode, &r0))
    {
      if (exact_real_truncate (DFmode, &r0))
	{
	  HOST_WIDE_INT value = real_to_integer (&r0);
	  value = value & 0xffffffff;
	  if ((value != 0) && ((value & (value - 1)) == 0))
	    return int_log2 (value);
	}
    }
  return 0;
}
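
/* For illustration: given the constant 0.125 (= 1/8), the exact inverse
   is 8.0, which truncates exactly to the power of two 8, so the function
   returns 3 -- the number of fraction bits a fixed-point VCVT would use
   when implementing a multiply by such a constant.  For 0.3 there is no
   exact power-of-two inverse and the function returns 0.  */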
/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}
/* Emit the load-exclusive and store-exclusive instructions.  */

static void
arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem)
{
  rtx (*gen) (rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_arm_load_exclusiveqi; break;
    case HImode: gen = gen_arm_load_exclusivehi; break;
    case SImode: gen = gen_arm_load_exclusivesi; break;
    case DImode: gen = gen_arm_load_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem));
}
static void
arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem)
{
  rtx (*gen) (rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_arm_store_exclusiveqi; break;
    case HImode: gen = gen_arm_store_exclusivehi; break;
    case SImode: gen = gen_arm_store_exclusivesi; break;
    case DImode: gen = gen_arm_store_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (bval, rval, mem));
}
/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);

  insn = emit_jump_insn (insn);
  add_reg_note (insn, REG_BR_PROB, very_unlikely);
}
/* Expand a compare and swap pattern.  */

void
arm_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  enum machine_mode mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For narrow modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* FALLTHRU */

    case SImode:
      /* Force the value into a register if needed.  We waited until after
	 the zero-extension above to do this properly.  */
      if (!arm_add_operand (oldval, SImode))
	oldval = force_reg (SImode, oldval);
      break;

    case DImode:
      if (!cmpdi_operand (oldval, mode))
	oldval = force_reg (mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  */
  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, bval, x));
}
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  enum machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx label1, label2, x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  mod_s = (enum memmodel) INTVAL (operands[5]);
  mod_f = (enum memmodel) INTVAL (operands[6]);
  scratch = operands[7];
  mode = GET_MODE (mem);

  arm_pre_atomic_barrier (mod_s);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem);

  cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));

  arm_emit_store_exclusive (mode, scratch, mem, newval);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  cond = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, cond, x));

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
    }

  if (mod_f != MEMMODEL_RELAXED)
    emit_label (label2);

  arm_post_atomic_barrier (mod_s);

  if (mod_f == MEMMODEL_RELAXED)
    emit_label (label2);
}
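
/* For illustration (schematic; operand names stand for whatever hard
   registers reload picked), a strong SImode compare-and-swap splits
   into roughly this loop, bracketed by the barriers emitted above:

       1:  ldrex   rval, [mem]         @ load-exclusive
	   cmp     rval, oldval
	   bne     2f                  @ mismatch: fail (unlikely)
	   strex   scratch, newval, [mem]
	   cmp     scratch, #0         @ 0 = store succeeded
	   bne     1b                  @ lost reservation: retry
       2:                              @ Z set iff success  */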
void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
		     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = (enum memmodel) INTVAL (model_rtx);
  enum machine_mode mode = GET_MODE (mem);
  enum machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx label, x;

  arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
	{
	  /* DImode plus/minus need to clobber flags.  */
	  /* The adddi3 and subdi3 patterns are incorrectly written so that
	     they require matching operands, even when we could easily support
	     three operands.  Thankfully, this can be fixed up post-splitting,
	     as the individual add+adc patterns do accept three operands and
	     post-reload cprop can make these moves go away.  */
	  emit_move_insn (new_out, old_out);
	  if (code == PLUS)
	    x = gen_adddi3 (new_out, new_out, value);
	  else
	    x = gen_subdi3 (new_out, new_out, value);
	  emit_insn (x);
	  break;
	}
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out));

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  arm_post_atomic_barrier (model);
}
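
/* For illustration (schematic): an SImode __atomic_fetch_add splits into

       1:  ldrex   old_out, [mem]      @ exclusive load of the old value
	   add     new_out, old_out, value
	   strex   cond, new_out, [mem]
	   cmp     cond, #0
	   bne     1b                  @ retry until the strex succeeds

   with barriers around the loop as dictated by the memory model.  */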
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
	  pair = gen_lowpart (TImode, pair);
	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
	}
    }
}
void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}
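
/* For illustration: VEC_PERM_EXPR treats selector elements modulo twice
   the element count, so for a two-operand V8QI permutation a selector
   byte of 17 must pick element 1; VTBL instead yields zero for any
   out-of-range index.  Hence the AND above with 15 (or with 7 when both
   operands are the same vector) before handing the selector to VTBL.  */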
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vuzpv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vuzpv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vuzpv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vuzpv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vuzpv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vuzpv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vuzpv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
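
/* For illustration: on V8QI the selector {0, 2, 4, 6, 8, 10, 12, 14}
   (every even element of the concatenated inputs) matches with odd == 0,
   and {1, 3, 5, 7, 9, 11, 13, 15} with odd == 1.  VUZP produces both
   halves at once; the unwanted half lands in the scratch register out1.  */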
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
	return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vzipv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vzipv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vzipv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vzipv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vzipv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vzipv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vzipv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
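
/* For illustration: on V8QI, VZIP interleaves the two inputs, so the
   selector {0, 8, 1, 9, 2, 10, 3, 11} (low halves interleaved) matches
   with high == 0, and {4, 12, 5, 13, 6, 14, 7, 15} (high halves) with
   high == nelt / 2.  */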
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev64v16qi; break;
	case V8QImode:  gen = gen_neon_vrev64v8qi;  break;
	default:
	  return false;
	}
      break;
    case 3:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev32v16qi; break;
	case V8QImode:  gen = gen_neon_vrev32v8qi;  break;
	case V8HImode:  gen = gen_neon_vrev64v8hi;  break;
	case V4HImode:  gen = gen_neon_vrev64v4hi;  break;
	default:
	  return false;
	}
      break;
    case 1:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev16v16qi; break;
	case V8QImode:  gen = gen_neon_vrev16v8qi;  break;
	case V8HImode:  gen = gen_neon_vrev32v8hi;  break;
	case V4HImode:  gen = gen_neon_vrev32v4hi;  break;
	case V4SImode:  gen = gen_neon_vrev64v4si;  break;
	case V2SImode:  gen = gen_neon_vrev64v2si;  break;
	case V4SFmode:  gen = gen_neon_vrev64v4sf;  break;
	case V2SFmode:  gen = gen_neon_vrev64v2sf;  break;
	default:
	  return false;
	}
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt ; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
	/* This is guaranteed to be true as the value of diff
	   is 7, 3, 1 and we should have enough elements in the
	   queue to generate this.  Getting a vector mask with a
	   value of diff other than these values implies that
	   something is wrong by the time we get here.  */
	gcc_assert (i + j < nelt);
	if (d->perm[i + j] != i + diff - j)
	  return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  /* ??? The third operand is an artifact of the builtin infrastructure
     and is ignored by the actual instruction.  */
  emit_insn (gen (d->target, d->op0, const0_rtx));
  return true;
}
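
/* For illustration: on V8QI the selector {1, 0, 3, 2, 5, 4, 7, 6} has
   diff == 1 and reverses the bytes within each 16-bit group, which is
   exactly VREV16.8; {7, 6, 5, 4, 3, 2, 1, 0} has diff == 7 and maps to
   VREV64.8.  */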
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vtrnv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vtrnv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vtrnv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vtrnv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vtrnv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vtrnv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vtrnv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
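
/* For illustration: VTRN treats the pair of inputs as 2x2 matrices of
   adjacent elements and transposes them, so on V8QI the selector
   {0, 8, 2, 10, 4, 12, 6, 14} matches with odd == 0 and
   {1, 9, 3, 11, 5, 13, 7, 15} with odd == 1.  */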
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indexes are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
	 the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
	break;

      /* If we are operating on only one vector: it could be a
	 rotation.  If there are only two elements of size < 64, let
	 arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
	{
	  if ((nelt == 2) && (d->vmode != V2DImode))
	    return false;

	  next = 0;
	}

      if (d->perm[i] != next)
	return false;
    }

  location = d->perm[0];

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vextv16qi; break;
    case V8QImode:  gen = gen_neon_vextv8qi;  break;
    case V4HImode:  gen = gen_neon_vextv4hi;  break;
    case V8HImode:  gen = gen_neon_vextv8hi;  break;
    case V2SImode:  gen = gen_neon_vextv2si;  break;
    case V4SImode:  gen = gen_neon_vextv4si;  break;
    case V2SFmode:  gen = gen_neon_vextv2sf;  break;
    case V4SFmode:  gen = gen_neon_vextv4sf;  break;
    case V2DImode:  gen = gen_neon_vextv2di;  break;
    default:
      return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}
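
/* For illustration: on V4SI the two-operand selector {1, 2, 3, 4} takes
   the top three elements of the first vector followed by the first
   element of the second, i.e. a VEXT.32 with location == 1; the
   one-vector selector {1, 2, 3, 0} is accepted as a rotation via the
   "next = 0" wrap-around above.  */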
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  enum machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Check if the input mask matches vext before reordering the
     operands.  */
  if (TARGET_NEON)
    if (arm_evpc_neon_vext (d))
      return true;

  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      for (i = 0; i < nelt; ++i)
	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_NEON)
    {
      if (arm_evpc_neon_vuzp (d))
	return true;
      if (arm_evpc_neon_vzip (d))
	return true;
      if (arm_evpc_neon_vrev (d))
	return true;
      if (arm_evpc_neon_vtrn (d))
	return true;
      return arm_evpc_neon_vtbl (d);
    }
  return false;
}
/* Expand a vec_perm_const pattern.  */

bool
arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 operand.  */
      /* FALLTHRU */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return arm_expand_vec_perm_const_1 (&d);
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */

static bool
arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
				 const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Categorize the set of elements in the selector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
bool
arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and we do not have ldrd
     then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
      /* Post increment and Pre Decrement are supported for all
	 instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
	{
	  if (code != ARM_PRE_DEC)
	    return true;
	  else
	    return false;
	}
      return true;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and mode size greater than
	 word size, there is no point in auto-incrementing
	 because ldm and stm will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
	return false;

      /* Vector and floating point modes do not support
	 these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
	return false;

      return true;

    default:
      return false;
    }

  return false;
}
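
/* For illustration: with SImode and ARM_POST_INC the function above
   returns true, permitting forms like "ldr r0, [r1], #4"; for a vector
   mode with ARM_PRE_DEC it returns false, since the NEON load/store
   patterns provide no pre-decrement addressing.  */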
/* The default expansion of general 64-bit shifts in core-regs is suboptimal,
   on ARM, since we know that shifts by negative amounts are no-ops.
   Additionally, the default expansion code is not available or suitable
   for post-reload insn splits (this can occur when the register allocator
   chooses not to do a shift in NEON).

   This function is used in both initial expand and post-reload splits, and
   handles all kinds of 64-bit shifts.

   Input requirements:
    - It is safe for the input and output to be the same register, but
      early-clobber rules apply for the shift amount and scratch registers.
    - Shift by register requires both scratch registers.  In all other cases
      the scratch registers may be NULL.
    - Ashiftrt by a register also clobbers the CC register.  */
void
arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
			       rtx amount, rtx scratch1, rtx scratch2)
{
  rtx out_high = gen_highpart (SImode, out);
  rtx out_low = gen_lowpart (SImode, out);
  rtx in_high = gen_highpart (SImode, in);
  rtx in_low = gen_lowpart (SImode, in);

  /* Terminology:
	in = the register pair containing the input value.
	out = the destination register pair.
	up = the high- or low-part of each pair.
	down = the opposite part to "up".
     In a shift, we can consider bits to shift from "up"-stream to
     "down"-stream, so in a left-shift "up" is the low-part and "down"
     is the high-part of each register pair.  */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up    = code == ASHIFT ? in_low : in_high;
  rtx in_down  = code == ASHIFT ? in_high : in_low;

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
	      && (REG_P (out) || GET_CODE (out) == SUBREG)
	      && GET_MODE (out) == DImode);
  gcc_assert (in
	      && (REG_P (in) || GET_CODE (in) == SUBREG)
	      && GET_MODE (in) == DImode);
  gcc_assert (amount
	      && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
		   && GET_MODE (amount) == SImode)
		  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
	      || (GET_CODE (scratch1) == SCRATCH)
	      || (GET_MODE (scratch1) == SImode
		  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
	      || (GET_CODE (scratch2) == SCRATCH)
	      || (GET_MODE (scratch2) == SImode
		  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
	      || !HARD_REGISTER_P (out)
	      || (REGNO (out) != REGNO (amount)
		  && REGNO (out) + 1 != REGNO (amount)));
  /* Macros to make following code more readable.  */
#define SUB_32(DEST,SRC) \
	  gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
#define RSB_32(DEST,SRC) \
	  gen_subsi3 ((DEST), GEN_INT (32), (SRC))
#define SUB_S_32(DEST,SRC) \
	  gen_addsi3_compare0 ((DEST), (SRC), \
			       GEN_INT (-32))
#define SET(DEST,SRC) \
	  gen_rtx_SET (SImode, (DEST), (SRC))
#define SHIFT(CODE,SRC,AMOUNT) \
	  gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
#define LSHIFT(CODE,SRC,AMOUNT) \
	  gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
			  SImode, (SRC), (AMOUNT))
#define REV_LSHIFT(CODE,SRC,AMOUNT) \
	  gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
			  SImode, (SRC), (AMOUNT))
#define ORR(A,B) \
	  gen_rtx_IOR (SImode, (A), (B))
#define BRANCH(COND,LABEL) \
	  gen_arm_cond_branch ((LABEL), \
			       gen_rtx_ ## COND (CCmode, cc_reg, \
						 const0_rtx), \
			       cc_reg)
  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
	 In both cases we try to match the result an ARM instruction in a
	 shift-by-register would give.  This helps reduce execution
	 differences between optimization levels, but it won't stop other
	 parts of the compiler doing different things.  This is "undefined
	 behaviour", in any case.  */
      if (INTVAL (amount) <= 0)
	emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
	{
	  if (code == ASHIFTRT)
	    {
	      rtx const31_rtx = GEN_INT (31);
	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
	    }
	  else
	    emit_insn (gen_movdi (out, const0_rtx));
	}

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
	{
	  /* Shifts by a constant less than 32.  */
	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
	  emit_insn (SET (out_down,
			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
			       out_down)));
	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
	}
      else
	{
	  /* Shifts by a constant greater than 31.  */
	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
	  if (code == ASHIFTRT)
	    emit_insn (gen_ashrsi3 (out_up, in_up, GEN_INT (31)));
	  else
	    emit_insn (SET (out_up, const0_rtx));
	}
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
	 Swapping them around now allows the later code to be more general.  */
      switch (code)
	{
	case ASHIFT:
	  emit_insn (SUB_32 (scratch1, amount));
	  emit_insn (RSB_32 (scratch2, amount));
	  break;
	case ASHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  /* Also set CC = amount > 32.  */
	  emit_insn (SUB_S_32 (scratch2, amount));
	  break;
	case LSHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  emit_insn (SUB_32 (scratch2, amount));
	  break;
	default:
	  gcc_unreachable ();
	}
      /* Emit code like this:

	 ASHIFT:
	    out_down = in_down << amount;
	    out_down = (in_up << (amount - 32)) | out_down;
	    out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
	    out_up = in_up << amount;

	 ASHIFTRT:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    if (amount >= 32)
	      out_down = ((signed)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;

	 LSHIFTRT:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;

	 (The last ORR term in the LSHIFTRT case is zero whenever
	 amount < 32, so unlike the ASHIFTRT case it needs no branch.)

	 The ARM and Thumb2 variants are the same but implemented slightly
	 differently.  If this were only called during expand we could just
	 use the Thumb2 case and let combine do the right thing, but this
	 can also be called from post-reload splitters.  */
      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
	{
	  /* Emit code for ARM mode.  */
	  emit_insn (SET (out_down,
			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
	  if (code == ASHIFTRT)
	    {
	      rtx done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
					     out_down)));
	      emit_label (done_label);
	    }
	  else
	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
					   out_down)));
	}
      else
	{
	  /* Emit code for Thumb2 mode.
	     Thumb2 can't do shift and or in one insn.  */
	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

	  if (code == ASHIFTRT)
	    {
	      rtx done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
	      emit_label (done_label);
	    }
	  else
	    {
	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
	    }
	}

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

#undef SUB_32
#undef RSB_32
#undef SUB_S_32
#undef SET
#undef SHIFT
#undef LSHIFT
#undef REV_LSHIFT
#undef ORR
#undef BRANCH
}
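
/* For illustration: an unsigned 64-bit right shift by the constant 40
   (a "shift by a constant greater than 31") reduces to two instructions,

       lsr   r0, r1, #8    @ out_down = in_up >> (40 - 32)
       mov   r1, #0        @ out_up = 0

   assuming the value lives in the little-endian register pair {r0, r1}
   (r0 low, r1 high); the register choice here is illustrative only.  */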
/* Return true if COMPARISON is a valid comparison operation, and force
   the operands into a form that is valid for it.  */

bool
arm_validize_comparison (rtx *comparison, rtx *op1, rtx *op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  code_int = (int) code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code) code_int);

  switch (mode)
    {
    case SImode:
      if (!arm_add_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case DImode:
      if (!cmpdi_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!cmpdi_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case SFmode:
    case DFmode:
      if (!arm_float_compare_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!arm_float_compare_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    default:
      return false;
    }
}
#include "gt-arm.h"