/* Output routines for GCC for ARM.
   Copyright (C) 1991-2013 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
			       rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
						    enum machine_mode, int *,
						    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (enum machine_mode);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
static bool arm_legitimate_constant_p (enum machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree arm_builtin_decl (unsigned, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
				  tree, bool);
static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
			     const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
				      const_tree, bool);
static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (enum machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);
static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
#endif
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool arm_array_mode_supported_p (enum machine_mode,
					unsigned HOST_WIDE_INT);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
					     const unsigned char *sel);

static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
				   enum vect_cost_for_stmt kind,
				   struct _stmt_vec_info *stmt_info,
				   int misalign,
				   enum vect_cost_model_location where);

static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL,           0, 0, false, false, false, NULL, false }
};
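
/* Usage sketch (illustrative only, not compiled here): the attributes
   declared above are applied in user code as, for example,

     void far_func (void) __attribute__ ((long_call));
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     int aapcs_fn (int) __attribute__ ((pcs ("aapcs")));

   "isr"/"interrupt" accept an optional argument naming the exception
   kind, and "pcs" requires exactly one string argument, matching the
   min_len/max_len fields in the table.  */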
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
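
/* Worked example: with the two limits above, a single anchor covers
   byte offsets in [-4088, +4095], i.e. 4088 + 1 + 4095 = 8184 bytes
   in total, and 8184 = 8 * 1023, so consecutive anchors can be placed
   at eight-byte multiples.  */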
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of skipped insns that will be
   conditionalised if possible.  */
static int max_insns_skipped = 5;
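
/* Illustrative example: a short branched-over sequence such as

       cmp   r0, #0
       beq   .Lskip
       add   r1, r1, #1
   .Lskip:

   can instead be emitted as the predicated "addne r1, r1, #1",
   provided no more than max_insns_skipped insns are skipped.  */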
extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus.  */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply.  */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support.  */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support.  */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4.  */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5.  */
#define FL_THUMB      (1 << 6)        /* Thumb aware.  */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary.  */
#define FL_STRONG     (1 << 8)        /* StrongARM.  */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5.  */
#define FL_XSCALE     (1 << 10)       /* XScale.  */
/* spare             (1 << 11) */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
					 media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
					 Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
					 profile.  */
#define FL_THUMB_DIV  (1 << 18)       /* Hardware divide (Thumb mode).  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
					 architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */
#define FL_ARM_DIV    (1 << 23)       /* Hardware divide (ARM mode).  */
#define FL_ARCH8      (1 << 24)       /* Architecture 8.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */
#define FL_IWMMXT2    (1 << 30)       /* "Intel Wireless MMX2 technology".  */
/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE         (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
			 | FL_CO_PROC)

#define FL_FOR_ARCH2    FL_NOTM
#define FL_FOR_ARCH3    (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M   (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4    (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T   (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5    (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T   (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E   (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE  (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
#define FL_FOR_ARCH6    (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J   FL_FOR_ARCH6
#define FL_FOR_ARCH6K   (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z   FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK  FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2  (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M   (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7    ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A   (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R   (FL_FOR_ARCH7A | FL_THUMB_DIV)
#define FL_FOR_ARCH7M   (FL_FOR_ARCH7 | FL_THUMB_DIV)
#define FL_FOR_ARCH7EM  (FL_FOR_ARCH7M | FL_ARCH7EM)
#define FL_FOR_ARCH8A   (FL_FOR_ARCH7 | FL_ARCH6K | FL_ARCH8 | FL_THUMB_DIV \
			 | FL_ARM_DIV | FL_NOTM)
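
/* As an illustration of how these sets compose, expanding one chain:

     FL_FOR_ARCH4T = FL_FOR_ARCH4 | FL_THUMB
		   = FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_THUMB

   so a test such as (insn_flags & FL_THUMB) answers "may Thumb
   instructions be emitted?" for whatever -march/-mcpu was selected.  */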
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;
/* For an explanation of these variables, see final_prescan_insn below.  */

/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
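
/* Note that the encoding pairs each condition with its inverse in
   adjacent slots, so flipping the low bit of a condition index
   inverts it: "eq" (0) ^ 1 gives "ne" (1), "cs" (2) ^ 1 gives
   "cc" (3), and so on.  This is the property that allows the
   inverse to be computed without a second table.  */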
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  enum base_architecture base_arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};


#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, \
  l1_size, \
  l1_line_size

/* arm generic vectorizer costs.  */
static const
struct cpu_vec_costs arm_default_vec_cost = {
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_unalign_store_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,					/* Sched adj cost.  */
  3,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,					/* Sched adj cost.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
};

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  arm_fastmul_rtx_costs,
  NULL,					/* Sched adj cost.  */
  1,					/* Constant limit.  */
  3,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  2,					/* Constant limit.  */
  3,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,					/* Sched adj cost.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
};

const struct tune_params arm_v6t2_tune =
{
  arm_9e_rtx_costs,
  NULL,					/* Sched adj cost.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,				/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  arm_9e_rtx_costs,
  NULL,					/* Sched adj cost.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,				/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
};

const struct tune_params arm_cortex_a15_tune =
{
  arm_9e_rtx_costs,
  NULL,					/* Sched adj cost.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,				/* Prefer constant pool.  */
  arm_default_branch_cost,
  true,					/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
};

/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  arm_9e_rtx_costs,
  NULL,					/* Sched adj cost.  */
  1,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,				/* Prefer constant pool.  */
  arm_cortex_a5_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {false, false},			/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  cortex_a9_sched_adjust_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  false,				/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
};

/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1 and cortex-m0plus.  */
const struct tune_params arm_v6m_tune =
{
  arm_9e_rtx_costs,
  NULL,					/* Sched adj cost.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,				/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {false, false},			/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
};

const struct tune_params arm_fa726te_tune =
{
  arm_9e_rtx_costs,
  fa726te_sched_adjust_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,				/* Prefer LDRD/STRD.  */
  {true, true},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,		/* Vectorizer costs.  */
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
   FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};
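
/* For example (hypothetical arm-cores.def entry), a line such as

     ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)

   would expand under the ARM_CORE macro above to

     {"cortex-a9", cortexa9, "7A", BASE_ARCH_7A,
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a9_tune},

   i.e. every core automatically inherits the full capability set of
   its architecture through FL_FOR_ARCH##ARCH.  */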
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */

#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH
  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};


/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";
/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
  { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
#include "arm-fpus.def"
#undef ARM_FPU
};


/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}

/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
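
/* Worked example of the loop above: for value = 0b101100 the
   "value &= value - 1" step clears one set bit per iteration,
   0b101100 -> 0b101000 -> 0b100000 -> 0, so bit_count returns 3
   after three iterations rather than one per bit position.  */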
typedef struct
{
  enum machine_mode mode;
  const char *name;
} arm_fixed_mode_set;

/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
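
/* For example, the call

     arm_set_fixed_optab_libfunc (ssadd_optab, SAmode, "ssadd", "sa", 3);

   registers "__gnu_ssaddsa3" as the saturating-add helper for the
   signed accumulator mode, matching the __gnu_-prefixed names used by
   the fixed-point support routines in libgcc.  */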
static void
arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
			    enum machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
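
/* For example, a conversion between two signed accum modes gets the
   "2" suffix, while a fixed-point/integer conversion does not:

     arm_set_fixed_conv_libfunc (fract_optab, DAmode, SAmode,
				 "fract", "da", "sa");
       -> registers "__gnu_fractsada2"

     arm_set_fixed_conv_libfunc (fract_optab, SAmode, SImode,
				 "fract", "sa", "si");
       -> registers "__gnu_fractsisa"

   (the FROM name is printed before the TO name, per the sprintf
   above).  */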
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (2 * UNITS_PER_WORD);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     caller-saved (scratch) registers, there is no need to tell the
     compiler explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
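
  /* Concretely (illustrative), for SImode "r = a % b" there is no mod
     libcall, so gcc emits a call to __aeabi_idivmod and reads the
     remainder from r1, while "q = a / b" uses the faster __aeabi_idiv
     and reads the quotient from r0; per the run-time ABI, the divmod
     entry points return the quotient in r0 and the remainder in r1.  */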
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
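
  /* The effect of the NULL entries (illustrative): given __fp16 a, b,
     an expression such as a + b is evaluated as

       (float) a + (float) b

     so the only HFmode libcalls ever emitted are the two conversion
     routines registered above; arithmetic and comparisons are done in
     SFmode.  */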
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
	{ QQmode, "qq" },
	{ UQQmode, "uqq" },
	{ HQmode, "hq" },
	{ UHQmode, "uhq" },
	{ SQmode, "sq" },
	{ USQmode, "usq" },
	{ DQmode, "dq" },
	{ UDQmode, "udq" },
	{ TQmode, "tq" },
	{ UTQmode, "utq" },
	{ HAmode, "ha" },
	{ UHAmode, "uha" },
	{ SAmode, "sa" },
	{ USAmode, "usa" },
	{ DAmode, "da" },
	{ UDAmode, "uda" },
	{ TAmode, "ta" },
	{ UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
	{ QQmode, "qq" },
	{ UQQmode, "uqq" },
	{ HQmode, "hq" },
	{ UHQmode, "uhq" },
	{ SQmode, "sq" },
	{ USQmode, "usq" },
	{ DQmode, "dq" },
	{ UDQmode, "udq" },
	{ TQmode, "tq" },
	{ UTQmode, "utq" },
	{ HAmode, "ha" },
	{ UHAmode, "uha" },
	{ SAmode, "sa" },
	{ USAmode, "usa" },
	{ DAmode, "da" },
	{ UDAmode, "uda" },
	{ TAmode, "ta" },
	{ UTAmode, "uta" },
	{ QImode, "qi" },
	{ HImode, "hi" },
	{ SImode, "si" },
	{ DImode, "di" },
	{ TImode, "ti" },
	{ SFmode, "sf" },
	{ DFmode, "df" }
      };
    unsigned int i, j;
    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				     "add", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				     "ssadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				     "usadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				     "sub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				     "sssub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				     "ussub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				     "mul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				     "ssmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				     "usmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				     "div", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				     "udiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				     "ssdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				     "usdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				     "neg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				     "ssneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				     "usneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				     "ashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				     "ashr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				     "lshr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				     "ssashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				     "usashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				     "cmp", fixed_arith_modes[i].name, 2);
      }
    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
	{
	  if (i == j
	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	    continue;

	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfract_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (fractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	}
  }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
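
/* Because the structure tag is part of the ABI, it is observable in
   C++ mangling.  For example (illustrative), on an AAPCS target

     void f (va_list);

   mangles as _Z1fSt9__va_list: va_list is treated as the class
   __va_list in namespace std, as the C++ ABI for the ARM architecture
   requires.  */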
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     __va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    arm_selected_cpu = &all_cores[(int) arm_cpu_option];

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
	{
	  /* Check for conflict between mcpu and march.  */
	  if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
	    {
	      warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
		       arm_selected_cpu->name, arm_selected_arch->name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = arm_selected_arch;
	    }
	  else
	    /* -mcpu wins.  */
	    arm_selected_arch = NULL;
	}
      else
	/* Pick a CPU based on the architecture.  */
	arm_selected_cpu = arm_selected_arch;
    }
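
  /* For example (illustrative), "-mcpu=arm7tdmi -march=armv6" differ
     in more than the FL_TUNE bits, so the warning above fires; code is
     then generated for armv6 while arm7tdmi is kept only as the
     default tuning target.  */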
1653 /* If the user did not specify a processor, choose one for them. */
  if (!arm_selected_cpu)
    {
      const struct processors * sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
	  /* Default to ARM6.  */
	  if (!arm_selected_cpu->name)
	    arm_selected_cpu = &all_cores[arm6];
	}
1672 sel = arm_selected_cpu;
1673 insn_flags = sel->flags;
      /* Now check to see if the user has specified some command line
	 switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  sought |= (FL_THUMB | FL_MODE32);
1683 /* There are no ARM processors that support both APCS-26 and
1684 interworking. Therefore we force FL_MODE26 to be removed
1685 from insn_flags here (if it was set), so that the search
1686 below will always be able to find a compatible processor. */
	  insn_flags &= ~FL_MODE26;
	}
      if (sought != 0 && ((sought & insn_flags) != sought))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if ((sel->flags & (sought | insn_flags)) == (sought | insn_flags))
	      break;

	  if (sel->name == NULL)
	    {
1701 unsigned current_bit_count = 0;
1702 const struct processors * best_fit = NULL;
1704 /* Ideally we would like to issue an error message here
1705 saying that it was not possible to find a CPU compatible
1706 with the default CPU, but which also supports the command
1707 line options specified by the programmer, and so they
1708 ought to use the -mcpu=<name> command line option to
1709 override the default CPU type.
1711 If we cannot find a cpu that has both the
1712 characteristics of the default cpu and the given
1713 command line options we scan the array again looking
1714 for a best match. */
	      for (sel = all_cores; sel->name != NULL; sel++)
		if ((sel->flags & sought) == sought)
		  {
		    unsigned count = bit_count (sel->flags & insn_flags);

		    if (count >= current_bit_count)
		      {
			best_fit = sel;
			current_bit_count = count;
		      }
		  }

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}
    }

  gcc_assert (arm_selected_cpu);
1738 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1739 if (!arm_selected_tune)
1740 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1742 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1743 insn_flags = arm_selected_cpu->flags;
1744 arm_base_arch = arm_selected_cpu->base_arch;
1746 arm_tune = arm_selected_tune->core;
1747 tune_flags = arm_selected_tune->flags;
1748 current_tune = arm_selected_tune->tune;
1750 /* Make sure that the processor choice does not conflict with any of the
1751 other command line choices. */
1752 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1753 error ("target CPU does not support ARM mode");
1755 /* BPABI targets use linker tricks to allow interworking on cores
1756 without thumb support. */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }
  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }
  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used");  */
      target_flags &= ~MASK_APCS_FRAME;
    }
1775 /* Callee super interworking implies thumb interworking. Adding
1776 this to the flags here simplifies the logic elsewhere. */
1777 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1778 target_flags |= MASK_INTERWORK;
1780 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1781 from here where no function is being compiled currently. */
1782 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1783 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1785 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1786 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }
1794 if (TARGET_POKE_FUNCTION_NAME)
1795 target_flags |= MASK_APCS_FRAME;
1797 if (TARGET_APCS_REENT && flag_pic)
1798 error ("-fpic and -mapcs-reent are incompatible");
1800 if (TARGET_APCS_REENT)
1801 warning (0, "APCS reentrant code not supported. Ignored");
1803 /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
1807 && !TARGET_APCS_FRAME
1808 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1809 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1811 if (TARGET_APCS_FLOAT)
1812 warning (0, "passing floating point arguments in fp regs not yet supported");
1814 if (TARGET_LITTLE_WORDS)
1815 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
1816 "will be removed in a future release");
1818 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1819 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1820 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 && ((insn_flags & FL_THUMB) != 0);
1822 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1823 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1824 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1825 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1826 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1827 arm_arch6m = arm_arch6 && !arm_arch_notm;
1828 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1829 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1830 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
1831 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1832 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1834 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1835 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1836 thumb_code = TARGET_ARM == 0;
1837 thumb1_code = TARGET_THUMB1 != 0;
1838 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1839 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1840 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1841 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
1842 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1843 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1844 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1846 /* If we are not using the default (ARM mode) section anchor offset
1847 ranges, then set the correct ranges now. */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
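
  /* A quick check of the arithmetic in the comment above:
     248 + 1 + 4095 = 4344 = 8 * 543, so the block size for an anchor
     really is a multiple of eight.  */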
1867 /* V5 code we generate is completely interworking capable, so we turn off
1868 TARGET_INTERWORK here to avoid many tests later on. */
1870 /* XXX However, we must pass the right pre-processor defines to CPP
1871 or GLD can get confused. This is a hack. */
1872 if (TARGET_INTERWORK)
1873 arm_cpp_interwork = 1;
  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;
1878 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1879 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1881 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1882 error ("iwmmxt abi requires an iwmmxt capable cpu");
  if (!global_options_set.x_arm_fpu_index)
    {
      const char *target_fpu_name;
      bool ok;
#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      target_fpu_name = "vfp";
#endif
      ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
				  CL_TARGET);
      gcc_assert (ok);
    }
1900 arm_fpu_desc = &all_fpus[arm_fpu_index];
  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable ();
    }
  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support -mcaller-super-interworking");
      else if (TARGET_CALLEE_INTERWORKING)
	error ("AAPCS does not support -mcallee-super-interworking");
    }
1921 /* iWMMXt and NEON are incompatible. */
1922 if (TARGET_IWMMXT && TARGET_NEON)
1923 error ("iWMMXt and NEON are incompatible");
1925 /* iWMMXt unsupported under Thumb mode. */
1926 if (TARGET_THUMB && TARGET_IWMMXT)
1927 error ("iWMMXt unsupported under Thumb mode");
1929 /* __fp16 support currently assumes the core has ldrh. */
1930 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1931 sorry ("__fp16 and no ldrh");
1933 /* If soft-float is specified then don't use FPU. */
1934 if (TARGET_SOFT_FLOAT)
1935 arm_fpu_attr = FPU_NONE;
  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
	       && TARGET_HARD_FLOAT
	       && TARGET_VFP)
	arm_pcs_default = ARM_PCS_AAPCS_VFP;
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
	sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }
1959 /* For arm2/3 there is no need to do any scheduling if we are doing
1960 software floating-point. */
1961 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
1962 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1964 /* Use the cp15 method if it is available. */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }
1973 if (TARGET_HARD_TP && TARGET_THUMB1)
1974 error ("can not use -mtp=cp15 with 16-bit Thumb");
1976 /* Override the default structure alignment for AAPCS ABI. */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }
1998 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2000 error ("RTP PIC is incompatible with Thumb");
2004 /* If stack checking is disabled, we can use r10 as the PIC register,
2005 which keeps r9 available. The EABI specifies r9 as the PIC register. */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }
2013 if (flag_pic && TARGET_VXWORKS_RTP)
2014 arm_pic_register = 9;
  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");
2023 /* Prevent the user from choosing an obviously stupid PIC register. */
2024 else if (pic_register < 0 || call_used_regs[pic_register]
2025 || pic_register == HARD_FRAME_POINTER_REGNUM
2026 || pic_register == STACK_POINTER_REGNUM
2027 || pic_register >= PC_REGNUM
2028 || (TARGET_VXWORKS_RTP
2029 && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }
2035 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
  if (fix_cm3_ldrd == 2)
    fix_cm3_ldrd = (arm_selected_cpu->core == cortexm3);
2044 /* Enable -munaligned-access by default for
2045 - all ARMv6 architecture-based processors
2046 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2047 - ARMv8 architecture-base processors.
2049 Disable -munaligned-access by default for
2050 - all pre-ARMv6 architecture-based processors
2051 - ARMv6-M architecture-based processors. */
  if (unaligned_access == 2)
    {
      if (arm_arch6 && (arm_arch_notm || arm_arch7))
	unaligned_access = 1;
      else
	unaligned_access = 0;
    }
  else if (unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      unaligned_access = 0;
    }
2067 if (TARGET_THUMB1 && flag_schedule_insns)
2069 /* Don't warn since it's on by default in -O2. */
2070 flag_schedule_insns = 0;
  if (optimize_size)
    /* If optimizing for size, bump the number of instructions that we
       are prepared to conditionally execute (even on a StrongARM).  */
    max_insns_skipped = 6;
  else
    max_insns_skipped = current_tune->max_insns_skipped;
2082 /* Hot/Cold partitioning is not currently supported, since we can't
2083 handle literal pool placement in that case. */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }
  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
2101 /* ARM EABI defaults to strict volatile bitfields. */
2102 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2103 && abi_version_at_least(2))
2104 flag_strict_volatile_bitfields = 1;
  /* Enable software prefetching at -O3 for CPUs that have prefetch, and
     where we have deemed it beneficial (signified by setting
     num_prefetch_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->num_prefetch_slots > 0)
    flag_prefetch_loop_arrays = 1;
2114 /* Set up parameters to be used in prefetching algorithm. Do not override the
2115 defaults unless we are tuning for a core we have researched values for. */
2116 if (current_tune->num_prefetch_slots > 0)
2117 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2118 current_tune->num_prefetch_slots,
2119 global_options.x_param_values,
2120 global_options_set.x_param_values);
2121 if (current_tune->l1_cache_line_size >= 0)
2122 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2123 current_tune->l1_cache_line_size,
2124 global_options.x_param_values,
2125 global_options_set.x_param_values);
2126 if (current_tune->l1_cache_size >= 0)
2127 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2128 current_tune->l1_cache_size,
2129 global_options.x_param_values,
2130 global_options_set.x_param_values);
2132 /* Use the alternative scheduling-pressure algorithm by default. */
2133 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2134 global_options.x_param_values,
2135 global_options_set.x_param_values);
2137 /* Register global variables with the garbage collector. */
  arm_add_gc_roots ();
}
static void
arm_add_gc_roots (void)
{
2144 gcc_obstack_init(&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
2148 /* A table of known ARM exception types.
2149 For use with the interrupt function attribute. */
typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
2160 { "IRQ", ARM_FT_ISR },
2161 { "irq", ARM_FT_ISR },
2162 { "FIQ", ARM_FT_FIQ },
2163 { "fiq", ARM_FT_FIQ },
2164 { "ABORT", ARM_FT_ISR },
2165 { "abort", ARM_FT_ISR },
2168 { "UNDEF", ARM_FT_EXCEPTION },
2169 { "undef", ARM_FT_EXCEPTION },
2170 { "SWI", ARM_FT_EXCEPTION },
2171 { "swi", ARM_FT_EXCEPTION },
  { NULL, ARM_FT_NORMAL }
};
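
/* For reference, user code selects one of the rows above through the
   function attribute argument, e.g. (illustrative only):

     void handler (void) __attribute__ ((interrupt ("IRQ")));  */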
2175 /* Returns the (interrupt) function type of the current
2176 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2178 static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char * arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2187 /* No argument - default to IRQ. */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;
2191 /* Get the value of the argument. */
2192 if (TREE_VALUE (argument) == NULL_TREE
2193 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2194 return ARM_FT_UNKNOWN;
2196 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2198 /* Check it against the list of known arguments. */
2199 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2200 if (streq (arg, ptr->arg))
2201 return ptr->return_value;
2203 /* An unrecognized interrupt type. */
  return ARM_FT_UNKNOWN;
}
2207 /* Computes the type of the current function. */
2209 static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2218 /* Decide if the current function is volatile. Such functions
2219 never return, and many memory cycles can be saved by not storing
2220 register values that will never be needed again. This optimization
2221 was added to speed up context switching in a kernel application. */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2227 && TREE_THIS_VOLATILE (current_function_decl))
2228 type |= ARM_FT_VOLATILE;
2230 if (cfun->static_chain_decl != NULL)
2231 type |= ARM_FT_NESTED;
2233 attr = DECL_ATTRIBUTES (current_function_decl);
  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;
  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}
2251 /* Returns the type of the current function. */
unsigned long
arm_current_func_type (void)
{
2256 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2257 cfun->machine->func_type = arm_compute_func_type ();
  return cfun->machine->func_type;
}

static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}
static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
2278 /* Output assembler code for a block containing the constant parts
2279 of a trampoline, leaving space for the variable parts.
   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr		r8, [pc, #0]
	   ldr		pc, [pc, #0]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */
static void
arm_asm_trampoline_template (FILE *f)
{
  if (TARGET_ARM)
    {
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
2299 /* The Thumb-2 trampoline is similar to the arm implementation.
2300 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2301 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2302 STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
2308 fprintf (f, "\t.code\t16\n");
2309 fprintf (f, ".Ltrampoline_start:\n");
2310 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2311 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2312 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2313 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2314 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }

  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
2321 /* Emit RTL insns to initialize the variable parts of a trampoline. */
static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
2326 rtx fnaddr, mem, a_tramp;
2328 emit_block_move (m_tramp, assemble_trampoline_template (),
2329 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2331 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2332 emit_move_insn (mem, chain_value);
2334 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2335 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2336 emit_move_insn (mem, fnaddr);
2338 a_tramp = XEXP (m_tramp, 0);
2339 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2340 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
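
/* For reference, the __clear_cache call emitted above performs the same
   flush that user code can request through GCC's built-in; TRAMP and
   TRAMP_END here are hypothetical bounds of a freshly written trampoline
   (illustrative only):

     __builtin___clear_cache (tramp, tramp_end);  */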
2344 /* Thumb trampolines should be entered in thumb mode, so set
2345 the bottom bit of the address. */
static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
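
/* A minimal standalone sketch (not GCC code) of the convention handled
   above: interworking branches read bit 0 of the target address to select
   Thumb state, so a Thumb entry point is published with that bit set.
   The function name is illustrative only.  */
static unsigned long
thumb_entry_address_sketch (unsigned long addr)
{
  return addr | 1ul;	/* Low bit set: enter in Thumb state.  */
}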
2356 /* Return 1 if it is possible to return using a single instruction.
2357 If SIBLING is non-null, this is a test for a return before a sibling
2358 call. SIBLING is the call insn, so we can examine its register usage. */
int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
2365 unsigned long saved_int_regs;
2366 unsigned HOST_WIDE_INT stack_adjust;
2367 arm_stack_offsets *offsets;
2369 /* Never use a return instruction before reload has run. */
  if (!reload_completed)
    return 0;
2373 func_type = arm_current_func_type ();
  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;
2380 /* So do interrupt functions that use the frame pointer and Thumb
2381 interrupt functions. */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;
2385 offsets = arm_get_frame_offsets ();
2386 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2388 /* As do variadic functions. */
2389 if (crtl->args.pretend_args_size
2390 || cfun->machine->uses_anonymous_args
2391 /* Or if the function calls __builtin_eh_return () */
2392 || crtl->calls_eh_return
2393 /* Or if the function calls alloca */
2394 || cfun->calls_alloca
2395 /* Or if there is a stack adjustment. However, if the stack pointer
2396 is saved on the stack, we can use a pre-incrementing stack load. */
2397 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
	     && stack_adjust == 4)))
    return 0;
2401 saved_int_regs = offsets->saved_regs_mask;
2403 /* Unfortunately, the insn
2405 ldmib sp, {..., sp, ...}
2407 triggers a bug on most SA-110 based devices, such that the stack
2408 pointer won't be correctly restored if the instruction takes a
2409 page fault. We work around this problem by popping r3 along with
2410 the other registers, since that is never slower than executing
2411 another instruction.
     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
2418 /* Validate that r3 is a call-clobbered register (always true in
2419 the default abi) ... */
      if (!call_used_regs[3])
	return 0;
2423 /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;
      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}
2436 /* ... and that there are no call-saved registers in r0-r2
2437 (always true in the default ABI). */
      if (saved_int_regs & 0x7)
	return 0;
    }
  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;
2447 /* On StrongARM, conditional returns are expensive if they aren't
2448 taken and multiple registers have been stacked. */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }
2462 /* If there are saved registers but the LR isn't saved, then we need
2463 two instructions for the return. */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;
2467 /* Can't be done if any of the VFP regs are pushed,
2468 since this also requires an insn. */
2469 if (TARGET_HARD_FLOAT && TARGET_VFP)
2470 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;
2474 if (TARGET_REALLY_IWMMXT)
2475 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	return 0;

  return 1;
}
2482 /* Return TRUE if int I is a valid immediate ARM constant. */
int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;
2489 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2490 be all zero, or all one. */
2491 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2492 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2493 != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;
2497 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2499 /* Fast return for 0 and small values. We must do this for zero, since
2500 the code below can't handle that one case. */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;
2504 /* Get the number of trailing zeros. */
2505 lowbit = ffs((int) i) - 1;
  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;
  /* Allow rotated constants in ARM mode.  */
  if (TARGET_ARM
      && ((i & ~0xc000003f) == 0
	  || (i & ~0xf000000f) == 0
	  || (i & ~0xfc000003) == 0))
    return TRUE;
  /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY in Thumb-2.  */
  if (TARGET_THUMB2)
    {
      HOST_WIDE_INT v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = (i & 0xff00) | ((i & 0xff00) << 16);
      if (i == v)
	return TRUE;
    }

  return FALSE;
}
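
/* An illustrative, self-contained sketch (not used by the compiler) of
   the ARM-mode rule const_ok_for_arm implements: a constant is encodable
   when it is an 8-bit value rotated right by an even amount.  Assumes
   32-bit unsigned int; the function name is hypothetical.  */
static int
arm_rotated_immediate_sketch (unsigned int x)
{
  int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotating X left by ROT undoes a right-rotation of the 8-bit field.  */
      unsigned int undone = (x << rot) | (rot ? x >> (32 - rot) : 0);

      if ((undone & ~0xffu) == 0)
	return 1;	/* E.g. 0xff000000 matches at ROT == 8.  */
    }
  return 0;		/* E.g. 0x101 has no valid encoding.  */
}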
2544 /* Return true if I is a valid constant for the operation CODE. */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
	return 1;
      else
	/* Otherwise, try mvn.  */
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
	  && ((i & 0xfffff000) == 0
	      || ((-i) & 0xfffff000) == 0))
	return 1;
      /* else fall through.  */
2587 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2589 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2595 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2599 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2606 /* Return true if I is a valid di mode constant for the operation CODE. */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
2610 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
2611 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
2612 rtx hi = GEN_INT (hi_val);
2613 rtx lo = GEN_INT (lo_val);
2621 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
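
/* Worked example for the hi/lo split performed by this function: the
   DImode constant 0x00000001fffffffe yields hi_val = 0x00000001 and
   lo_val = 0xfffffffe, each masked to 32 bits before being wrapped in a
   CONST_INT and handed to the operand predicates.  */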
2628 /* Emit a sequence of insns to handle a large constant.
2629 CODE is the code of the operation required, it can be any of SET, PLUS,
2630 IOR, AND, XOR, MINUS;
2631 MODE is the mode in which the operation is being performed;
2632 VAL is the integer to operate on;
2633 SOURCE is the other operand (a register, or a null-pointer for SET);
2634 SUBTARGETS means it is safe to create scratch registers if that will
2635 either produce a simpler sequence, or we will want to cse the values.
2636 Return value is the number of insns emitted. */
2638 /* ??? Tweak this for thumb2. */
int
arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2641 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;
2650 if (subtargets || code == SET
2651 || (REG_P (target) && REG_P (source)
2652 && REGNO (target) != REGNO (source)))
2654 /* After arm_reorg has been called, we can't fix up expensive
2655 constants by pushing them into memory so we must synthesize
2656 them in-line, regardless of the cost. This is only likely to
2657 be more costly on chips that have load delay slots and we are
2658 compiling without running the scheduler (so no splitting
2659 occurred before the final instruction emission).
2661 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2663 if (!after_arm_reorg
2665 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2667 > (arm_constant_limit (optimize_function_for_size_p (cfun))
	  /* Currently SET is the only monadic value for CODE, all
	     the rest are dyadic.  */
2674 if (TARGET_USE_MOVT)
2675 arm_emit_movpair (target, GEN_INT (val));
2677 emit_set_insn (target, GEN_INT (val));
2683 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2685 if (TARGET_USE_MOVT)
2686 arm_emit_movpair (temp, GEN_INT (val));
2688 emit_set_insn (temp, GEN_INT (val));
2690 /* For MINUS, the value is subtracted from, since we never
2691 have subtraction of a constant. */
2693 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2695 emit_set_insn (target,
2696 gen_rtx_fmt_ee (code, mode, source, temp));
2702 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
/* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
   ARM/Thumb-2 immediates and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2711 struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
2717 struct four_ints tmp_sequence;
2719 /* If we aren't targeting ARM, the best place to start is always at
2720 the bottom, otherwise look more closely. */
2723 for (i = 0; i < 32; i += 2)
2725 int consecutive_zeros = 0;
2727 if (!(val & (3 << i)))
2729 while ((i < 32) && !(val & (3 << i)))
2731 consecutive_zeros += 2;
2734 if (consecutive_zeros > best_consecutive_zeros)
2736 best_consecutive_zeros = consecutive_zeros;
2737 best_start = i - consecutive_zeros;
2744 /* So long as it won't require any more insns to do so, it's
2745 desirable to emit a small constant (in bits 0...9) in the last
2746 insn. This way there is more chance that it can be combined with
2747 a later addressing insn to form a pre-indexed load or store
2748 operation. Consider:
2750 *((volatile int *)0xe0000100) = 1;
2751 *((volatile int *)0xe0000110) = 2;
2753 We want this to wind up as:
2757 str rB, [rA, #0x100]
2759 str rB, [rA, #0x110]
2761 rather than having to synthesize both large constants from scratch.
2763 Therefore, we calculate how many insns would be required to emit
2764 the constant starting from `best_start', and also starting from
2765 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2766 yield a shorter sequence, we may as well use zero. */
2767 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
2769 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2771 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2772 if (insns2 <= insns1)
2774 *return_sequence = tmp_sequence;
2782 /* As for optimal_immediate_sequence, but starting at bit-position I. */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2785 struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;
2790 /* Try and find a way of doing the job in either two or three
2793 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary.  We then fill up the temps,
2797 wrapping around to the top of the word when we drop off the bottom.
2798 In the worst case this code should produce no more than four insns.
2800 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2801 constants, shifted to any arbitrary location. We should always start
2806 unsigned int b1, b2, b3, b4;
2807 unsigned HOST_WIDE_INT result;
2810 gcc_assert (insns < 4);
2815 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2816 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2819 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2820 /* We can use addw/subw for the last 12 bits. */
2824 /* Use an 8-bit shifted/rotated immediate. */
2828 result = remainder & ((0x0ff << end)
2829 | ((i < end) ? (0xff >> (32 - end))
2836 /* Arm allows rotates by a multiple of two. Thumb-2 allows
2837 arbitrary shifts. */
2838 i -= TARGET_ARM ? 2 : 1;
2842 /* Next, see if we can do a better job with a thumb2 replicated
2845 We do it this way around to catch the cases like 0x01F001E0 where
2846 two 8-bit immediates would work, but a replicated constant would
2849 TODO: 16-bit constants that don't clear all the bits, but still win.
2850 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
2853 b1 = (remainder & 0xff000000) >> 24;
2854 b2 = (remainder & 0x00ff0000) >> 16;
2855 b3 = (remainder & 0x0000ff00) >> 8;
2856 b4 = remainder & 0xff;
2860 /* The 8-bit immediate already found clears b1 (and maybe b2),
2861 but must leave b3 and b4 alone. */
2863 /* First try to find a 32-bit replicated constant that clears
2864 almost everything. We can assume that we can't do it in one,
2865 or else we wouldn't be here. */
2866 unsigned int tmp = b1 & b2 & b3 & b4;
2867 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2869 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2870 + (tmp == b3) + (tmp == b4);
2872 && (matching_bytes >= 3
2873 || (matching_bytes == 2
2874 && const_ok_for_op (remainder & ~tmp2, code))))
2876 /* At least 3 of the bytes match, and the fourth has at
2877 least as many bits set, or two of the bytes match
2878 and it will only require one more insn to finish. */
2886 /* Second, try to find a 16-bit replicated constant that can
2887 leave three of the bytes clear. If b2 or b4 is already
2888 zero, then we can. If the 8-bit from above would not
2889 clear b2 anyway, then we still win. */
2890 else if (b1 == b3 && (!b2 || !b4
2891 || (remainder & 0x00ff0000 & ~result)))
2893 result = remainder & 0xff00ff00;
	  /* The 8-bit immediate already found clears b2 (and maybe b3)
	     and we don't get here unless b1 is already clear, but it will
	     leave b4 unchanged.  */
2903 /* If we can clear b2 and b4 at once, then we win, since the
2904 8-bits couldn't possibly reach that far. */
2907 result = remainder & 0x00ff00ff;
2913 return_sequence->i[insns++] = result;
2914 remainder &= ~result;
2916 if (code == SET || code == MINUS)
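
/* Worked example of the splitting loop above (ARM mode): 0x12345678 has
   no two-immediate decomposition, so four rotated 8-bit fields are
   collected in RETURN_SEQUENCE, one per instruction:

       mov	rD, #0x12000000
       orr	rD, rD, #0x00340000
       orr	rD, rD, #0x00005600
       orr	rD, rD, #0x00000078

   (register name rD illustrative only).  */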
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);

  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
		  HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
		  int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
2948 int set_sign_bit_copies = 0;
2949 int clear_sign_bit_copies = 0;
2950 int clear_zero_bit_copies = 0;
2951 int set_zero_bit_copies = 0;
2952 int insns = 0, neg_insns, inv_insns;
2953 unsigned HOST_WIDE_INT temp1, temp2;
2954 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2955 struct four_ints *immediates;
2956 struct four_ints pos_immediates, neg_immediates, inv_immediates;
2958 /* Find out which operations are safe for a given CODE. Also do a quick
2959 check for degenerate cases; these can occur when DImode operations
2972 if (remainder == 0xffffffff)
2975 emit_constant_insn (cond,
2976 gen_rtx_SET (VOIDmode, target,
2977 GEN_INT (ARM_SIGN_EXTEND (val))));
2983 if (reload_completed && rtx_equal_p (target, source))
2987 emit_constant_insn (cond,
2988 gen_rtx_SET (VOIDmode, target, source));
2997 emit_constant_insn (cond,
2998 gen_rtx_SET (VOIDmode, target, const0_rtx));
3001 if (remainder == 0xffffffff)
3003 if (reload_completed && rtx_equal_p (target, source))
3006 emit_constant_insn (cond,
3007 gen_rtx_SET (VOIDmode, target, source));
3016 if (reload_completed && rtx_equal_p (target, source))
3019 emit_constant_insn (cond,
3020 gen_rtx_SET (VOIDmode, target, source));
3024 if (remainder == 0xffffffff)
3027 emit_constant_insn (cond,
3028 gen_rtx_SET (VOIDmode, target,
3029 gen_rtx_NOT (mode, source)));
3036 /* We treat MINUS as (val - source), since (source - val) is always
3037 passed as (source + (-val)). */
3041 emit_constant_insn (cond,
3042 gen_rtx_SET (VOIDmode, target,
3043 gen_rtx_NEG (mode, source)));
3046 if (const_ok_for_arm (val))
3049 emit_constant_insn (cond,
3050 gen_rtx_SET (VOIDmode, target,
3051 gen_rtx_MINUS (mode, GEN_INT (val),
3062 /* If we can do it in one insn get out quickly. */
3063 if (const_ok_for_op (val, code))
3066 emit_constant_insn (cond,
3067 gen_rtx_SET (VOIDmode, target,
3069 ? gen_rtx_fmt_ee (code, mode, source,
3075 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3077 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3078 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3082 if (mode == SImode && i == 16)
	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
	       smaller insn.  */
3085 emit_constant_insn (cond,
3086 gen_zero_extendhisi2
3087 (target, gen_lowpart (HImode, source)));
	  else
	    /* Extz only supports SImode, but we can coerce the operands
	       into that mode.  */
3091 emit_constant_insn (cond,
3092 gen_extzv_t2 (gen_lowpart (SImode, target),
3093 gen_lowpart (SImode, source),
3094 GEN_INT (i), const0_rtx));
  /* Calculate a few attributes that may be useful for specific
     machine instructions.  */
3102 /* Count number of leading zeros. */
3103 for (i = 31; i >= 0; i--)
3105 if ((remainder & (1 << i)) == 0)
3106 clear_sign_bit_copies++;
3111 /* Count number of leading 1's. */
3112 for (i = 31; i >= 0; i--)
3114 if ((remainder & (1 << i)) != 0)
3115 set_sign_bit_copies++;
  /* Count number of trailing zeros.  */
3121 for (i = 0; i <= 31; i++)
3123 if ((remainder & (1 << i)) == 0)
3124 clear_zero_bit_copies++;
3129 /* Count number of trailing 1's. */
3130 for (i = 0; i <= 31; i++)
3132 if ((remainder & (1 << i)) != 0)
3133 set_zero_bit_copies++;
      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good way of doing it, since the shift
	 may well merge into a subsequent insn.  */
3144 if (set_sign_bit_copies > 1)
3146 if (const_ok_for_arm
3147 (temp1 = ARM_SIGN_EXTEND (remainder
3148 << (set_sign_bit_copies - 1))))
3152 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3153 emit_constant_insn (cond,
3154 gen_rtx_SET (VOIDmode, new_src,
3156 emit_constant_insn (cond,
3157 gen_ashrsi3 (target, new_src,
3158 GEN_INT (set_sign_bit_copies - 1)));
3162 /* For an inverted constant, we will need to set the low bits,
3163 these will be shifted out of harm's way. */
3164 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3165 if (const_ok_for_arm (~temp1))
3169 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3170 emit_constant_insn (cond,
3171 gen_rtx_SET (VOIDmode, new_src,
3173 emit_constant_insn (cond,
3174 gen_ashrsi3 (target, new_src,
3175 GEN_INT (set_sign_bit_copies - 1)));
3181 /* See if we can calculate the value as the difference between two
3182 valid immediates. */
3183 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3185 int topshift = clear_sign_bit_copies & ~1;
3187 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3188 & (0xff000000 >> topshift));
3190 /* If temp1 is zero, then that means the 9 most significant
3191 bits of remainder were 1 and we've caused it to overflow.
3192 When topshift is 0 we don't need to do anything since we
3193 can borrow from 'bit 32'. */
3194 if (temp1 == 0 && topshift != 0)
3195 temp1 = 0x80000000 >> (topshift - 1);
3197 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3199 if (const_ok_for_arm (temp2))
3203 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3204 emit_constant_insn (cond,
3205 gen_rtx_SET (VOIDmode, new_src,
3207 emit_constant_insn (cond,
3208 gen_addsi3 (target, new_src,
3216 /* See if we can generate this by setting the bottom (or the top)
3217 16 bits, and then shifting these into the other half of the
3218 word. We only look for the simplest cases, to do more would cost
3219 too much. Be careful, however, not to generate this when the
3220 alternative would take fewer insns. */
3221 if (val & 0xffff0000)
3223 temp1 = remainder & 0xffff0000;
3224 temp2 = remainder & 0x0000ffff;
3226 /* Overlaps outside this range are best done using other methods. */
3227 for (i = 9; i < 24; i++)
3229 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3230 && !const_ok_for_arm (temp2))
3232 rtx new_src = (subtargets
3233 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3235 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3236 source, subtargets, generate);
3244 gen_rtx_ASHIFT (mode, source,
3251 /* Don't duplicate cases already considered. */
3252 for (i = 17; i < 24; i++)
3254 if (((temp1 | (temp1 >> i)) == remainder)
3255 && !const_ok_for_arm (temp1))
3257 rtx new_src = (subtargets
3258 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3260 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3261 source, subtargets, generate);
3266 gen_rtx_SET (VOIDmode, target,
3269 gen_rtx_LSHIFTRT (mode, source,
3280 /* If we have IOR or XOR, and the constant can be loaded in a
3281 single instruction, and we can find a temporary to put it in,
3282 then this can be done in two instructions instead of 3-4. */
3284 /* TARGET can't be NULL if SUBTARGETS is 0 */
3285 || (reload_completed && !reg_mentioned_p (target, source)))
3287 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3291 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3293 emit_constant_insn (cond,
3294 gen_rtx_SET (VOIDmode, sub,
3296 emit_constant_insn (cond,
3297 gen_rtx_SET (VOIDmode, target,
3298 gen_rtx_fmt_ee (code, mode,
3309 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
3310 and the remainder 0s for e.g. 0xfff00000)
3311 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3313 This can be done in 2 instructions by using shifts with mov or mvn.
3318 mvn r0, r0, lsr #12 */
3319 if (set_sign_bit_copies > 8
3320 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3324 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3325 rtx shift = GEN_INT (set_sign_bit_copies);
3329 gen_rtx_SET (VOIDmode, sub,
3331 gen_rtx_ASHIFT (mode,
3336 gen_rtx_SET (VOIDmode, target,
3338 gen_rtx_LSHIFTRT (mode, sub,
3345 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3347 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3349 For eg. r0 = r0 | 0xfff
3354 if (set_zero_bit_copies > 8
3355 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3359 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3360 rtx shift = GEN_INT (set_zero_bit_copies);
3364 gen_rtx_SET (VOIDmode, sub,
3366 gen_rtx_LSHIFTRT (mode,
3371 gen_rtx_SET (VOIDmode, target,
3373 gen_rtx_ASHIFT (mode, sub,
3379 /* This will never be reached for Thumb2 because orn is a valid
3380 instruction. This is for Thumb1 and the ARM 32 bit cases.
3382 x = y | constant (such that ~constant is a valid constant)
3384 x = ~(~y & ~constant).
3386 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3390 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3391 emit_constant_insn (cond,
3392 gen_rtx_SET (VOIDmode, sub,
3393 gen_rtx_NOT (mode, source)));
3396 sub = gen_reg_rtx (mode);
3397 emit_constant_insn (cond,
3398 gen_rtx_SET (VOIDmode, sub,
3399 gen_rtx_AND (mode, source,
3401 emit_constant_insn (cond,
3402 gen_rtx_SET (VOIDmode, target,
3403 gen_rtx_NOT (mode, sub)));
3410 /* See if two shifts will do 2 or more insn's worth of work. */
3411 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
      HOST_WIDE_INT shift_mask = ((0xffffffff
				   << (32 - clear_sign_bit_copies))
				  & 0xffffffff);
3417 if ((remainder | shift_mask) != 0xffffffff)
3421 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3422 insns = arm_gen_constant (AND, mode, cond,
3423 remainder | shift_mask,
3424 new_src, source, subtargets, 1);
3429 rtx targ = subtargets ? NULL_RTX : target;
3430 insns = arm_gen_constant (AND, mode, cond,
3431 remainder | shift_mask,
3432 targ, source, subtargets, 0);
3438 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3439 rtx shift = GEN_INT (clear_sign_bit_copies);
3441 emit_insn (gen_ashlsi3 (new_src, source, shift));
3442 emit_insn (gen_lshrsi3 (target, new_src, shift));
3448 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3450 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3452 if ((remainder | shift_mask) != 0xffffffff)
3456 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3458 insns = arm_gen_constant (AND, mode, cond,
3459 remainder | shift_mask,
3460 new_src, source, subtargets, 1);
3465 rtx targ = subtargets ? NULL_RTX : target;
3467 insns = arm_gen_constant (AND, mode, cond,
3468 remainder | shift_mask,
3469 targ, source, subtargets, 0);
3475 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3476 rtx shift = GEN_INT (clear_zero_bit_copies);
3478 emit_insn (gen_lshrsi3 (new_src, source, shift));
3479 emit_insn (gen_ashlsi3 (target, new_src, shift));
3491 /* Calculate what the instruction sequences would be if we generated it
3492 normally, negated, or inverted. */
3494 /* AND cannot be split into multiple insns, so invert and use BIC. */
3497 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3500 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3505 if (can_invert || final_invert)
3506 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3511 immediates = &pos_immediates;
3513 /* Is the negated immediate sequence more efficient? */
3514 if (neg_insns < insns && neg_insns <= inv_insns)
3517 immediates = &neg_immediates;
3522 /* Is the inverted immediate sequence more efficient?
3523 We must allow for an extra NOT instruction for XOR operations, although
3524 there is some chance that the final 'mvn' will get optimized later. */
3525 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3528 immediates = &inv_immediates;
3536 /* Now output the chosen sequence as instructions. */
3539 for (i = 0; i < insns; i++)
3541 rtx new_src, temp1_rtx;
3543 temp1 = immediates->i[i];
3545 if (code == SET || code == MINUS)
3546 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3547 else if ((final_invert || i < (insns - 1)) && subtargets)
3548 new_src = gen_reg_rtx (mode);
3554 else if (can_negate)
3557 temp1 = trunc_int_for_mode (temp1, mode);
3558 temp1_rtx = GEN_INT (temp1);
3562 else if (code == MINUS)
3563 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3565 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3567 emit_constant_insn (cond,
3568 gen_rtx_SET (VOIDmode, new_src,
3574 can_negate = can_invert;
3578 else if (code == MINUS)
3586 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3587 gen_rtx_NOT (mode, source)));
3594 /* Canonicalize a comparison so that we are more likely to recognize it.
3595 This can be done for a few constant compares, where we can make the
3596 immediate value easier to load. */
static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
{
3602 enum machine_mode mode;
3603 unsigned HOST_WIDE_INT i, maxval;
3605 mode = GET_MODE (*op0);
3606 if (mode == VOIDmode)
3607 mode = GET_MODE (*op1);
3609 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
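
  /* E.g. for SImode this computes maxval = (1 << 31) - 1 = 0x7fffffff,
     the largest signed value; it guards the +/-1 adjustments below
     against nudging a constant past the representable range.  */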
3611 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3612 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3613 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3614 for GTU/LEU in Thumb mode. */
3619 if (*code == GT || *code == LE
3620 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
	  /* Missing comparison.  First try to use an available
	     comparison.  */
3624 if (CONST_INT_P (*op1))
3632 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3634 *op1 = GEN_INT (i + 1);
3635 *code = *code == GT ? GE : LT;
3641 if (i != ~((unsigned HOST_WIDE_INT) 0)
3642 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3644 *op1 = GEN_INT (i + 1);
3645 *code = *code == GTU ? GEU : LTU;
3654 /* If that did not work, reverse the condition. */
3655 if (!op0_preserve_value)
3660 *code = (int)swap_condition ((enum rtx_code)*code);
3666 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3667 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3668 to facilitate possible combining with a cmp into 'ands'. */
3670 && GET_CODE (*op0) == ZERO_EXTEND
3671 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3672 && GET_MODE (XEXP (*op0, 0)) == QImode
3673 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3674 && subreg_lowpart_p (XEXP (*op0, 0))
3675 && *op1 == const0_rtx)
3676 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3679 /* Comparisons smaller than DImode. Only adjust comparisons against
3680 an out-of-range constant. */
3681 if (!CONST_INT_P (*op1)
3682 || const_ok_for_arm (INTVAL (*op1))
3683 || const_ok_for_arm (- INTVAL (*op1)))
3697 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3699 *op1 = GEN_INT (i + 1);
3700 *code = *code == GT ? GE : LT;
3708 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3710 *op1 = GEN_INT (i - 1);
3711 *code = *code == GE ? GT : LE;
3718 if (i != ~((unsigned HOST_WIDE_INT) 0)
3719 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3721 *op1 = GEN_INT (i + 1);
3722 *code = *code == GTU ? GEU : LTU;
3730 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3732 *op1 = GEN_INT (i - 1);
3733 *code = *code == GEU ? GTU : LEU;
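
	  /* Worked example of these rewrites: (GT x 0xfffff) cannot load
	     0xfffff directly (not a valid ARM immediate), but the
	     equivalent (GE x 0x100000) needs only the single-bit
	     immediate 0x100000, which const_ok_for_arm accepts.  */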
3744 /* Define how to find the value returned by a function. */
static rtx
arm_function_value (const_tree type, const_tree func,
		    bool outgoing ATTRIBUTE_UNUSED)
{
3750 enum machine_mode mode;
3751 int unsignedp ATTRIBUTE_UNUSED;
3752 rtx r ATTRIBUTE_UNUSED;
3754 mode = TYPE_MODE (type);
3756 if (TARGET_AAPCS_BASED)
3757 return aapcs_allocate_return_reg (mode, type, func);
3759 /* Promote integer types. */
3760 if (INTEGRAL_TYPE_P (type))
3761 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3763 /* Promotes small structs returned in a register to full-word size
3764 for big-endian AAPCS. */
3765 if (arm_return_in_msb (type))
3767 HOST_WIDE_INT size = int_size_in_bytes (type);
3768 if (size % UNITS_PER_WORD != 0)
3770 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3771 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3775 return arm_libcall_value_1 (mode);
static int
libcall_eq (const void *p1, const void *p2)
{
  return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
}
static hashval_t
libcall_hash (const void *p1)
{
  return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
}
static void
add_libcall (htab_t htab, rtx libcall)
{
  *htab_find_slot (htab, libcall, INSERT) = libcall;
}
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
3799 static bool init_done = false;
  static htab_t libcall_htab;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = htab_create (31, libcall_hash, libcall_eq,
				  NULL);
      add_libcall (libcall_htab,
3809 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3810 add_libcall (libcall_htab,
3811 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3812 add_libcall (libcall_htab,
3813 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3814 add_libcall (libcall_htab,
3815 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3817 add_libcall (libcall_htab,
3818 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3819 add_libcall (libcall_htab,
3820 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3821 add_libcall (libcall_htab,
3822 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3823 add_libcall (libcall_htab,
3824 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3826 add_libcall (libcall_htab,
3827 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3828 add_libcall (libcall_htab,
3829 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3830 add_libcall (libcall_htab,
3831 convert_optab_libfunc (sfix_optab, SImode, DFmode));
3832 add_libcall (libcall_htab,
3833 convert_optab_libfunc (ufix_optab, SImode, DFmode));
3834 add_libcall (libcall_htab,
3835 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3836 add_libcall (libcall_htab,
3837 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3838 add_libcall (libcall_htab,
3839 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3840 add_libcall (libcall_htab,
3841 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3843 /* Values from double-precision helper functions are returned in core
3844 registers if the selected core only supports single-precision
3845 arithmetic, even if we are using the hard-float ABI. The same is
3846 true for single-precision helpers, but we will never be using the
3847 hard-float ABI on a CPU which doesn't support single-precision
3848 operations in hardware. */
3849 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3850 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3851 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3852 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3853 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3854 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3855 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3856 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3857 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3858 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3859 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3860 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3862 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3866 return libcall && htab_find (libcall_htab, libcall) != NULL;
3870 arm_libcall_value_1 (enum machine_mode mode)
3872 if (TARGET_AAPCS_BASED)
3873 return aapcs_libcall_value (mode);
3874 else if (TARGET_IWMMXT_ABI
3875 && arm_vector_mode_supported_p (mode))
3876 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  return gen_rtx_REG (mode, ARG_REGISTER (1));
}
3881 /* Define how to find the value returned by a library function
3882 assuming the value has mode MODE. */
static rtx
arm_libcall_value (enum machine_mode mode, const_rtx libcall)
{
3887 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3888 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3890 /* The following libcalls return their result in integer registers,
3891 even though they return a floating point value. */
3892 if (arm_libcall_uses_aapcs_base (libcall))
3893 return gen_rtx_REG (mode, ARG_REGISTER(1));
  return arm_libcall_value_1 (mode);
}
3900 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
	  && TARGET_AAPCS_BASED
	  && TARGET_VFP
	  && TARGET_HARD_FLOAT
	  && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
	  && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}
3918 /* Determine the amount of memory needed to store the possible return
3919 registers of an untyped call. */
int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
	size += 32;
      if (TARGET_IWMMXT_ABI)
	size += 8;
    }

  return size;
}
3936 /* Decide whether TYPE should be returned in memory (true)
3937 or in a register (false). FNTYPE is the type of the function making
3940 arm_return_in_memory (const_tree type, const_tree fntype)
3944 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3946 if (TARGET_AAPCS_BASED)
3948 /* Simple, non-aggregate types (ie not including vectors and
3949 complex) are always returned in a register (or registers).
3950 We don't care about which register here, so we can short-cut
3951 some of the detail. */
3952 if (!AGGREGATE_TYPE_P (type)
3953 && TREE_CODE (type) != VECTOR_TYPE
	  && TREE_CODE (type) != COMPLEX_TYPE)
	return false;

      /* Any return value that is no larger than one word can be
	 returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
	return false;
3962 /* Check any available co-processors to see if they accept the
3963 type as a register candidate (VFP, for example, can return
3964 some aggregates in consecutive registers). These aren't
3965 available if the call is variadic. */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
	return false;
3969 /* Vector values should be returned using ARM registers, not
3970 memory (unless they're over 16 bytes, which will break since
3971 we only have four call-clobbered registers to play with). */
3972 if (TREE_CODE (type) == VECTOR_TYPE)
3973 return (size < 0 || size > (4 * UNITS_PER_WORD));
3975 /* The rest go in memory. */
3979 if (TREE_CODE (type) == VECTOR_TYPE)
3980 return (size < 0 || size > (4 * UNITS_PER_WORD));
3982 if (!AGGREGATE_TYPE_P (type)
3983 && (TREE_CODE (type) != VECTOR_TYPE))
3984 /* All simple types are returned in registers. */
3987 if (arm_abi != ARM_ABI_APCS)
3989 /* ATPCS and later return aggregate types in memory only if they are
3990 larger than a word (or are variable size). */
3991 return (size < 0 || size > UNITS_PER_WORD);
3994 /* For the arm-wince targets we choose to be compatible with Microsoft's
3995 ARM and Thumb compilers, which always return aggregates in memory. */
3997 /* All structures/unions bigger than one word are returned in memory.
3998 Also catch the case where int_size_in_bytes returns -1. In this case
3999 the aggregate is either huge or of variable size, and in either case
4000 we will want to return it via memory and not in a register. */
4001 if (size < 0 || size > UNITS_PER_WORD)
4004 if (TREE_CODE (type) == RECORD_TYPE)
4008 /* For a struct the APCS says that we only return in a register
4009 if the type is 'integer like' and every addressable element
4010 has an offset of zero. For practical purposes this means
4011 that the structure can have at most one non bit-field element
4012 and that this element must be the first one in the structure. */
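/* For example (an illustrative sketch, not from the original sources):

     struct s1 { int i; };                  -- returned in r0
     struct s2 { int i : 8; int j : 24; };  -- bit-fields only, r0
     struct s3 { float f; };                -- float member, memory  */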
4014 /* Find the first field, ignoring non FIELD_DECL things which will
4015 have been created by C++. */
4016 for (field = TYPE_FIELDS (type);
4017 field && TREE_CODE (field) != FIELD_DECL;
4018 field = DECL_CHAIN (field))
4022 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4024 /* Check that the first field is valid for returning in a register. */
4026 /* ... Floats are not allowed */
4027 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4030 /* ... Aggregates that are not themselves valid for returning in
4031 a register are not allowed. */
4032 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4035 /* Now check the remaining fields, if any. Only bitfields are allowed,
4036 since they are not addressable. */
4037 for (field = DECL_CHAIN (field);
4039 field = DECL_CHAIN (field))
4041 if (TREE_CODE (field) != FIELD_DECL)
4044 if (!DECL_BIT_FIELD_TYPE (field))
4051 if (TREE_CODE (type) == UNION_TYPE)
4055 /* Unions can be returned in registers if every element is
4056 integral, or can be returned in an integer register. */
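	/* For example (illustrative): union { int i; short s; } may be
	   returned in r0, but union { int i; float f; } goes in memory
	   because of the float member.  */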
4057 for (field = TYPE_FIELDS (type);
4059 field = DECL_CHAIN (field))
4061 if (TREE_CODE (field) != FIELD_DECL)
4064 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4067 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4073 #endif /* not ARM_WINCE */
4075 /* Return all other types in memory. */
4079 const struct pcs_attribute_arg
4083 } pcs_attribute_args[] =
4085 {"aapcs", ARM_PCS_AAPCS},
4086 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4088 /* We could recognize these, but changes would be needed elsewhere
4089 to implement them. */
4090 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4091 {"atpcs", ARM_PCS_ATPCS},
4092 {"apcs", ARM_PCS_APCS},
4094 {NULL, ARM_PCS_UNKNOWN}
4098 arm_pcs_from_attribute (tree attr)
4100 const struct pcs_attribute_arg *ptr;
4103 /* Get the value of the argument. */
4104 if (TREE_VALUE (attr) == NULL_TREE
4105 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4106 return ARM_PCS_UNKNOWN;
4108 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4110 /* Check it against the list of known arguments. */
4111 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4112 if (streq (arg, ptr->arg))
4115 /* An unrecognized PCS variant. */
4116 return ARM_PCS_UNKNOWN;
4119 /* Get the PCS variant to use for this call. TYPE is the function's type
4120 specification, DECL is the specific declaration. DECL may be null if
4121 the call could be indirect or if this is a library call. */
4123 arm_get_pcs_model (const_tree type, const_tree decl)
4125 bool user_convention = false;
4126 enum arm_pcs user_pcs = arm_pcs_default;
4131 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4134 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4135 user_convention = true;
4138 if (TARGET_AAPCS_BASED)
4140 /* Detect varargs functions. These always use the base rules
4141 (no argument is ever a candidate for a co-processor
4142 register). */
4143 bool base_rules = stdarg_p (type);
4145 if (user_convention)
4147 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4148 sorry ("non-AAPCS derived PCS variant");
4149 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4150 error ("variadic functions must use the base AAPCS variant");
4154 return ARM_PCS_AAPCS;
4155 else if (user_convention)
4157 else if (decl && flag_unit_at_a_time)
4159 /* Local functions never leak outside this compilation unit,
4160 so we are free to use whatever conventions are
4161 appropriate. */
4162 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4163 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4165 return ARM_PCS_AAPCS_LOCAL;
4168 else if (user_convention && user_pcs != arm_pcs_default)
4169 sorry ("PCS variant");
4171 /* For everything else we use the target's default. */
4172 return arm_pcs_default;
4177 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4178 const_tree fntype ATTRIBUTE_UNUSED,
4179 rtx libcall ATTRIBUTE_UNUSED,
4180 const_tree fndecl ATTRIBUTE_UNUSED)
4182 /* Record the unallocated VFP registers. */
4183 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4184 pcum->aapcs_vfp_reg_alloc = 0;
4187 /* Walk down the type tree of TYPE counting consecutive base elements.
4188 If *MODEP is VOIDmode, then set it to the first valid floating point
4189 type. If a non-floating point type is found, or if a floating point
4190 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4191 otherwise return the count in the sub-tree. */
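/* For example (an illustrative sketch, not from the original sources):
   given

     struct pt { double x, y, z; };

   the walk sets *MODEP to DFmode at 'x', matches 'y' and 'z' against
   it, and returns 3 -- a homogeneous aggregate the VFP PCS can pass in
   d0-d2.  For struct { double d; float f; } the SFmode element fails
   to match DFmode, so the result is -1.  */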
4193 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4195 enum machine_mode mode;
4198 switch (TREE_CODE (type))
4201 mode = TYPE_MODE (type);
4202 if (mode != DFmode && mode != SFmode)
4205 if (*modep == VOIDmode)
4214 mode = TYPE_MODE (TREE_TYPE (type));
4215 if (mode != DFmode && mode != SFmode)
4218 if (*modep == VOIDmode)
4227 /* Use V2SImode and V4SImode as representatives of all 64-bit
4228 and 128-bit vector types, whether or not those modes are
4229 supported with the present options. */
4230 size = int_size_in_bytes (type);
4243 if (*modep == VOIDmode)
4246 /* Vector modes are considered to be opaque: two vectors are
4247 equivalent for the purposes of being homogeneous aggregates
4248 if they are the same size. */
4257 tree index = TYPE_DOMAIN (type);
4259 /* Can't handle incomplete types. */
4260 if (!COMPLETE_TYPE_P (type))
4263 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4266 || !TYPE_MAX_VALUE (index)
4267 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4268 || !TYPE_MIN_VALUE (index)
4269 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4273 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4274 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4276 /* There must be no padding. */
4277 if (!host_integerp (TYPE_SIZE (type), 1)
4278 || (tree_low_cst (TYPE_SIZE (type), 1)
4279 != count * GET_MODE_BITSIZE (*modep)))
4291 /* Can't handle incomplete types. */
4292 if (!COMPLETE_TYPE_P (type))
4295 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4297 if (TREE_CODE (field) != FIELD_DECL)
4300 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4306 /* There must be no padding. */
4307 if (!host_integerp (TYPE_SIZE (type), 1)
4308 || (tree_low_cst (TYPE_SIZE (type), 1)
4309 != count * GET_MODE_BITSIZE (*modep)))
4316 case QUAL_UNION_TYPE:
4318 /* These aren't very interesting except in a degenerate case. */
4323 /* Can't handle incomplete types. */
4324 if (!COMPLETE_TYPE_P (type))
4327 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4329 if (TREE_CODE (field) != FIELD_DECL)
4332 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4335 count = count > sub_count ? count : sub_count;
4338 /* There must be no padding. */
4339 if (!host_integerp (TYPE_SIZE (type), 1)
4340 || (tree_low_cst (TYPE_SIZE (type), 1)
4341 != count * GET_MODE_BITSIZE (*modep)))
4354 /* Return true if PCS_VARIANT should use VFP registers. */
4356 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4358 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4360 static bool seen_thumb1_vfp = false;
4362 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4364 sorry ("Thumb-1 hard-float VFP ABI");
4365 /* sorry() is not immediately fatal, so only display this once. */
4366 seen_thumb1_vfp = true;
4372 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4375 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
4376 && (TARGET_VFP_DOUBLE || !is_double));
4379 /* Return true if an argument whose type is TYPE, or mode is MODE, is
4380 suitable for passing or returning in VFP registers for the PCS
4381 variant selected. If it is, then *BASE_MODE is updated to contain
4382 a machine mode describing each element of the argument's type and
4383 *COUNT to hold the number of such elements. */
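/* For example (illustrative): a struct { float x, y; } argument under
   the VFP variant yields *BASE_MODE == SFmode and *COUNT == 2, while a
   bare DFmode scalar yields *BASE_MODE == DFmode and *COUNT == 1.  */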
4385 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4386 enum machine_mode mode, const_tree type,
4387 enum machine_mode *base_mode, int *count)
4389 enum machine_mode new_mode = VOIDmode;
4391 /* If we have the type information, prefer that to working things
4392 out from the mode. */
4395 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4397 if (ag_count > 0 && ag_count <= 4)
4402 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
4403 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4404 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4409 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4412 new_mode = (mode == DCmode ? DFmode : SFmode);
4418 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4421 *base_mode = new_mode;
4426 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4427 enum machine_mode mode, const_tree type)
4429 int count ATTRIBUTE_UNUSED;
4430 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4432 if (!use_vfp_abi (pcs_variant, false))
4434 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4439 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4442 if (!use_vfp_abi (pcum->pcs_variant, false))
4445 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4446 &pcum->aapcs_vfp_rmode,
4447 &pcum->aapcs_vfp_rcount);
4451 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4452 const_tree type ATTRIBUTE_UNUSED)
4454 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4455 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
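  /* A worked example (illustrative): a single DFmode argument gives
     shift == 2 (one D register covers two S registers) and
     aapcs_vfp_rcount == 1, so mask == 0x3; the loop below then claims
     the first free even-numbered pair of S registers (s0/s1, s2/s3,
     ...).  */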
4458 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4459 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4461 pcum->aapcs_vfp_reg_alloc = mask << regno;
4462 if (mode == BLKmode
4463 || (mode == TImode && ! TARGET_NEON)
4464 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
4467 int rcount = pcum->aapcs_vfp_rcount;
4469 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4473 /* Avoid using unsupported vector modes. */
4474 if (rmode == V2SImode)
4476 else if (rmode == V4SImode)
4483 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4484 for (i = 0; i < rcount; i++)
4486 rtx tmp = gen_rtx_REG (rmode,
4487 FIRST_VFP_REGNUM + regno + i * rshift);
4488 tmp = gen_rtx_EXPR_LIST
4490 GEN_INT (i * GET_MODE_SIZE (rmode)));
4491 XVECEXP (par, 0, i) = tmp;
4494 pcum->aapcs_reg = par;
4497 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4504 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
4505 enum machine_mode mode,
4506 const_tree type)
4508 if (!use_vfp_abi (pcs_variant, false))
4511 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4514 enum machine_mode ag_mode;
4519 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4524 if (ag_mode == V2SImode)
4526 else if (ag_mode == V4SImode)
4532 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
4533 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4534 for (i = 0; i < count; i++)
4536 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4537 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4538 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4539 XVECEXP (par, 0, i) = tmp;
4545 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4549 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
4550 enum machine_mode mode ATTRIBUTE_UNUSED,
4551 const_tree type ATTRIBUTE_UNUSED)
4553 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4554 pcum->aapcs_vfp_reg_alloc = 0;
4558 #define AAPCS_CP(X) \
4560 aapcs_ ## X ## _cum_init, \
4561 aapcs_ ## X ## _is_call_candidate, \
4562 aapcs_ ## X ## _allocate, \
4563 aapcs_ ## X ## _is_return_candidate, \
4564 aapcs_ ## X ## _allocate_return_reg, \
4565 aapcs_ ## X ## _advance \
4568 /* Table of co-processors that can be used to pass arguments in
4569 registers. Ideally no argument should be a candidate for more than
4570 one co-processor table entry, but the table is processed in order
4571 and stops after the first match. If that entry then fails to put
4572 the argument into a co-processor register, the argument will go on
4573 the stack. */
4576 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4577 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4579 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4580 BLKmode) is a candidate for this co-processor's registers; this
4581 function should ignore any position-dependent state in
4582 CUMULATIVE_ARGS and only use call-type dependent information. */
4583 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4585 /* Return true if the argument does get a co-processor register; it
4586 should set aapcs_reg to an RTX of the register allocated as is
4587 required for a return from FUNCTION_ARG. */
4588 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4590 /* Return true if a result of mode MODE (or type TYPE if MODE is
4591 BLKmode) can be returned in this co-processor's registers. */
4592 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4594 /* Allocate and return an RTX element to hold the return type of a
4595 call; this routine must not fail and will only be called if
4596 is_return_candidate returned true with the same parameters. */
4597 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4599 /* Finish processing this argument and prepare to start processing
4601 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4602 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4610 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4615 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4616 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4623 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4625 /* We aren't passed a decl, so we can't check that a call is local.
4626 However, it isn't clear that that would be a win anyway, since it
4627 might limit some tail-calling opportunities. */
4628 enum arm_pcs pcs_variant;
4632 const_tree fndecl = NULL_TREE;
4634 if (TREE_CODE (fntype) == FUNCTION_DECL)
4637 fntype = TREE_TYPE (fntype);
4640 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4643 pcs_variant = arm_pcs_default;
4645 if (pcs_variant != ARM_PCS_AAPCS)
4649 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4650 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4659 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4662 /* We aren't passed a decl, so we can't check that a call is local.
4663 However, it isn't clear that that would be a win anyway, since it
4664 might limit some tail-calling opportunities. */
4665 enum arm_pcs pcs_variant;
4666 int unsignedp ATTRIBUTE_UNUSED;
4670 const_tree fndecl = NULL_TREE;
4672 if (TREE_CODE (fntype) == FUNCTION_DECL)
4675 fntype = TREE_TYPE (fntype);
4678 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4681 pcs_variant = arm_pcs_default;
4683 /* Promote integer types. */
4684 if (type && INTEGRAL_TYPE_P (type))
4685 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4687 if (pcs_variant != ARM_PCS_AAPCS)
4691 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4692 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4694 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4698 /* Promotes small structs returned in a register to full-word size
4699 for big-endian AAPCS. */
4700 if (type && arm_return_in_msb (type))
4702 HOST_WIDE_INT size = int_size_in_bytes (type);
4703 if (size % UNITS_PER_WORD != 0)
4705 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4706 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4710 return gen_rtx_REG (mode, R0_REGNUM);
4714 aapcs_libcall_value (enum machine_mode mode)
4716 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4717 && GET_MODE_SIZE (mode) <= 4)
4720 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4723 /* Lay out a function argument using the AAPCS rules. The rule
4724 numbers referred to here are those in the AAPCS. */
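/* A worked example (illustrative): for f (int a, double b, int c)
   under the base (soft-float) AAPCS, 'a' is allocated to r0 (rule C4);
   'b' needs doubleword alignment, so C3 rounds the NCRN up from 1 to 2
   and it occupies r2/r3; 'c' then finds no core registers left and
   goes on the stack (C6, C7/C8).  */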
4726 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4727 const_tree type, bool named)
4732 /* We only need to do this once per argument. */
4733 if (pcum->aapcs_arg_processed)
4736 pcum->aapcs_arg_processed = true;
4738 /* Special case: if named is false then we are handling an incoming
4739 anonymous argument which is on the stack. */
4743 /* Is this a potential co-processor register candidate? */
4744 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4746 int slot = aapcs_select_call_coproc (pcum, mode, type);
4747 pcum->aapcs_cprc_slot = slot;
4749 /* We don't have to apply any of the rules from part B of the
4750 preparation phase, these are handled elsewhere in the
4755 /* A co-processor register candidate goes either in its own
4756 class of registers or on the stack. */
4757 if (!pcum->aapcs_cprc_failed[slot])
4759 /* C1.cp - Try to allocate the argument to co-processor
4761 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4764 /* C2.cp - Put the argument on the stack and note that we
4765 can't assign any more candidates in this slot. We also
4766 need to note that we have allocated stack space, so that
4767 we won't later try to split a non-cprc candidate between
4768 core registers and the stack. */
4769 pcum->aapcs_cprc_failed[slot] = true;
4770 pcum->can_split = false;
4773 /* We didn't get a register, so this argument goes on the
4775 gcc_assert (pcum->can_split == false);
4780 /* C3 - For double-word aligned arguments, round the NCRN up to the
4781 next even number. */
4782 ncrn = pcum->aapcs_ncrn;
4783 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4786 nregs = ARM_NUM_REGS2 (mode, type);
4788 /* Sigh, this test should really assert that nregs > 0, but a GCC
4789 extension allows empty structs and then gives them empty size; it
4790 then allows such a structure to be passed by value. For some of
4791 the code below we have to pretend that such an argument has
4792 non-zero size so that we 'locate' it correctly either in
4793 registers or on the stack. */
4794 gcc_assert (nregs >= 0);
4796 nregs2 = nregs ? nregs : 1;
4798 /* C4 - Argument fits entirely in core registers. */
4799 if (ncrn + nregs2 <= NUM_ARG_REGS)
4801 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4802 pcum->aapcs_next_ncrn = ncrn + nregs;
4806 /* C5 - Some core registers left and there are no arguments already
4807 on the stack: split this argument between the remaining core
4808 registers and the stack. */
4809 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4811 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4812 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4813 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4817 /* C6 - NCRN is set to 4. */
4818 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4820 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
4824 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4825 for a call to a function whose data type is FNTYPE.
4826 For a library call, FNTYPE is NULL. */
4828 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4830 tree fndecl ATTRIBUTE_UNUSED)
4832 /* Long call handling. */
4834 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4836 pcum->pcs_variant = arm_pcs_default;
4838 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4840 if (arm_libcall_uses_aapcs_base (libname))
4841 pcum->pcs_variant = ARM_PCS_AAPCS;
4843 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4844 pcum->aapcs_reg = NULL_RTX;
4845 pcum->aapcs_partial = 0;
4846 pcum->aapcs_arg_processed = false;
4847 pcum->aapcs_cprc_slot = -1;
4848 pcum->can_split = true;
4850 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4854 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4856 pcum->aapcs_cprc_failed[i] = false;
4857 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4865 /* On the ARM, the offset starts at 0. */
4867 pcum->iwmmxt_nregs = 0;
4868 pcum->can_split = true;
4870 /* Varargs vectors are treated the same as long long.
4871 named_count avoids having to change the way arm handles 'named'
4872 arguments. */
4872 pcum->named_count = 0;
4875 if (TARGET_REALLY_IWMMXT && fntype)
4879 for (fn_arg = TYPE_ARG_TYPES (fntype);
4881 fn_arg = TREE_CHAIN (fn_arg))
4882 pcum->named_count += 1;
4884 if (! pcum->named_count)
4885 pcum->named_count = INT_MAX;
4890 /* Return true if mode/type need doubleword alignment. */
4892 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4894 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4895 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
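/* For example (illustrative): PARM_BOUNDARY is 32 on ARM, so DImode
   and DFmode arguments (64-bit alignment) need doubleword alignment,
   as does a struct whose alignment was raised with
   __attribute__ ((aligned (8))).  */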
4899 /* Determine where to put an argument to a function.
4900 Value is zero to push the argument on the stack,
4901 or a hard register in which to store the argument.
4903 MODE is the argument's machine mode.
4904 TYPE is the data type of the argument (as a tree).
4905 This is null for libcalls where that information may
4907 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4908 the preceding args and about the function being called.
4909 NAMED is nonzero if this argument is a named parameter
4910 (otherwise it is an extra parameter matching an ellipsis).
4912 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4913 other arguments are passed on the stack. If (NAMED == 0) (which happens
4914 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4915 defined), say it is passed on the stack (function_prologue will
4916 indeed make it pass on the stack if necessary). */
4919 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4920 const_tree type, bool named)
4922 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4925 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4926 a call insn (op3 of a call_value insn). */
4927 if (mode == VOIDmode)
4930 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4932 aapcs_layout_arg (pcum, mode, type, named);
4933 return pcum->aapcs_reg;
4936 /* Varargs vectors are treated the same as long long.
4937 named_count avoids having to change the way arm handles 'named'
4938 arguments. */
4938 if (TARGET_IWMMXT_ABI
4939 && arm_vector_mode_supported_p (mode)
4940 && pcum->named_count > pcum->nargs + 1)
4942 if (pcum->iwmmxt_nregs <= 9)
4943 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4946 pcum->can_split = false;
4951 /* Put doubleword aligned quantities in even register pairs. */
4953 && ARM_DOUBLEWORD_ALIGN
4954 && arm_needs_doubleword_align (mode, type))
4957 /* Only allow splitting an arg between regs and memory if all preceding
4958 args were allocated to regs. For args passed by reference we only count
4959 the reference pointer. */
4960 if (pcum->can_split)
4963 nregs = ARM_NUM_REGS2 (mode, type);
4965 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4968 return gen_rtx_REG (mode, pcum->nregs);
4972 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4974 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4975 ? DOUBLEWORD_ALIGNMENT
4980 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
4981 tree type, bool named)
4983 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4984 int nregs = pcum->nregs;
4986 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4988 aapcs_layout_arg (pcum, mode, type, named);
4989 return pcum->aapcs_partial;
4992 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4995 if (NUM_ARG_REGS > nregs
4996 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4997 && pcum->can_split)
4998 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
5003 /* Update the data in PCUM to advance over an argument
5004 of mode MODE and data type TYPE.
5005 (TYPE is null for libcalls where that information may not be available.) */
5008 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5009 const_tree type, bool named)
5011 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5013 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5015 aapcs_layout_arg (pcum, mode, type, named);
5017 if (pcum->aapcs_cprc_slot >= 0)
5019 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
5021 pcum->aapcs_cprc_slot = -1;
5024 /* Generic stuff. */
5025 pcum->aapcs_arg_processed = false;
5026 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
5027 pcum->aapcs_reg = NULL_RTX;
5028 pcum->aapcs_partial = 0;
5033 if (arm_vector_mode_supported_p (mode)
5034 && pcum->named_count > pcum->nargs
5035 && TARGET_IWMMXT_ABI)
5036 pcum->iwmmxt_nregs += 1;
5038 pcum->nregs += ARM_NUM_REGS2 (mode, type);
5042 /* Variable sized types are passed by reference. This is a GCC
5043 extension to the ARM ABI. */
5046 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
5047 enum machine_mode mode ATTRIBUTE_UNUSED,
5048 const_tree type, bool named ATTRIBUTE_UNUSED)
5050 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
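/* An illustrative sketch (GNU C, hypothetical names):

     void f (int n)
     {
       struct { char buf[n]; } s;   -- variable-sized struct
       g (s);
     }

   The argument of 'g' has no INTEGER_CST TYPE_SIZE, so the address of
   a copy is passed instead of the bytes themselves.  */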
5053 /* Encode the current state of the #pragma [no_]long_calls. */
5056 OFF, /* No #pragma [no_]long_calls is in effect. */
5057 LONG, /* #pragma long_calls is in effect. */
5058 SHORT /* #pragma no_long_calls is in effect. */
5061 static arm_pragma_enum arm_pragma_long_calls = OFF;
5064 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5066 arm_pragma_long_calls = LONG;
5070 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5072 arm_pragma_long_calls = SHORT;
5076 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5078 arm_pragma_long_calls = OFF;
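/* Illustrative usage of the pragmas handled above (a sketch, not from
   the original sources):

     #pragma long_calls
     void far_away (void);    -- declarations here get "long_call"
     #pragma no_long_calls
     void near_by (void);     -- ... and here "short_call"
     #pragma long_calls_off   -- back to the command-line default  */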
5081 /* Handle an attribute requiring a FUNCTION_DECL;
5082 arguments as in struct attribute_spec.handler. */
5084 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
5085 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5087 if (TREE_CODE (*node) != FUNCTION_DECL)
5089 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5091 *no_add_attrs = true;
5097 /* Handle an "interrupt" or "isr" attribute;
5098 arguments as in struct attribute_spec.handler. */
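/* Typical declarations this handler accepts (illustrative; the names
   are hypothetical):

     void my_irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     void my_fiq_handler (void) __attribute__ ((isr ("FIQ")));  */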
5100 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5105 if (TREE_CODE (*node) != FUNCTION_DECL)
5107 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5109 *no_add_attrs = true;
5111 /* FIXME: the argument if any is checked for type attributes;
5112 should it be checked for decl ones? */
5116 if (TREE_CODE (*node) == FUNCTION_TYPE
5117 || TREE_CODE (*node) == METHOD_TYPE)
5119 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5121 warning (OPT_Wattributes, "%qE attribute ignored",
5123 *no_add_attrs = true;
5126 else if (TREE_CODE (*node) == POINTER_TYPE
5127 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5128 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5129 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5131 *node = build_variant_type_copy (*node);
5132 TREE_TYPE (*node) = build_type_attribute_variant
5134 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5135 *no_add_attrs = true;
5139 /* Possibly pass this attribute on from the type to a decl. */
5140 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5141 | (int) ATTR_FLAG_FUNCTION_NEXT
5142 | (int) ATTR_FLAG_ARRAY_NEXT))
5144 *no_add_attrs = true;
5145 return tree_cons (name, args, NULL_TREE);
5149 warning (OPT_Wattributes, "%qE attribute ignored",
5158 /* Handle a "pcs" attribute; arguments as in struct
5159 attribute_spec.handler. */
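/* For example (illustrative; 'f' is a hypothetical declaration):

     double f (double) __attribute__ ((pcs ("aapcs")));

   selects the base (core-register) variant for calls to 'f' even when
   the default is "aapcs-vfp".  */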
5161 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5162 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5164 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5166 warning (OPT_Wattributes, "%qE attribute ignored", name);
5167 *no_add_attrs = true;
5172 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5173 /* Handle the "notshared" attribute. This attribute is another way of
5174 requesting hidden visibility. ARM's compiler supports
5175 "__declspec(notshared)"; we support the same thing via an
5179 arm_handle_notshared_attribute (tree *node,
5180 tree name ATTRIBUTE_UNUSED,
5181 tree args ATTRIBUTE_UNUSED,
5182 int flags ATTRIBUTE_UNUSED,
5185 tree decl = TYPE_NAME (*node);
5189 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5190 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5191 *no_add_attrs = false;
5197 /* Return 0 if the attributes for two types are incompatible, 1 if they
5198 are compatible, and 2 if they are nearly compatible (which causes a
5199 warning to be generated). */
5201 arm_comp_type_attributes (const_tree type1, const_tree type2)
5205 /* Check for mismatch of non-default calling convention. */
5206 if (TREE_CODE (type1) != FUNCTION_TYPE)
5209 /* Check for mismatched call attributes. */
5210 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5211 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5212 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5213 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5215 /* Only bother to check if an attribute is defined. */
5216 if (l1 | l2 | s1 | s2)
5218 /* If one type has an attribute, the other must have the same attribute. */
5219 if ((l1 != l2) || (s1 != s2))
5222 /* Disallow mixed attributes. */
5223 if ((l1 & s2) || (l2 & s1))
5227 /* Check for mismatched ISR attribute. */
5228 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5230 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5231 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5233 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5240 /* Assigns default attributes to newly defined type. This is used to
5241 set short_call/long_call attributes for function types of
5242 functions defined inside corresponding #pragma scopes. */
5244 arm_set_default_type_attributes (tree type)
5246 /* Add __attribute__ ((long_call)) to all functions, when
5247 inside #pragma long_calls or __attribute__ ((short_call)),
5248 when inside #pragma no_long_calls. */
5249 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5251 tree type_attr_list, attr_name;
5252 type_attr_list = TYPE_ATTRIBUTES (type);
5254 if (arm_pragma_long_calls == LONG)
5255 attr_name = get_identifier ("long_call");
5256 else if (arm_pragma_long_calls == SHORT)
5257 attr_name = get_identifier ("short_call");
5261 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5262 TYPE_ATTRIBUTES (type) = type_attr_list;
5266 /* Return true if DECL is known to be linked into section SECTION. */
5269 arm_function_in_section_p (tree decl, section *section)
5271 /* We can only be certain about functions defined in the same
5272 compilation unit. */
5273 if (!TREE_STATIC (decl))
5276 /* Make sure that SYMBOL always binds to the definition in this
5277 compilation unit. */
5278 if (!targetm.binds_local_p (decl))
5281 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5282 if (!DECL_SECTION_NAME (decl))
5284 /* Make sure that we will not create a unique section for DECL. */
5285 if (flag_function_sections || DECL_ONE_ONLY (decl))
5289 return function_section (decl) == section;
5292 /* Return nonzero if a 32-bit "long_call" should be generated for
5293 a call from the current function to DECL. We generate a long_call
5296 a. has an __attribute__ ((long_call))
5297 or b. is within the scope of a #pragma long_calls
5298 or c. the -mlong-calls command line switch has been specified
5300 However we do not generate a long call if the function:
5302 d. has an __attribute__ ((short_call))
5303 or e. is inside the scope of a #pragma no_long_calls
5304 or f. is defined in the same section as the current function. */
5307 arm_is_long_call_p (tree decl)
5312 return TARGET_LONG_CALLS;
5314 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5315 if (lookup_attribute ("short_call", attrs))
5318 /* For "f", be conservative, and only cater for cases in which the
5319 whole of the current function is placed in the same section. */
5320 if (!flag_reorder_blocks_and_partition
5321 && TREE_CODE (decl) == FUNCTION_DECL
5322 && arm_function_in_section_p (decl, current_function_section ()))
5325 if (lookup_attribute ("long_call", attrs))
5328 return TARGET_LONG_CALLS;
5331 /* Return nonzero if it is ok to make a tail-call to DECL. */
5333 arm_function_ok_for_sibcall (tree decl, tree exp)
5335 unsigned long func_type;
5337 if (cfun->machine->sibcall_blocked)
5340 /* Never tailcall something for which we have no decl, or if we
5341 are generating code for Thumb-1. */
5342 if (decl == NULL || TARGET_THUMB1)
5345 /* The PIC register is live on entry to VxWorks PLT entries, so we
5346 must make the call before restoring the PIC register. */
5347 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5350 /* Cannot tail-call to long calls, since these are out of range of
5351 a branch instruction. */
5352 if (arm_is_long_call_p (decl))
5355 /* If we are interworking and the function is not declared static
5356 then we can't tail-call it unless we know that it exists in this
5357 compilation unit (since it might be a Thumb routine). */
5358 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5361 func_type = arm_current_func_type ();
5362 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5363 if (IS_INTERRUPT (func_type))
5366 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5368 /* Check that the return value locations are the same. For
5369 example that we aren't returning a value from the sibling in
5370 a VFP register but then need to transfer it to a core
5374 a = arm_function_value (TREE_TYPE (exp), decl, false);
5375 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5377 if (!rtx_equal_p (a, b))
5381 /* Never tailcall if function may be called with a misaligned SP. */
5382 if (IS_STACKALIGN (func_type))
5385 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5386 references should become a NOP. Don't convert such calls into
5388 if (TARGET_AAPCS_BASED
5389 && arm_abi == ARM_ABI_AAPCS
5390 && DECL_WEAK (decl))
5393 /* Everything else is ok. */
5398 /* Addressing mode support functions. */
5400 /* Return nonzero if X is a legitimate immediate operand when compiling
5401 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5403 legitimate_pic_operand_p (rtx x)
5405 if (GET_CODE (x) == SYMBOL_REF
5406 || (GET_CODE (x) == CONST
5407 && GET_CODE (XEXP (x, 0)) == PLUS
5408 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5414 /* Record that the current function needs a PIC register. Initialize
5415 cfun->machine->pic_reg if we have not already done so. */
5418 require_pic_register (void)
5420 /* A lot of the logic here is made obscure by the fact that this
5421 routine gets called as part of the rtx cost estimation process.
5422 We don't want those calls to affect any assumptions about the real
5423 function; and further, we can't call entry_of_function() until we
5424 start the real expansion process. */
5425 if (!crtl->uses_pic_offset_table)
5427 gcc_assert (can_create_pseudo_p ());
5428 if (arm_pic_register != INVALID_REGNUM)
5430 if (!cfun->machine->pic_reg)
5431 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5433 /* Play games to avoid marking the function as needing pic
5434 if we are being called as part of the cost-estimation
5436 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5437 crtl->uses_pic_offset_table = 1;
5443 if (!cfun->machine->pic_reg)
5444 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5446 /* Play games to avoid marking the function as needing pic
5447 if we are being called as part of the cost-estimation
5449 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5451 crtl->uses_pic_offset_table = 1;
5454 arm_load_pic_register (0UL);
5459 for (insn = seq; insn; insn = NEXT_INSN (insn))
5461 INSN_LOCATION (insn) = prologue_location;
5463 /* We can be called during expansion of PHI nodes, where
5464 we can't yet emit instructions directly in the final
5465 insn stream. Queue the insns on the entry edge, they will
5466 be committed after everything else is expanded. */
5467 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5474 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5476 if (GET_CODE (orig) == SYMBOL_REF
5477 || GET_CODE (orig) == LABEL_REF)
5483 gcc_assert (can_create_pseudo_p ());
5484 reg = gen_reg_rtx (Pmode);
5487 /* VxWorks does not impose a fixed gap between segments; the run-time
5488 gap can be different from the object-file gap. We therefore can't
5489 use GOTOFF unless we are absolutely sure that the symbol is in the
5490 same segment as the GOT. Unfortunately, the flexibility of linker
5491 scripts means that we can't be sure of that in general, so assume
5492 that GOTOFF is never valid on VxWorks. */
5493 if ((GET_CODE (orig) == LABEL_REF
5494 || (GET_CODE (orig) == SYMBOL_REF
5495 && SYMBOL_REF_LOCAL_P (orig)))
5497 && !TARGET_VXWORKS_RTP)
5498 insn = arm_pic_static_addr (orig, reg);
5504 /* If this function doesn't have a pic register, create one now. */
5505 require_pic_register ();
5507 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5509 /* Make the MEM as close to a constant as possible. */
5510 mem = SET_SRC (pat);
5511 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5512 MEM_READONLY_P (mem) = 1;
5513 MEM_NOTRAP_P (mem) = 1;
5515 insn = emit_insn (pat);
5518 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5520 set_unique_reg_note (insn, REG_EQUAL, orig);
5524 else if (GET_CODE (orig) == CONST)
5528 if (GET_CODE (XEXP (orig, 0)) == PLUS
5529 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5532 /* Handle the case where we have: const (UNSPEC_TLS). */
5533 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5534 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5537 /* Handle the case where we have:
5538 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5540 if (GET_CODE (XEXP (orig, 0)) == PLUS
5541 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5542 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5544 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
5550 gcc_assert (can_create_pseudo_p ());
5551 reg = gen_reg_rtx (Pmode);
5554 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5556 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5557 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5558 base == reg ? 0 : reg);
5560 if (CONST_INT_P (offset))
5562 /* The base register doesn't really matter, we only want to
5563 test the index for the appropriate mode. */
5564 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5566 gcc_assert (can_create_pseudo_p ());
5567 offset = force_reg (Pmode, offset);
5570 if (CONST_INT_P (offset))
5571 return plus_constant (Pmode, base, INTVAL (offset));
5574 if (GET_MODE_SIZE (mode) > 4
5575 && (GET_MODE_CLASS (mode) == MODE_INT
5576 || TARGET_SOFT_FLOAT))
5578 emit_insn (gen_addsi3 (reg, base, offset));
5582 return gen_rtx_PLUS (Pmode, base, offset);
5589 /* Find a spare register to use during the prolog of a function. */
5592 thumb_find_work_register (unsigned long pushed_regs_mask)
5596 /* Check the argument registers first as these are call-used. The
5597 register allocation order means that sometimes r3 might be used
5598 but earlier argument registers might not, so check them all. */
5599 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5600 if (!df_regs_ever_live_p (reg))
5603 /* Before going on to check the call-saved registers we can try a couple
5604 more ways of deducing that r3 is available. The first is when we are
5605 pushing anonymous arguments onto the stack and we have less than 4
5606 registers worth of fixed arguments(*). In this case r3 will be part of
5607 the variable argument list and so we can be sure that it will be
5608 pushed right at the start of the function. Hence it will be available
5609 for the rest of the prologue.
5610 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5611 if (cfun->machine->uses_anonymous_args
5612 && crtl->args.pretend_args_size > 0)
5613 return LAST_ARG_REGNUM;
5615 /* The other case is when we have fixed arguments but less than 4 registers
5616 worth. In this case r3 might be used in the body of the function, but
5617 it is not being used to convey an argument into the function. In theory
5618 we could just check crtl->args.size to see how many bytes are
5619 being passed in argument registers, but it seems that it is unreliable.
5620 Sometimes it will have the value 0 when in fact arguments are being
5621 passed. (See testcase execute/20021111-1.c for an example). So we also
5622 check the args_info.nregs field as well. The problem with this field is
5623 that it makes no allowances for arguments that are passed to the
5624 function but which are not used. Hence we could miss an opportunity
5625 when a function has an unused argument in r3. But it is better to be
5626 safe than to be sorry. */
5627 if (! cfun->machine->uses_anonymous_args
5628 && crtl->args.size >= 0
5629 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5630 && (TARGET_AAPCS_BASED
5631 ? crtl->args.info.aapcs_ncrn < 4
5632 : crtl->args.info.nregs < 4))
5633 return LAST_ARG_REGNUM;
5635 /* Otherwise look for a call-saved register that is going to be pushed. */
5636 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5637 if (pushed_regs_mask & (1 << reg))
5642 /* Thumb-2 can use high regs. */
5643 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5644 if (pushed_regs_mask & (1 << reg))
5647 /* Something went wrong - thumb_compute_save_reg_mask()
5648 should have arranged for a suitable register to be pushed. */
5652 static GTY(()) int pic_labelno;
5654 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5658 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5660 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5662 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5665 gcc_assert (flag_pic);
5667 pic_reg = cfun->machine->pic_reg;
5668 if (TARGET_VXWORKS_RTP)
5670 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5671 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5672 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5674 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5676 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5677 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5681 /* We use an UNSPEC rather than a LABEL_REF because this label
5682 never appears in the code stream. */
5684 labelno = GEN_INT (pic_labelno++);
5685 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5686 l1 = gen_rtx_CONST (VOIDmode, l1);
5688 /* On the ARM the PC register contains 'dot + 8' at the time of the
5689 addition, on the Thumb it is 'dot + 4'. */
5690 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5691 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5693 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5697 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5699 else /* TARGET_THUMB1 */
5701 if (arm_pic_register != INVALID_REGNUM
5702 && REGNO (pic_reg) > LAST_LO_REGNUM)
5704 /* We will have pushed the pic register, so we should always be
5705 able to find a work register. */
5706 pic_tmp = gen_rtx_REG (SImode,
5707 thumb_find_work_register (saved_regs));
5708 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5709 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5710 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5713 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5717 /* Need to emit this whether or not we obey regdecls,
5718 since setjmp/longjmp can cause life info to screw up. */
5722 /* Generate code to load the address of a static var when flag_pic is set. */
5724 arm_pic_static_addr (rtx orig, rtx reg)
5726 rtx l1, labelno, offset_rtx, insn;
5728 gcc_assert (flag_pic);
5730 /* We use an UNSPEC rather than a LABEL_REF because this label
5731 never appears in the code stream. */
5732 labelno = GEN_INT (pic_labelno++);
5733 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5734 l1 = gen_rtx_CONST (VOIDmode, l1);
5736 /* On the ARM the PC register contains 'dot + 8' at the time of the
5737 addition, on the Thumb it is 'dot + 4'. */
5738 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5739 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5740 UNSPEC_SYMBOL_OFFSET);
5741 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5743 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
5747 /* Return nonzero if X is valid as an ARM state addressing register. */
5749 arm_address_register_rtx_p (rtx x, int strict_p)
5759 return ARM_REGNO_OK_FOR_BASE_P (regno);
5761 return (regno <= LAST_ARM_REGNUM
5762 || regno >= FIRST_PSEUDO_REGISTER
5763 || regno == FRAME_POINTER_REGNUM
5764 || regno == ARG_POINTER_REGNUM);
5767 /* Return TRUE if this rtx is the difference of a symbol and a label,
5768 and will reduce to a PC-relative relocation in the object file.
5769 Expressions like this can be left alone when generating PIC, rather
5770 than forced through the GOT. */
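/* For example (illustrative): (minus (symbol_ref "sym") (label_ref L))
   reduces to the assembly-time difference "sym - .L", which the
   assembler can resolve PC-relatively without a GOT entry.  */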
5772 pcrel_constant_p (rtx x)
5774 if (GET_CODE (x) == MINUS)
5775 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5780 /* Return true if X will surely end up in an index register after next
5783 will_be_in_index_register (const_rtx x)
5785 /* arm.md: calculate_pic_address will split this into a register. */
5786 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
5789 /* Return nonzero if X is a valid ARM state address operand. */
5791 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5795 enum rtx_code code = GET_CODE (x);
5797 if (arm_address_register_rtx_p (x, strict_p))
5800 use_ldrd = (TARGET_LDRD
5802 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5804 if (code == POST_INC || code == PRE_DEC
5805 || ((code == PRE_INC || code == POST_DEC)
5806 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5807 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5809 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5810 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5811 && GET_CODE (XEXP (x, 1)) == PLUS
5812 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5814 rtx addend = XEXP (XEXP (x, 1), 1);
5816 /* Don't allow ldrd post increment by register because it's hard
5817 to fixup invalid register choices. */
5819 && GET_CODE (x) == POST_MODIFY
5823 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5824 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5827 /* After reload constants split into minipools will have addresses
5828 from a LABEL_REF. */
5829 else if (reload_completed
5830 && (code == LABEL_REF
5832 && GET_CODE (XEXP (x, 0)) == PLUS
5833 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5834 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
5837 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5840 else if (code == PLUS)
5842 rtx xop0 = XEXP (x, 0);
5843 rtx xop1 = XEXP (x, 1);
5845 return ((arm_address_register_rtx_p (xop0, strict_p)
5846 && ((CONST_INT_P (xop1)
5847 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5848 || (!strict_p && will_be_in_index_register (xop1))))
5849 || (arm_address_register_rtx_p (xop1, strict_p)
5850 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5854 /* Reload currently can't handle MINUS, so disable this for now. */
5855 else if (GET_CODE (x) == MINUS)
5857 rtx xop0 = XEXP (x, 0);
5858 rtx xop1 = XEXP (x, 1);
5860 return (arm_address_register_rtx_p (xop0, strict_p)
5861 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5865 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5866 && code == SYMBOL_REF
5867 && CONSTANT_POOL_ADDRESS_P (x)
5869 && symbol_mentioned_p (get_pool_constant (x))
5870 && ! pcrel_constant_p (get_pool_constant (x))))
5876 /* Return nonzero if X is a valid Thumb-2 address operand. */
5878 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5881 enum rtx_code code = GET_CODE (x);
5883 if (arm_address_register_rtx_p (x, strict_p))
5886 use_ldrd = (TARGET_LDRD
5888 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5890 if (code == POST_INC || code == PRE_DEC
5891 || ((code == PRE_INC || code == POST_DEC)
5892 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5893 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5895 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5896 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5897 && GET_CODE (XEXP (x, 1)) == PLUS
5898 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5900 /* Thumb-2 only has autoincrement by constant. */
5901 rtx addend = XEXP (XEXP (x, 1), 1);
5902 HOST_WIDE_INT offset;
5904 if (!CONST_INT_P (addend))
5907 offset = INTVAL (addend);
5908 if (GET_MODE_SIZE (mode) <= 4)
5909 return (offset > -256 && offset < 256);
5911 return (use_ldrd && offset > -1024 && offset < 1024
5912 && (offset & 3) == 0);
5915 /* After reload constants split into minipools will have addresses
5916 from a LABEL_REF. */
5917 else if (reload_completed
5918 && (code == LABEL_REF
5920 && GET_CODE (XEXP (x, 0)) == PLUS
5921 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5922 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
5925 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5928 else if (code == PLUS)
5930 rtx xop0 = XEXP (x, 0);
5931 rtx xop1 = XEXP (x, 1);
5933 return ((arm_address_register_rtx_p (xop0, strict_p)
5934 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5935 || (!strict_p && will_be_in_index_register (xop1))))
5936 || (arm_address_register_rtx_p (xop1, strict_p)
5937 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5940 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5941 && code == SYMBOL_REF
5942 && CONSTANT_POOL_ADDRESS_P (x)
5944 && symbol_mentioned_p (get_pool_constant (x))
5945 && ! pcrel_constant_p (get_pool_constant (x))))
5951 /* Return nonzero if INDEX is valid for an address index operand in
5954 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5957 HOST_WIDE_INT range;
5958 enum rtx_code code = GET_CODE (index);
5960 /* Standard coprocessor addressing modes. */
5961 if (TARGET_HARD_FLOAT
5963 && (mode == SFmode || mode == DFmode))
5964 return (code == CONST_INT && INTVAL (index) < 1024
5965 && INTVAL (index) > -1024
5966 && (INTVAL (index) & 3) == 0);
5968 /* For quad modes, we restrict the constant offset to be slightly less
5969 than what the instruction format permits. We do this because for
5970 quad mode moves, we will actually decompose them into two separate
5971 double-mode reads or writes. INDEX must therefore be a valid
5972 (double-mode) offset and so should INDEX+8. */
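  /* Worked arithmetic (illustrative): double-mode offsets are multiples
     of 4 in (-1024, 1024), so 1020 is the largest; a quad access at
     offset 1020 would need its second double-mode access at 1028, out
     of range.  Capping quad offsets below 1016 keeps INDEX+8 valid.  */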
5973 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5974 return (code == CONST_INT
5975 && INTVAL (index) < 1016
5976 && INTVAL (index) > -1024
5977 && (INTVAL (index) & 3) == 0);
5979 /* We have no such constraint on double mode offsets, so we permit the
5980 full range of the instruction format. */
5981 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5982 return (code == CONST_INT
5983 && INTVAL (index) < 1024
5984 && INTVAL (index) > -1024
5985 && (INTVAL (index) & 3) == 0);
5987 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5988 return (code == CONST_INT
5989 && INTVAL (index) < 1024
5990 && INTVAL (index) > -1024
5991 && (INTVAL (index) & 3) == 0);
5993 if (arm_address_register_rtx_p (index, strict_p)
5994 && (GET_MODE_SIZE (mode) <= 4))
5997 if (mode == DImode || mode == DFmode)
5999 if (code == CONST_INT)
6001 HOST_WIDE_INT val = INTVAL (index);
6004 return val > -256 && val < 256;
6006 return val > -4096 && val < 4092;
6009 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
6012 if (GET_MODE_SIZE (mode) <= 4
6016 || (mode == QImode && outer == SIGN_EXTEND))))
6020 rtx xiop0 = XEXP (index, 0);
6021 rtx xiop1 = XEXP (index, 1);
6023 return ((arm_address_register_rtx_p (xiop0, strict_p)
6024 && power_of_two_operand (xiop1, SImode))
6025 || (arm_address_register_rtx_p (xiop1, strict_p)
6026 && power_of_two_operand (xiop0, SImode)));
6028 else if (code == LSHIFTRT || code == ASHIFTRT
6029 || code == ASHIFT || code == ROTATERT)
6031 rtx op = XEXP (index, 1);
6033 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6036 && INTVAL (op) <= 31);
6040 /* For ARM v4 we may be doing a sign-extend operation during the
6046 || (outer == SIGN_EXTEND && mode == QImode))
6052 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
6054 return (code == CONST_INT
6055 && INTVAL (index) < range
6056 && INTVAL (index) > -range);
6059 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6060 index operand, i.e. 1, 2, 4 or 8. */
6062 thumb2_index_mul_operand (rtx op)
6066 if (!CONST_INT_P (op))
6070 return (val == 1 || val == 2 || val == 4 || val == 8);
6073 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6075 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
6077 enum rtx_code code = GET_CODE (index);
6079 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6080 /* Standard coprocessor addressing modes. */
6081 if (TARGET_HARD_FLOAT
6083 && (mode == SFmode || mode == DFmode))
6084 return (code == CONST_INT && INTVAL (index) < 1024
6085 /* Thumb-2 allows only > -256 index range for its core register
6086 load/stores. Since we allow SF/DF in core registers, we have
6087 to use the intersection between -256~4096 (core) and -1024~1024
6089 && INTVAL (index) > -256
6090 && (INTVAL (index) & 3) == 0);
6092 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6094 /* For DImode assume values will usually live in core regs
6095 and only allow LDRD addressing modes. */
6096 if (!TARGET_LDRD || mode != DImode)
6097 return (code == CONST_INT
6098 && INTVAL (index) < 1024
6099 && INTVAL (index) > -1024
6100 && (INTVAL (index) & 3) == 0);
6103 /* For quad modes, we restrict the constant offset to be slightly less
6104 than what the instruction format permits. We do this because for
6105 quad mode moves, we will actually decompose them into two separate
6106 double-mode reads or writes. INDEX must therefore be a valid
6107 (double-mode) offset and so should INDEX+8. */
6108 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6109 return (code == CONST_INT
6110 && INTVAL (index) < 1016
6111 && INTVAL (index) > -1024
6112 && (INTVAL (index) & 3) == 0);
6114 /* We have no such constraint on double mode offsets, so we permit the
6115 full range of the instruction format. */
6116 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6117 return (code == CONST_INT
6118 && INTVAL (index) < 1024
6119 && INTVAL (index) > -1024
6120 && (INTVAL (index) & 3) == 0);
6122 if (arm_address_register_rtx_p (index, strict_p)
6123 && (GET_MODE_SIZE (mode) <= 4))
6126 if (mode == DImode || mode == DFmode)
6128 if (code == CONST_INT)
6130 HOST_WIDE_INT val = INTVAL (index);
6131 /* ??? Can we assume ldrd for thumb2? */
6132 /* Thumb-2 ldrd only has reg+const addressing modes. */
6133 /* ldrd supports offsets of +-1020.
6134 However the ldr fallback does not. */
6135 return val > -256 && val < 256 && (val & 3) == 0;
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
	       && thumb2_index_mul_operand (xiop1))
	      || (arm_address_register_rtx_p (xiop1, strict_p)
		  && thumb2_index_mul_operand (xiop0)));
  else if (code == ASHIFT)
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
	      && INTVAL (op) <= 3);

  return (code == CONST_INT
	  && INTVAL (index) < 4096
	  && INTVAL (index) > -256);

/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
	  || regno > LAST_VIRTUAL_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || (GET_MODE_SIZE (mode) >= 4
	      && (regno == STACK_POINTER_REGNUM
		  || regno >= FIRST_PSEUDO_REGISTER
		  || x == hard_frame_pointer_rtx
		  || x == arg_pointer_rtx)));
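/* Worked example (illustrative note, not from the original source): in
   non-strict mode r0..r7 (the lo registers) always qualify as a Thumb-1
   base, while SP only qualifies once GET_MODE_SIZE (mode) >= 4 -- matching
   the ISA, where "ldr rN, [sp, #imm]" exists but there is no encoding for
   "ldrb rN, [sp, #imm]".  */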
/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
thumb1_index_register_rtx_p (rtx x, int strict_p)
  return thumb1_base_register_rtx_p (x, QImode, strict_p);

/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
	  || reg_mentioned_p (arg_pointer_rtx, x)
	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
	   && (GET_CODE (x) == LABEL_REF
	       || (GET_CODE (x) == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))

  else if (GET_CODE (x) == PLUS)
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
	 will be replaced with STACK, and SP relative addressing only
	 permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
	  && XEXP (x, 0) != frame_pointer_rtx
	  && XEXP (x, 1) != frame_pointer_rtx
	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
		|| XEXP (x, 0) == arg_pointer_rtx)
	       && CONST_INT_P (XEXP (x, 1))
	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))

      /* REG+const has 10-bit offset for SP, but only SImode and
	 larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
	 just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && INTVAL (XEXP (x, 1)) >= 0
	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)

      else if (REG_P (XEXP (x, 0))
	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
		       && REGNO (XEXP (x, 0))
			  <= LAST_VIRTUAL_POINTER_REGISTER))
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && GET_MODE_SIZE (mode) == 4
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
	      && (val + GET_MODE_SIZE (mode)) <= 128
	      && (val & 3) == 0);
    }
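  /* Worked example (illustrative note, not from the original source):
     byte accesses allow offsets 0..31 ("ldrb r0, [r1, #31]"), halfword
     accesses 0..62 in steps of 2 ("ldrh r0, [r1, #62]"), and word
     accesses 0..124 in steps of 4 ("ldr r0, [r1, #124]") -- exactly the
     scaled 5-bit immediate fields of the 16-bit Thumb load/store
     encodings.  */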
arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);

/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
  if (rclass == GENERAL_REGS
      || rclass == HI_REGS
      || rclass == NO_REGS
      || rclass == STACK_REG)

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

get_tls_get_addr (void)
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;

arm_load_tp (rtx target)
  if (!target)
    target = gen_reg_rtx (SImode);

      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));

      /* Always returned in r0.  Immediately copy the result into a pseudo,
	 otherwise other uses of r0 (e.g. setting up function arguments) may
	 clobber the value.  */

      emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, 0);
      emit_move_insn (target, tmp);
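      /* Illustrative note (added, hedged): on cores with the hardware
	 thread register, gen_load_tp_hard typically expands to a CP15
	 read of TPIDRURO, i.e. "mrc p15, 0, <reg>, c13, c0, 3"; the soft
	 variant instead calls the kernel helper (__aeabi_read_tp on
	 Linux/EABI), whose result is pinned to r0 -- hence the copy into
	 a pseudo above.  */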
load_tls_operand (rtx x, rtx reg)
  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
  rtx insns, label, labelno, sum;

  gcc_assert (reloc != TLS_DESCSEQ);

  labelno = GEN_INT (pic_labelno++);
  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  label = gen_rtx_CONST (VOIDmode, label);

  sum = gen_rtx_UNSPEC (Pmode,
			gen_rtvec (4, x, GEN_INT (reloc), label,
				   GEN_INT (TARGET_ARM ? 8 : 4)),
  reg = load_tls_operand (sum, reg);

    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
				     LCT_PURE, /* LCT_CONST?  */
				     Pmode, 1, reg, Pmode);

  insns = get_insns ();

arm_tls_descseq_addr (rtx x, rtx reg)
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
				       gen_rtx_CONST (VOIDmode, label),
				       GEN_INT (!TARGET_ARM)),
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));

  emit_insn (gen_tlscall (x, labelno));

  reg = gen_reg_rtx (SImode);

  gcc_assert (REGNO (reg) != 0);

  emit_move_insn (reg, reg0);

legitimize_tls_address (rtx x, rtx reg)
  rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);

	  /* Original scheme.  */
	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, x);

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);

	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
	     share the LDM result with other LD model accesses.  */
	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, eqv);

	  /* Load the addend.  */
	  addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
						     GEN_INT (TLS_LDO32)),
	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
	  dest = gen_rtx_PLUS (Pmode, dest, addend);

    case TLS_MODEL_INITIAL_EXEC:
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);
      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
				       GEN_INT (TARGET_ARM ? 8 : 4)),
      reg = load_tls_operand (sum, reg);

	emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
      else if (TARGET_THUMB2)
	emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
	  emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
	  emit_move_insn (reg, gen_const_mem (SImode, reg));

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
  /* TODO: legitimize_address for Thumb2.  */
      return thumb_legitimize_address (x, orig_x, mode);

  if (arm_tls_symbol_p (x))
    return legitimize_tls_address (x, NULL_RTX);

  if (GET_CODE (x) == PLUS)
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
	  && CONST_INT_P (xop1))
	  HOST_WIDE_INT n, low_n;

	  /* VFP addressing modes actually allow greater offsets, but for
	     now we just stick with the lowest common denominator.  */
	      || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))

	  low_n = ((mode) == TImode ? 0
		   : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));

	  base_reg = gen_reg_rtx (SImode);
	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
	  emit_move_insn (base_reg, val);
	  x = plus_constant (Pmode, base_reg, low_n);
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_PLUS (SImode, xop0, xop1);

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_MINUS (SImode, xop0, xop1);

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the mini pool to be avoided entirely.  */
  else if (CONST_INT_P (x) && optimize > 0)
      HOST_WIDE_INT mask, base, index;

      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
	 use an 8-bit index.  So let's use a 12-bit index for SImode only
	 and hope that arm_gen_constant will enable ldrb to use more
	 bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (bit_count (base & 0xffffffff) > (32 - bits)/2)
	  /* It'll most probably be more efficient to generate the base
	     with more bits set and use a negative index instead.  */

      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
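      /* Worked example (illustrative, not from the original source): for
	 an SImode access to absolute address 4100 (0x1004), bits == 12,
	 so mask == 0xfff, base == 0x1000 and index == 4.
	 bit_count (0x1000) == 1, within (32 - 12)/2, so no negative-index
	 rewrite is needed and the access becomes roughly
		mov	rB, #4096
		ldr	rD, [rB, #4]
	 letting nearby references to 0x1000..0x1ffc share the base.  */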
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)

/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
  if (arm_tls_symbol_p (x))
    return legitimize_tls_address (x, NULL_RTX);

  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
	  || INTVAL (XEXP (x, 1)) < 0))
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
	 then offsetting that.  Don't do this when optimizing for space
	 since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
	  HOST_WIDE_INT delta;

	    delta = offset - (256 - GET_MODE_SIZE (mode));
	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
	    delta = 31 * GET_MODE_SIZE (mode);
	    delta = offset & (~31 * GET_MODE_SIZE (mode));

	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
	  x = plus_constant (Pmode, xop0, delta);
      else if (offset < 0 && offset > -256)
	/* Small negative offsets are best done with a subtract before the
	   dereference, forcing these into a register normally takes two
	   instructions.  */
	x = force_operand (x, NULL_RTX);
	  /* For the remaining cases, force the constant into a
	     register.  */
	  xop1 = force_reg (SImode, xop1);
	  x = gen_rtx_PLUS (SImode, xop0, xop1);
  else if (GET_CODE (x) == PLUS
	   && s_register_operand (XEXP (x, 1), SImode)
	   && !s_register_operand (XEXP (x, 0), SImode))
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));

      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)

arm_legitimize_reload_address (rtx *p,
			       enum machine_mode mode,
			       int opnum, int type,
			       int ind_levels ATTRIBUTE_UNUSED)
  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (*p) == PLUS
      && GET_CODE (XEXP (*p, 0)) == PLUS
      && REG_P (XEXP (XEXP (*p, 0), 0))
      && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
      && CONST_INT_P (XEXP (*p, 1)))
      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
		   MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
		   VOIDmode, 0, 0, opnum, (enum reload_type) type);

  if (GET_CODE (*p) == PLUS
      && REG_P (XEXP (*p, 0))
      && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
      /* If the base register is equivalent to a constant, let the generic
	 code handle it.  Otherwise we will run into problems if a future
	 reload pass decides to rematerialize the constant.  */
      && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
      && CONST_INT_P (XEXP (*p, 1)))
      HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
      HOST_WIDE_INT low, high;

      /* Detect coprocessor load/stores.  */
      bool coproc_p = ((TARGET_HARD_FLOAT
			&& (mode == SFmode || mode == DFmode))
		       || (TARGET_REALLY_IWMMXT
			   && VALID_IWMMXT_REG_MODE (mode))
			   && (VALID_NEON_DREG_MODE (mode)
			       || VALID_NEON_QREG_MODE (mode))));

      /* For some conditions, bail out when lower two bits are
	 unaligned.  */
      if ((val & 0x3) != 0
	  /* Coprocessor load/store indexes are 8-bits + '00' appended.  */
	  && (coproc_p
	      /* For DI, and DF under soft-float: */
	      || ((mode == DImode || mode == DFmode)
		  /* Without ldrd, we use stm/ldm, which does not
		     fare well with unaligned bits.  */
		  && (! TARGET_LDRD
		      /* Thumb-2 ldrd/strd is [-1020,+1020] in steps
			 of 4.  */
		      || TARGET_THUMB2))))

      /* When breaking down a [reg+index] reload address into
	 [(reg+high)+low], of which the (reg+high) gets turned into a
	 reload add insn, we try to decompose the index into high/low
	 values that can often also lead to better reload CSE.
	 For example:
		ldr r0, [r2, #4100]   // Offset too large
		ldr r1, [r2, #4104]   // Offset too large

	 is best reloaded as:
		add t1, r2, #4096
		ldr r0, [t1, #4]
		add t2, r2, #4096
		ldr r1, [t2, #8]

	 which post-reload CSE can simplify in most cases to eliminate the
	 second add instruction:
		add t1, r2, #4096
		ldr r0, [t1, #4]
		ldr r1, [t1, #8]

	 The idea here is that we want to split out the bits of the
	 constant as a mask, rather than as subtracting the maximum offset
	 that the respective type of load/store used can handle.

	 When encountering negative offsets, we can still utilize it even
	 if the overall offset is positive; sometimes this may lead to an
	 immediate that can be constructed with fewer instructions.
	 For example:
		ldr r0, [r2, #0x3FFFFC]

	 is best reloaded as:
		add t1, r2, #0x400000
		ldr r0, [t1, #-4]

	 The trick for spotting this for a load insn with N bits of offset
	 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
	 negative offset that is going to make bit N and all the bits below
	 it become zero in the remainder part.

	 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with
	 respect to sign-magnitude addressing (i.e. separate +- bit, or
	 1's complement), used in most cases of ARM load/store
	 instructions.  */

#define SIGN_MAG_LOW_ADDR_BITS(VAL, N)					\
  (((VAL) & ((1 << (N)) - 1))						\
   ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N))	\
   : 0)
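/* Worked expansion (illustrative, not from the original source): with
   VAL = 0x3FFFFC and N = 12 the low 12 bits are 0xFFC, so the macro
   computes ((0x1FFC ^ 0x1000) - 0x1000) = 0xFFC - 0x1000 = -4, and the
   caller derives high = 0x400000, reproducing the example above.  With
   VAL = 4100 the same formula yields 4 - 4096 = -4092 and high = 8192;
   the neighbouring offset 4104 maps to the same high part, which is what
   makes the split CSE-friendly.  */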
	  low = SIGN_MAG_LOW_ADDR_BITS (val, 10);

	  /* NEON quad-word load/stores are made of two double-word
	     accesses, so the valid index range is reduced by 8.  Treat as
	     9-bit range if we go over it.  */
	  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
	    low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
      else if (GET_MODE_SIZE (mode) == 8)
	    low = (TARGET_THUMB2
		   ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
		   : SIGN_MAG_LOW_ADDR_BITS (val, 8));

	    /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
	       to access doublewords.  The supported load/store offsets are
	       -8, -4, and 4, which we try to produce here.  */
	    low = ((val & 0xf) ^ 0x8) - 0x8;
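	    /* Worked arithmetic (illustrative, not from the original
	       source): ((val & 0xf) ^ 0x8) - 0x8 sign-extends the low
	       four bits, mapping 0..7 to themselves and 8..15 to -8..-1;
	       e.g. val & 0xf == 12 gives (12 ^ 8) - 8 = -4, so an
	       ldm-reachable offset of -4 is split out.  */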
      else if (GET_MODE_SIZE (mode) < 8)
	  /* NEON element load/stores do not have an offset.  */
	  if (TARGET_NEON_FP16 && mode == HFmode)

	      /* Thumb-2 has an asymmetrical index range of (-256,4096).
		 Try the wider 12-bit range first, and re-try if the result
		 is out of range.  */
	      low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
		low = SIGN_MAG_LOW_ADDR_BITS (val, 8);

	      if (mode == HImode || mode == HFmode)
		    low = SIGN_MAG_LOW_ADDR_BITS (val, 8);

		      /* The storehi/movhi_bytes fallbacks can use only
			 [-4094,+4094] of the full ldrb/strb index
			 range.  */
		      low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
		      if (low == 4095 || low == -4095)
		low = SIGN_MAG_LOW_ADDR_BITS (val, 12);

      high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
	       ^ (unsigned HOST_WIDE_INT) 0x80000000)
	      - (unsigned HOST_WIDE_INT) 0x80000000);
      /* Check for overflow or zero.  */
      if (low == 0 || high == 0 || (high + low != val))

      /* Reload the high part into a base reg; leave the low part
	 in the mem.  */
      *p = gen_rtx_PLUS (GET_MODE (*p),
			 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
		   MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
		   VOIDmode, 0, 0, opnum, (enum reload_type) type);
thumb_legitimize_reload_address (rtx *x_p,
				 enum machine_mode mode,
				 int opnum, int type,
				 int ind_levels ATTRIBUTE_UNUSED)
  if (GET_CODE (x) == PLUS
      && GET_MODE_SIZE (mode) < 4
      && REG_P (XEXP (x, 0))
      && XEXP (x, 0) == stack_pointer_rtx
      && CONST_INT_P (XEXP (x, 1))
      && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))

      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
		   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);

  /* If both registers are hi-regs, then it's better to reload the
     entire expression rather than each register individually.  That
     only requires one reload register rather than two.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))

      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
		   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);

/* Test for various thread-local symbols.  */

/* Return TRUE if X is a thread-local symbol.  */
arm_tls_symbol_p (rtx x)
  if (! TARGET_HAVE_TLS)

  if (GET_CODE (x) != SYMBOL_REF)

  return SYMBOL_REF_TLS_MODEL (x) != 0;

/* Helper for arm_tls_referenced_p.  */
arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
  if (GET_CODE (*x) == SYMBOL_REF)
    return SYMBOL_REF_TLS_MODEL (*x) != 0;

  /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
     TLS offsets, not real symbol references.  */
  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_TLS)

/* Return TRUE if X contains any TLS symbol references.  */
arm_tls_referenced_p (rtx x)
  if (! TARGET_HAVE_TLS)

  return for_each_rtx (&x, arm_tls_operand_p_1, NULL);

/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool contents.

   When generating pic allow anything.  */
arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
  /* At present, we have no support for Neon structure constants, so forbid
     them here.  It might be possible to handle simple cases like 0 and -1
     in future.  */
  if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))

  return flag_pic || !label_mentioned_p (x);

thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
  return (CONST_INT_P (x)
	  || CONST_DOUBLE_P (x)
	  || CONSTANT_ADDRESS_P (x)

arm_legitimate_constant_p (enum machine_mode mode, rtx x)
  return (!arm_cannot_force_const_mem (mode, x)
	  && (TARGET_32BIT
	      ? arm_legitimate_constant_p_1 (mode, x)
	      : thumb_legitimate_constant_p (mode, x)));

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
      split_const (x, &base, &offset);
      if (GET_CODE (base) == SYMBOL_REF
	  && !offset_within_block_p (base, INTVAL (offset)))

  return arm_tls_referenced_p (x);

#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X) \
  (REG_P (X) ? (X) : SUBREG_REG (X))
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
  enum machine_mode mode = GET_MODE (x);

      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

      return COSTS_N_INSNS (1);

      if (CONST_INT_P (XEXP (x, 1)))
	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

	  return COSTS_N_INSNS (2) + cycles;
      return COSTS_N_INSNS (1) + 16;

      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
	      + 4 * ((MEM_P (SET_SRC (x)))
		     + MEM_P (SET_DEST (x))));

      if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
      if (thumb_shiftable_const (INTVAL (x)))
	return COSTS_N_INSNS (2);
      return COSTS_N_INSNS (3);
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
		|| (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return COSTS_N_INSNS (2);

      return COSTS_N_INSNS (3);

      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))

      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)

      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

	return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
	 we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);
arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
  enum machine_mode mode = GET_MODE (x);
  enum rtx_code subcode;
  enum rtx_code code = GET_CODE (x);

      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));

      if (TARGET_HARD_FLOAT && mode == SFmode)
	*total = COSTS_N_INSNS (2);
      else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
	*total = COSTS_N_INSNS (4);
	*total = COSTS_N_INSNS (20);

      if (REG_P (XEXP (x, 1)))
	*total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
      else if (!CONST_INT_P (XEXP (x, 1)))
	*total = rtx_cost (XEXP (x, 1), code, 1, speed);

	*total += COSTS_N_INSNS (4);

    case ASHIFT: case LSHIFTRT: case ASHIFTRT:
      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	*total += COSTS_N_INSNS (3);

      *total += COSTS_N_INSNS (1);
      /* Increase the cost of complex shifts because they aren't any faster,
	 and reduce dual issue opportunities.  */
      if (arm_tune_cortex_a9
	  && outer != SET && !CONST_INT_P (XEXP (x, 1)))

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      if (CONST_INT_P (XEXP (x, 0))
	  && const_ok_for_arm (INTVAL (XEXP (x, 0))))
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);

      if (CONST_INT_P (XEXP (x, 1))
	  && const_ok_for_arm (INTVAL (XEXP (x, 1))))
	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	  if (TARGET_HARD_FLOAT
	      || (mode == DFmode && !TARGET_VFP_SINGLE)))
	      *total = COSTS_N_INSNS (1);
	      if (CONST_DOUBLE_P (XEXP (x, 0))
		  && arm_const_double_rtx (XEXP (x, 0)))
		  *total += rtx_cost (XEXP (x, 1), code, 1, speed);

	      if (CONST_DOUBLE_P (XEXP (x, 1))
		  && arm_const_double_rtx (XEXP (x, 1)))
		  *total += rtx_cost (XEXP (x, 0), code, 0, speed);

	  *total = COSTS_N_INSNS (20);

      *total = COSTS_N_INSNS (1);
      if (CONST_INT_P (XEXP (x, 0))
	  && const_ok_for_arm (INTVAL (XEXP (x, 0))))
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);

      subcode = GET_CODE (XEXP (x, 1));
      if (subcode == ASHIFT || subcode == ASHIFTRT
	  || subcode == LSHIFTRT
	  || subcode == ROTATE || subcode == ROTATERT)
	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);

      /* A shift as a part of RSB costs no more than RSB itself.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);

	  && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);

      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
	  || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
	  if (REG_P (XEXP (XEXP (x, 1), 0))
	      && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
	    *total += COSTS_N_INSNS (1);
      if (code == PLUS && arm_arch6 && mode == SImode
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	  *total = COSTS_N_INSNS (1);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);

      /* MLA: All arguments must be registers.  We filter out
	 multiplication by a power of two, so that we fall down into
	 the code below.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	  /* The cost comes from the cost of the multiply.  */

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	  if (TARGET_HARD_FLOAT
	      || (mode == DFmode && !TARGET_VFP_SINGLE)))
	      *total = COSTS_N_INSNS (1);
	      if (CONST_DOUBLE_P (XEXP (x, 1))
		  && arm_const_double_rtx (XEXP (x, 1)))
		  *total += rtx_cost (XEXP (x, 0), code, 0, speed);

	  *total = COSTS_N_INSNS (20);

      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
	  || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
	  if (REG_P (XEXP (XEXP (x, 0), 0))
	      && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
	    *total += COSTS_N_INSNS (1);

    case AND: case XOR: case IOR:

      /* Normally the frame registers will be spilt into reg+const during
	 reload, so it is a bad idea to combine them with other instructions,
	 since then they might not be moved outside of loops.  As a compromise
	 we allow integration with ops that have a constant as their second
	 operand.  */
      if (REG_OR_SUBREG_REG (XEXP (x, 0))
	  && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
	  && !CONST_INT_P (XEXP (x, 1)))
	*total = COSTS_N_INSNS (1);

	*total += COSTS_N_INSNS (2);
	if (CONST_INT_P (XEXP (x, 1))
	    && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
	    *total += rtx_cost (XEXP (x, 0), code, 0, speed);

	*total += COSTS_N_INSNS (1);
	if (CONST_INT_P (XEXP (x, 1))
	    && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
	    *total += rtx_cost (XEXP (x, 0), code, 0, speed);

      subcode = GET_CODE (XEXP (x, 0));
      if (subcode == ASHIFT || subcode == ASHIFTRT
	  || subcode == LSHIFTRT
	  || subcode == ROTATE || subcode == ROTATERT)
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);

	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);

      if (subcode == UMIN || subcode == UMAX
	  || subcode == SMIN || subcode == SMAX)
	  *total = COSTS_N_INSNS (3);

      /* This should have been handled by the CPU specific routines.  */
      if (arm_arch3m && mode == SImode
	  && GET_CODE (XEXP (x, 0)) == LSHIFTRT
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
	      == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
	  *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);

      *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	  if (TARGET_HARD_FLOAT
	      || (mode == DFmode && !TARGET_VFP_SINGLE)))
	      *total = COSTS_N_INSNS (1);
	  *total = COSTS_N_INSNS (2);

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      if (mode == SImode && code == NOT)
	  subcode = GET_CODE (XEXP (x, 0));
	  if (subcode == ASHIFT || subcode == ASHIFTRT
	      || subcode == LSHIFTRT
	      || subcode == ROTATE || subcode == ROTATERT
	      && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
	      /* Register shifts cost an extra cycle.  */
	      if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
		*total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),

      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	  *total = COSTS_N_INSNS (4);

      operand = XEXP (x, 0);

      if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
	     || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
	    && REG_P (XEXP (operand, 0))
	    && REGNO (XEXP (operand, 0)) == CC_REGNUM))
	*total += COSTS_N_INSNS (1);
      *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
		 + rtx_cost (XEXP (x, 2), code, 2, speed));

      if (mode == SImode && XEXP (x, 1) == const0_rtx)
	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);

      if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
	  && mode == SImode && XEXP (x, 1) == const0_rtx)
	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);

      if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
	  && mode == SImode && XEXP (x, 1) == const0_rtx)
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);

      /* SCC insns.  If the comparison has already been performed, they
	 cost 2 instructions; otherwise they need an additional comparison
	 before them.  */
      *total = COSTS_N_INSNS (2);
      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)

      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)

      *total += COSTS_N_INSNS (1);
      if (CONST_INT_P (XEXP (x, 1))
	  && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);

      subcode = GET_CODE (XEXP (x, 0));
      if (subcode == ASHIFT || subcode == ASHIFTRT
	  || subcode == LSHIFTRT
	  || subcode == ROTATE || subcode == ROTATERT)
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);

	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);

      *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
      if (!CONST_INT_P (XEXP (x, 1))
	  || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
	*total += rtx_cost (XEXP (x, 1), code, 1, speed);

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	  if (TARGET_HARD_FLOAT
	      || (mode == DFmode && !TARGET_VFP_SINGLE)))
	      *total = COSTS_N_INSNS (1);
	  *total = COSTS_N_INSNS (20);

      *total = COSTS_N_INSNS (1);
	*total += COSTS_N_INSNS (3);

      if (GET_MODE_CLASS (mode) == MODE_INT)
	  rtx op = XEXP (x, 0);
	  enum machine_mode opmode = GET_MODE (op);

	    *total += COSTS_N_INSNS (1);

	  if (opmode != SImode)
	      /* If !arm_arch4, we use one of the extendhisi2_mem
		 or movhi_bytes patterns for HImode.  For a QImode
		 sign extension, we first zero-extend from memory
		 and then perform a shift sequence.  */
	      if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
		*total += COSTS_N_INSNS (2);
	      *total += COSTS_N_INSNS (1);

	  /* We don't have the necessary insn, so we need to perform some
	     other operation.  */
	  else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
	    /* An and with constant 255.  */
	    *total += COSTS_N_INSNS (1);

	    /* A shift sequence.  Increase costs slightly to avoid
	       combining two shifts into an extend operation.  */
	    *total += COSTS_N_INSNS (2) + 1;
      switch (GET_MODE (XEXP (x, 0)))
	  *total = COSTS_N_INSNS (1);

      *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);

      if (const_ok_for_arm (INTVAL (x))
	  || const_ok_for_arm (~INTVAL (x)))
	*total = COSTS_N_INSNS (1);
	*total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
						  INTVAL (x), NULL_RTX,

      *total = COSTS_N_INSNS (3);

      *total = COSTS_N_INSNS (1);

      *total = COSTS_N_INSNS (1);
      *total += rtx_cost (XEXP (x, 0), code, 0, speed);

      if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*total = COSTS_N_INSNS (1);
	*total = COSTS_N_INSNS (4);

      /* The vec_extract patterns accept memory operands that require an
	 address reload.  Account for the cost of that reload to give the
	 auto-inc-dec pass an incentive to try to replace them.  */
      if (TARGET_NEON && MEM_P (SET_DEST (x))
	  && GET_CODE (SET_SRC (x)) == VEC_SELECT)
	  *total = rtx_cost (SET_DEST (x), code, 0, speed);
	  if (!neon_vector_mem_operand (SET_DEST (x), 2))
	    *total += COSTS_N_INSNS (1);

      /* Likewise for the vec_set patterns.  */
      if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
	  && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
	  && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
	  rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
	  *total = rtx_cost (mem, code, 0, speed);
	  if (!neon_vector_mem_operand (mem, 2))
	    *total += COSTS_N_INSNS (1);

      /* We cost this as high as our memory costs to allow this to
	 be hoisted from loops.  */
      if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
	  *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));

	  && TARGET_HARD_FLOAT
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*total = COSTS_N_INSNS (1);
	*total = COSTS_N_INSNS (4);

      *total = COSTS_N_INSNS (4);
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine-grained tuning when we have more related test cases.  */
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
  enum machine_mode mode = GET_MODE (x);

      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
	 defined by RTL expansion, especially for the expansion of
	 multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
	   && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	  || (GET_CODE (XEXP (x, 1)) == MULT
	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
	return COSTS_N_INSNS (2);
      /* On purpose fall through for normal RTX.  */

      return COSTS_N_INSNS (1);

      if (CONST_INT_P (XEXP (x, 1)))
	  /* Thumb1 mul instruction can't operate on const.  We must load it
	     into a register first.  */
	  int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
	  return COSTS_N_INSNS (1) + const_size;
      return COSTS_N_INSNS (1);
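      /* Illustrative example (added, not from the original source): for
	 "x * 10" Thumb-1 must first materialize the constant, e.g.
		movs	r3, #10
		muls	r0, r3
	 which is why the constant operand is costed above as a separate
	 load on top of the multiply itself.  */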
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
	      + 4 * ((MEM_P (SET_SRC (x)))
		     + MEM_P (SET_DEST (x))));

      if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
	return COSTS_N_INSNS (1);
      /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
      if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
	return COSTS_N_INSNS (2);
      /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
      if (thumb_shiftable_const (INTVAL (x)))
	return COSTS_N_INSNS (2);
      return COSTS_N_INSNS (3);
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
		|| (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return COSTS_N_INSNS (2);

      return COSTS_N_INSNS (3);

      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))

      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)

      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
	  return (1 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	  return (4 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	  return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
/* RTX costs when optimizing for size.  */
arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
  enum machine_mode mode = GET_MODE (x);

      *total = thumb1_size_rtx_costs (x, code, outer_code);

  /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions.  */

      /* A memory access costs 1 insn if the mode is small, or the address is
	 a single register, otherwise it costs one insn per word.  */
      if (REG_P (XEXP (x, 0)))
	*total = COSTS_N_INSNS (1);
	       && GET_CODE (XEXP (x, 0)) == PLUS
	       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
	/* This will be split into two instructions.
	   See arm.md:calculate_pic_address.  */
	*total = COSTS_N_INSNS (2);
	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      /* Needs a libcall, so it costs about this.  */
      *total = COSTS_N_INSNS (2);

      if (mode == SImode && REG_P (XEXP (x, 1)))
	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);

      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
	  *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
      else if (mode == SImode)
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
	  /* Slightly disparage register shifts, but not by much.  */
	  if (!CONST_INT_P (XEXP (x, 1)))
	    *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);

      /* Needs a libcall.  */
      *total = COSTS_N_INSNS (2);

      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	  *total = COSTS_N_INSNS (1);

	enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
	enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));

	if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
	    || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
	    || subcode1 == ROTATE || subcode1 == ROTATERT
	    || subcode1 == ASHIFT || subcode1 == LSHIFTRT
	    || subcode1 == ASHIFTRT)
	    /* It's just the cost of the two operands.  */

	*total = COSTS_N_INSNS (1);

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	  *total = COSTS_N_INSNS (1);

      /* A shift as a part of ADD costs nothing.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	  *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
	  *total += rtx_cost (XEXP (x, 1), code, 1, false);

    case AND: case XOR: case IOR:
	enum rtx_code subcode = GET_CODE (XEXP (x, 0));

	if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
	    || subcode == LSHIFTRT || subcode == ASHIFTRT
	    || (code == AND && subcode == NOT))
	    /* It's just the cost of the two operands.  */

	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	  *total = COSTS_N_INSNS (1);

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      if (cc_register (XEXP (x, 0), VOIDmode))
	*total = COSTS_N_INSNS (1);

      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*total = COSTS_N_INSNS (1);
	*total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));

      return arm_rtx_costs_1 (x, outer_code, total, 0);
      if (const_ok_for_arm (INTVAL (x)))
	/* A multiplication by a constant requires another instruction
	   to load the constant to a register.  */
	*total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
      else if (const_ok_for_arm (~INTVAL (x)))
	*total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
      else if (const_ok_for_arm (-INTVAL (x)))
	  if (outer_code == COMPARE || outer_code == PLUS
	      || outer_code == MINUS)
	    *total = COSTS_N_INSNS (1);
	    *total = COSTS_N_INSNS (2);

      *total = COSTS_N_INSNS (2);

      *total = COSTS_N_INSNS (4);

	  && TARGET_HARD_FLOAT
	  && outer_code == SET
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*total = COSTS_N_INSNS (1);
	*total = COSTS_N_INSNS (4);

      /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
	 cost of these slightly.  */
      *total = COSTS_N_INSNS (1) + 1;

      if (mode != VOIDmode)
	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
	*total = COSTS_N_INSNS (4); /* Who knows?  */
/* RTX cost dispatcher: use the size costs when optimizing for size,
   otherwise the tuning-specific costs.  */
arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
	       int *total, bool speed)
    return arm_size_rtx_costs (x, (enum rtx_code) code,
			       (enum rtx_code) outer_code, total);
    return current_tune->rtx_costs (x, (enum rtx_code) code,
				    (enum rtx_code) outer_code,

/* RTX costs for cores with a slow MUL implementation.  Thumb-2 is not
   supported on any "slowmul" cores, so it can be ignored.  */
arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		       int *total, bool speed)
  enum machine_mode mode = GET_MODE (x);

      *total = thumb1_rtx_costs (x, code, outer_code);

      if (GET_MODE_CLASS (mode) == MODE_FLOAT
	  *total = COSTS_N_INSNS (20);

      if (CONST_INT_P (XEXP (x, 1)))
	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
				      & (unsigned HOST_WIDE_INT) 0xffffffff);
	  int cost, const_ok = const_ok_for_arm (i);
	  int j, booth_unit_size;

	  /* Tune as appropriate.  */
	  cost = const_ok ? 4 : 8;
	  booth_unit_size = 2;
	  for (j = 0; i && j < 32; j += booth_unit_size)
	      i >>= booth_unit_size;
	      cost++;

	  *total = COSTS_N_INSNS (cost);
	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);

      *total = COSTS_N_INSNS (20);

      return arm_rtx_costs_1 (x, outer_code, total, speed);
/* RTX cost for cores with a fast multiply unit (M variants).  */
arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		       int *total, bool speed)
  enum machine_mode mode = GET_MODE (x);

      *total = thumb1_rtx_costs (x, code, outer_code);
  /* ??? should thumb2 use different costs?  */

      /* There is no point basing this on the tuning, since it is always the
	 fast variant if it exists at all.  */
	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	  *total = COSTS_N_INSNS (2);

      *total = COSTS_N_INSNS (5);

      if (CONST_INT_P (XEXP (x, 1)))
	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
				      & (unsigned HOST_WIDE_INT) 0xffffffff);
	  int cost, const_ok = const_ok_for_arm (i);
	  int j, booth_unit_size;

	  /* Tune as appropriate.  */
	  cost = const_ok ? 4 : 8;
	  booth_unit_size = 8;
	  for (j = 0; i && j < 32; j += booth_unit_size)
	      i >>= booth_unit_size;
	      cost++;

	  *total = COSTS_N_INSNS (cost);

      *total = COSTS_N_INSNS (4);
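      /* Worked example (illustrative, not from the original source):
	 costing "r0 * 0x12345678" here starts at cost = 8 (the constant
	 is not const_ok_for_arm and must be loaded), then the Booth loop
	 strips one 8-bit chunk per step,
	 0x12345678 -> 0x123456 -> 0x1234 -> 0x12 -> 0, i.e. four steps,
	 ending at COSTS_N_INSNS (12) -- assuming the loop body increments
	 cost once per iteration, as restored above.  */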
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	  if (TARGET_HARD_FLOAT
	      || (mode == DFmode && !TARGET_VFP_SINGLE)))
	      *total = COSTS_N_INSNS (1);

      /* Requires a lib call.  */
      *total = COSTS_N_INSNS (20);

      return arm_rtx_costs_1 (x, outer_code, total, speed);

/* RTX cost for XScale CPUs.  Thumb-2 is not supported on any xscale cores,
   so it can be ignored.  */
arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		      int *total, bool speed)
  enum machine_mode mode = GET_MODE (x);

      *total = thumb1_rtx_costs (x, code, outer_code);

      if (GET_CODE (XEXP (x, 0)) != MULT)
	return arm_rtx_costs_1 (x, outer_code, total, speed);

      /* A COMPARE of a MULT is slow on XScale; the muls instruction
	 will stall until the multiplication is complete.  */
      *total = COSTS_N_INSNS (3);

      /* There is no point basing this on the tuning, since it is always the
	 fast variant if it exists at all.  */
	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	  *total = COSTS_N_INSNS (2);

      *total = COSTS_N_INSNS (5);

      if (CONST_INT_P (XEXP (x, 1)))
	  /* If operand 1 is a constant we can more accurately
	     calculate the cost of the multiply.  The multiplier can
	     retire 15 bits on the first cycle and a further 12 on the
	     second.  We do, of course, have to load the constant into
	     a register first.  */
	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
	  /* There's a general overhead of one cycle.  */
	  int cost = 1;
	  unsigned HOST_WIDE_INT masked_const;

	  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

	  masked_const = i & 0xffff8000;
	  if (masked_const != 0)
	      cost++;
	      masked_const = i & 0xf8000000;
	      if (masked_const != 0)
		cost++;
	  *total = COSTS_N_INSNS (cost);
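	  /* Worked example (illustrative, not from the original source):
	     for a multiplier of 0x00123456, bits above bit 14 are set
	     (0x00123456 & 0xffff8000 == 0x00120000), adding one cycle,
	     but no bits above bit 26 are set
	     (0x00123456 & 0xf8000000 == 0), so the multiply is costed at
	     two cycles on top of loading the constant.  */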
      *total = COSTS_N_INSNS (3);

      /* Requires a lib call.  */
      *total = COSTS_N_INSNS (20);

      return arm_rtx_costs_1 (x, outer_code, total, speed);
/* RTX costs for 9e (and later) cores.  */
arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		  int *total, bool speed)
  enum machine_mode mode = GET_MODE (x);

	  *total = COSTS_N_INSNS (3);

      *total = thumb1_rtx_costs (x, code, outer_code);

      /* There is no point basing this on the tuning, since it is always the
	 fast variant if it exists at all.  */
	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	  *total = COSTS_N_INSNS (2);

      *total = COSTS_N_INSNS (5);

      *total = COSTS_N_INSNS (2);

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	  if (TARGET_HARD_FLOAT
	      || (mode == DFmode && !TARGET_VFP_SINGLE)))
	      *total = COSTS_N_INSNS (1);

      *total = COSTS_N_INSNS (20);

      return arm_rtx_costs_1 (x, outer_code, total, speed);

/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
arm_arm_address_cost (rtx x)
  enum rtx_code c = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)

      if (CONST_INT_P (XEXP (x, 1)))

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))

arm_thumb_address_cost (rtx x)
  enum rtx_code c = GET_CODE (x);

      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))

arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
/* Adjust cost hook for XScale.  */
xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (REG_NOTE_KIND (link) == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
	 operand for INSN.  If we have a shifted input operand and the
	 instruction we depend on is another ALU instruction, then we may
	 have to account for an additional stall.  */
      if (shift_opnum != 0
	  && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
	  rtx shifted_operand;

	  /* Get the shifted operand.  */
	  extract_insn (insn);
	  shifted_operand = recog_data.operand[shift_opnum];

	  /* Iterate over all the operands in DEP.  If we write an operand
	     that overlaps with SHIFTED_OPERAND, then we have to increase
	     the cost of this dependency.  */
	  preprocess_constraints ();
	  for (opno = 0; opno < recog_data.n_operands; opno++)
	      /* We can ignore strict inputs.  */
	      if (recog_data.operand_type[opno] == OP_IN)

	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
/* Adjust cost hook for Cortex A9.  */
cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
  switch (REG_NOTE_KIND (link))

    case REG_DEP_OUTPUT:
      if (recog_memoized (insn) >= 0
	  && recog_memoized (dep) >= 0)
	  if (GET_CODE (PATTERN (insn)) == SET)
	      if (GET_MODE_CLASS
		  (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
		  || GET_MODE_CLASS
		  (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
		  enum attr_type attr_type_insn = get_attr_type (insn);
		  enum attr_type attr_type_dep = get_attr_type (dep);

		  /* By default all dependencies of the form
		     s0 = s0 <op> s1
		     s0 = s0 <op> s2
		     have an extra latency of 1 cycle because
		     of the input and output dependency in this
		     case.  However this gets modeled as a true
		     dependency and hence all these checks.  */
		  if (REG_P (SET_DEST (PATTERN (insn)))
		      && REG_P (SET_DEST (PATTERN (dep)))
		      && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
						  SET_DEST (PATTERN (dep))))
		      /* FMACS is a special case where the dependent
			 instruction can be issued 3 cycles before
			 the normal latency in case of an output
			 dependency.  */
		      if ((attr_type_insn == TYPE_FMACS
			   || attr_type_insn == TYPE_FMACD)
			  && (attr_type_dep == TYPE_FMACS
			      || attr_type_dep == TYPE_FMACD))
			  if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) - 3;
			    *cost = insn_default_latency (dep);

			  if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) + 1;
			    *cost = insn_default_latency (dep);
/* Adjust cost hook for FA726TE.  */
fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
  /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
     predicated) has a penalty of 3.  */
  if (REG_NOTE_KIND (link) == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
	  && get_attr_type (insn) != TYPE_BRANCH)

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
	  || get_attr_conds (insn) == CONDS_USE)
8716 /* Implement TARGET_REGISTER_MOVE_COST.
8718 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
8719 it is typically more expensive than a single memory access. We set
8720 the cost to less than two memory accesses so that floating
8721 point to integer conversion does not go through memory. */
8724 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
8725 reg_class_t from, reg_class_t to)
8729 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
8730 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
8732 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
8733 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
8735 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
8742 if (from == HI_REGS || to == HI_REGS)
8749 /* Implement TARGET_MEMORY_MOVE_COST. */
8752 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
8753 bool in ATTRIBUTE_UNUSED)
8759 if (GET_MODE_SIZE (mode) < 4)
8762 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
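/* Editorial worked example for the Thumb-1 formula above (the
   TARGET_32BIT path and the early return for narrow modes are elided
   in this excerpt):

     SImode (4 bytes) via LO_REGS: 2 * 4 * 1 = 8
     SImode (4 bytes) via HI_REGS: 2 * 4 * 2 = 16

   i.e. spilling through a high register is modeled as twice as
   expensive as through a low register.  */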
8766 /* Vectorizer cost model implementation. */
8768 /* Implement targetm.vectorize.builtin_vectorization_cost. */
8770 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
8772 int misalign ATTRIBUTE_UNUSED)
8776 switch (type_of_cost)
8779 return current_tune->vec_costs->scalar_stmt_cost;
8782 return current_tune->vec_costs->scalar_load_cost;
8785 return current_tune->vec_costs->scalar_store_cost;
8788 return current_tune->vec_costs->vec_stmt_cost;
8791 return current_tune->vec_costs->vec_align_load_cost;
8794 return current_tune->vec_costs->vec_store_cost;
8797 return current_tune->vec_costs->vec_to_scalar_cost;
8800 return current_tune->vec_costs->scalar_to_vec_cost;
8802 case unaligned_load:
8803 return current_tune->vec_costs->vec_unalign_load_cost;
8805 case unaligned_store:
8806 return current_tune->vec_costs->vec_unalign_store_cost;
8808 case cond_branch_taken:
8809 return current_tune->vec_costs->cond_taken_branch_cost;
8811 case cond_branch_not_taken:
8812 return current_tune->vec_costs->cond_not_taken_branch_cost;
8815 case vec_promote_demote:
8816 return current_tune->vec_costs->vec_stmt_cost;
8819 elements = TYPE_VECTOR_SUBPARTS (vectype);
8820 return elements / 2 + 1;
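/* Editorial worked example (the case label, vec_construct in current
   GCC sources, is elided in this excerpt): for a V4SImode vector,
   TYPE_VECTOR_SUBPARTS is 4, so the construction cost returned is
   4 / 2 + 1 = 3.  */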
8827 /* Implement targetm.vectorize.add_stmt_cost. */
8830 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
8831 struct _stmt_vec_info *stmt_info, int misalign,
8832 enum vect_cost_model_location where)
8834 unsigned *cost = (unsigned *) data;
8835 unsigned retval = 0;
8837 if (flag_vect_cost_model)
8839 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
8840 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
8842 /* Statements in an inner loop relative to the loop being
8843 vectorized are weighted more heavily. The value here is
8844 arbitrary and could potentially be improved with analysis. */
8845 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
8846 count *= 50; /* FIXME. */
8848 retval = (unsigned) (count * stmt_cost);
8849 cost[where] += retval;
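/* Editorial worked example: a vector_load statement (count == 1)
   inside an inner loop relative to the loop being vectorized is
   weighted up to count == 50, so retval = 50 * vec_align_load_cost and
   that amount is accumulated into cost[vect_body].  */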
8855 /* Return true if and only if this insn can dual-issue only as older. */
8857 cortexa7_older_only (rtx insn)
8859 if (recog_memoized (insn) < 0)
8862 if (get_attr_insn (insn) == INSN_MOV)
8865 switch (get_attr_type (insn))
8868 case TYPE_LOAD_BYTE:
8897 /* Return true if and only if this insn can dual-issue as younger. */
8899 cortexa7_younger (FILE *file, int verbose, rtx insn)
8901 if (recog_memoized (insn) < 0)
8904 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
8908 if (get_attr_insn (insn) == INSN_MOV)
8911 switch (get_attr_type (insn))
8913 case TYPE_SIMPLE_ALU_IMM:
8914 case TYPE_SIMPLE_ALU_SHIFT:
8924 /* Look for an instruction that can dual issue only as an older
8925 instruction, and move it in front of any instructions that can
8926 dual-issue as younger, while preserving the relative order of all
8927 other instructions in the ready list. This is a heuristic to help
8928 dual-issue in later cycles, by postponing issue of more flexible
8929 instructions. This heuristic may affect dual issue opportunities
8930 in the current cycle. */
8932 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
8936 int first_older_only = -1, first_younger = -1;
8940 ";; sched_reorder for cycle %d with %d insns in ready list\n",
8944 /* Traverse the ready list from the head (the instruction to issue
8945 first), looking for the first instruction that can issue as
8946 younger and the first instruction that can dual-issue only as older. */
8948 for (i = *n_readyp - 1; i >= 0; i--)
8950 rtx insn = ready[i];
8951 if (cortexa7_older_only (insn))
8953 first_older_only = i;
8955 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
8958 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
8962 /* Nothing to reorder because either no younger insn found or insn
8963 that can dual-issue only as older appears before any insn that
8964 can dual-issue as younger. */
8965 if (first_younger == -1)
8968 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
8972 /* Nothing to reorder because no older-only insn in the ready list. */
8973 if (first_older_only == -1)
8976 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
8980 /* Move first_older_only insn before first_younger. */
8982 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
8983 INSN_UID (ready[first_older_only]),
8984 INSN_UID (ready[first_younger]));
8985 rtx first_older_only_insn = ready[first_older_only];
8986 for (i = first_older_only; i < first_younger; i++)
8988 ready[i] = ready[i+1];
8991 ready[i] = first_older_only_insn;
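/* Editorial worked example of the rotation above, assuming the elided
   scan loop stops at the first older-only insn it finds.  The head of
   the ready list (next insn to issue) lives at ready[*n_readyp - 1]:

     index:  3 (head)  2        1           0
     insn:   younger   younger  older-only  other

   gives first_younger == 3 and first_older_only == 1; the shift loop
   then produces

     index:  3 (head)    2        1        0
     insn:   older-only  younger  younger  other

   moving the older-only insn to the head while preserving the relative
   order of the rest.  */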
8995 /* Implement TARGET_SCHED_REORDER. */
8997 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
9003 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
9006 /* Do nothing for other cores. */
9010 return arm_issue_rate ();
9013 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
9014 It corrects the value of COST based on the relationship between
9015 INSN and DEP through the dependence LINK. It returns the new
9016 value. There is a per-core adjust_cost hook to adjust scheduler costs
9017 and the per-core hook can choose to completely override the generic
9018 adjust_cost function. Only put bits of code into arm_adjust_cost that
9019 are common across all cores. */
9021 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
9025 /* When generating Thumb-1 code, we want to place flag-setting operations
9026 close to a conditional branch which depends on them, so that we can
9027 omit the comparison. */
9029 && REG_NOTE_KIND (link) == 0
9030 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
9031 && recog_memoized (dep) >= 0
9032 && get_attr_conds (dep) == CONDS_SET)
9035 if (current_tune->sched_adjust_cost != NULL)
9037 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
9041 /* XXX Is this strictly true? */
9042 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9043 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
9046 /* Call insns don't incur a stall, even if they follow a load. */
9047 if (REG_NOTE_KIND (link) == 0
9051 if ((i_pat = single_set (insn)) != NULL
9052 && MEM_P (SET_SRC (i_pat))
9053 && (d_pat = single_set (dep)) != NULL
9054 && MEM_P (SET_DEST (d_pat)))
9056 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
9057 /* This is a load after a store; there is no conflict if the load reads
9058 from a cached area. Assume that loads from the stack, and from the
9059 constant pool are cached, and that others will miss. This is a rough approximation. */
9062 if ((GET_CODE (src_mem) == SYMBOL_REF
9063 && CONSTANT_POOL_ADDRESS_P (src_mem))
9064 || reg_mentioned_p (stack_pointer_rtx, src_mem)
9065 || reg_mentioned_p (frame_pointer_rtx, src_mem)
9066 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
9074 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
9077 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
9079 return (optimize > 0) ? 2 : 0;
9083 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
9085 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
9088 static bool fp_consts_inited = false;
9090 static REAL_VALUE_TYPE value_fp0;
9093 init_fp_table (void)
9097 r = REAL_VALUE_ATOF ("0", DFmode);
9099 fp_consts_inited = true;
9102 /* Return TRUE if rtx X is a valid immediate FP constant. */
9104 arm_const_double_rtx (rtx x)
9108 if (!fp_consts_inited)
9111 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9112 if (REAL_VALUE_MINUS_ZERO (r))
9115 if (REAL_VALUES_EQUAL (r, value_fp0))
9121 /* VFPv3 has a fairly wide range of representable immediates, formed from
9122 "quarter-precision" floating-point values. These can be evaluated using this
9123 formula (with ^ for exponentiation): (-1)^s * n * 2^-r
9127 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
9128 16 <= n <= 31 and 0 <= r <= 7.
9130 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
9132 - A (most-significant) is the sign bit.
9133 - BCD are the exponent (encoded as r XOR 3).
9134 - EFGH are the mantissa (encoded as n - 16).
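/* Editorial sketch (not part of the original file): a minimal decoder
   for the ABCDEFGH encoding described above, included here only to
   illustrate the mapping.  It assumes the quarter-precision formula
   (-1)^s * n * 2^-r with 16 <= n <= 31 and 0 <= r <= 7.  */

static double
vfp3_decode_quarter_precision_example (unsigned char imm8)
{
  int sign = (imm8 >> 7) & 1;        /* A: sign bit.  */
  int r = ((imm8 >> 4) & 7) ^ 3;     /* BCD: exponent, stored as r XOR 3.  */
  int n = (imm8 & 0xf) + 16;         /* EFGH: mantissa, stored as n - 16.  */
  double value = (double) n / (double) (1 << r);

  return sign ? -value : value;
}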
9137 /* Return an integer index for a VFPv3 immediate operand X suitable for the
9138 fconst[sd] instruction, or -1 if X isn't suitable. */
9140 vfp3_const_double_index (rtx x)
9142 REAL_VALUE_TYPE r, m;
9144 unsigned HOST_WIDE_INT mantissa, mant_hi;
9145 unsigned HOST_WIDE_INT mask;
9146 HOST_WIDE_INT m1, m2;
9147 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
9149 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
9152 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9154 /* We can't represent these things, so detect them first. */
9155 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
9158 /* Extract sign, exponent and mantissa. */
9159 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
9160 r = real_value_abs (&r);
9161 exponent = REAL_EXP (&r);
9162 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
9163 highest (sign) bit, with a fixed binary point at bit point_pos.
9164 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
9165 bits for the mantissa, this may fail (low bits would be lost). */
9166 real_ldexp (&m, &r, point_pos - exponent);
9167 REAL_VALUE_TO_INT (&m1, &m2, m);
9171 /* If there are bits set in the low part of the mantissa, we can't
9172 represent this value. */
9176 /* Now make it so that mantissa contains the most-significant bits, and move
9177 the point_pos to indicate that the least-significant bits have been discarded. */
9179 point_pos -= HOST_BITS_PER_WIDE_INT;
9182 /* We can permit four significant bits of mantissa only, plus a high bit
9183 which is always 1. */
9184 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
9185 if ((mantissa & mask) != 0)
9188 /* Now we know the mantissa is in range, chop off the unneeded bits. */
9189 mantissa >>= point_pos - 5;
9191 /* The mantissa may be zero. Disallow that case. (It's possible to load the
9192 floating-point immediate zero with Neon using an integer-zero load, but
9193 that case is handled elsewhere.) */
9197 gcc_assert (mantissa >= 16 && mantissa <= 31);
9199 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
9200 normalized significands are in the range [1, 2)). (Our mantissa is shifted
9201 left 4 places at this point relative to normalized IEEE754 values). GCC
9202 internally uses [0.5, 1) (see real.c), so the exponent returned from
9203 REAL_EXP must be altered. */
9204 exponent = 5 - exponent;
9206 if (exponent < 0 || exponent > 7)
9209 /* Sign, mantissa and exponent are now in the correct form to plug into the
9210 formula described in the comment above. */
9211 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
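/* Editorial worked example: for x == 1.0, GCC's [0.5, 1) normalization
   gives REAL_EXP == 1, so exponent = 5 - 1 = 4, and the extracted
   mantissa is 16 (implicit high bit, no fraction bits).  The returned
   index is (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70, which decodes
   back to 16 * 2^-4 = 1.0 under the formula above.  */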
9214 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
9216 vfp3_const_double_rtx (rtx x)
9221 return vfp3_const_double_index (x) != -1;
9224 /* Recognize immediates which can be used in various Neon instructions. Legal
9225 immediates are described by the following table (for VMVN variants, the
9226 bitwise inverse of the constant shown is recognized. In either case, VMOV
9227 is output and the correct instruction to use for a given constant is chosen
9228 by the assembler). The constant shown is replicated across all elements of
9229 the destination vector.
9231 insn elems variant constant (binary)
9232 ---- ----- ------- -----------------
9233 vmov i32 0 00000000 00000000 00000000 abcdefgh
9234 vmov i32 1 00000000 00000000 abcdefgh 00000000
9235 vmov i32 2 00000000 abcdefgh 00000000 00000000
9236 vmov i32 3 abcdefgh 00000000 00000000 00000000
9237 vmov i16 4 00000000 abcdefgh
9238 vmov i16 5 abcdefgh 00000000
9239 vmvn i32 6 00000000 00000000 00000000 abcdefgh
9240 vmvn i32 7 00000000 00000000 abcdefgh 00000000
9241 vmvn i32 8 00000000 abcdefgh 00000000 00000000
9242 vmvn i32 9 abcdefgh 00000000 00000000 00000000
9243 vmvn i16 10 00000000 abcdefgh
9244 vmvn i16 11 abcdefgh 00000000
9245 vmov i32 12 00000000 00000000 abcdefgh 11111111
9246 vmvn i32 13 00000000 00000000 abcdefgh 11111111
9247 vmov i32 14 00000000 abcdefgh 11111111 11111111
9248 vmvn i32 15 00000000 abcdefgh 11111111 11111111
9249 vmov i8 16 abcdefgh
9250 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
9251 eeeeeeee ffffffff gggggggg hhhhhhhh
9252 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
9253 vmov f32 19 00000000 00000000 00000000 00000000
9255 For case 18, B = !b. Representable values are exactly those accepted by
9256 vfp3_const_double_index, but are output as floating-point numbers rather than integers.
9259 For case 19, we will change it to vmov.i32 when assembling.
9261 Variants 0-5 (inclusive) may also be used as immediates for the second
9262 operand of VORR/VBIC instructions.
9264 The INVERSE argument causes the bitwise inverse of the given operand to be
9265 recognized instead (used for recognizing legal immediates for the VAND/VORN
9266 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
9267 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
9268 output, rather than the real insns vbic/vorr).
9270 INVERSE makes no difference to the recognition of float vectors.
9272 The return value is the variant of immediate as shown in the above table, or
9273 -1 if the given value doesn't match any of the listed patterns.
9276 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
9277 rtx *modconst, int *elementwidth)
9279 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
9281 for (i = 0; i < idx; i += (STRIDE)) \
9286 immtype = (CLASS); \
9287 elsize = (ELSIZE); \
9291 unsigned int i, elsize = 0, idx = 0, n_elts;
9292 unsigned int innersize;
9293 unsigned char bytes[16];
9294 int immtype = -1, matches;
9295 unsigned int invmask = inverse ? 0xff : 0;
9296 bool vector = GET_CODE (op) == CONST_VECTOR;
9300 n_elts = CONST_VECTOR_NUNITS (op);
9301 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9306 if (mode == VOIDmode)
9308 innersize = GET_MODE_SIZE (mode);
9311 /* Vectors of float constants. */
9312 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
9314 rtx el0 = CONST_VECTOR_ELT (op, 0);
9317 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
9320 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
9322 for (i = 1; i < n_elts; i++)
9324 rtx elt = CONST_VECTOR_ELT (op, i);
9327 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
9329 if (!REAL_VALUES_EQUAL (r0, re))
9334 *modconst = CONST_VECTOR_ELT (op, 0);
9339 if (el0 == CONST0_RTX (GET_MODE (el0)))
9345 /* Splat vector constant out into a byte vector. */
9346 for (i = 0; i < n_elts; i++)
9348 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
9349 unsigned HOST_WIDE_INT elpart;
9350 unsigned int part, parts;
9352 if (CONST_INT_P (el))
9354 elpart = INTVAL (el);
9357 else if (CONST_DOUBLE_P (el))
9359 elpart = CONST_DOUBLE_LOW (el);
9365 for (part = 0; part < parts; part++)
9368 for (byte = 0; byte < innersize; byte++)
9370 bytes[idx++] = (elpart & 0xff) ^ invmask;
9371 elpart >>= BITS_PER_UNIT;
9373 if (CONST_DOUBLE_P (el))
9374 elpart = CONST_DOUBLE_HIGH (el);
9379 gcc_assert (idx == GET_MODE_SIZE (mode));
9383 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
9384 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9386 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9387 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9389 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
9390 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9392 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
9393 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
9395 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
9397 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
9399 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
9400 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9402 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9403 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9405 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
9406 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9408 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
9409 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
9411 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
9413 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
9415 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9416 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9418 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9419 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9421 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
9422 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9424 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
9425 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9427 CHECK (1, 8, 16, bytes[i] == bytes[0]);
9429 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
9430 && bytes[i] == bytes[(i + 8) % idx]);
9438 *elementwidth = elsize;
9442 unsigned HOST_WIDE_INT imm = 0;
9444 /* Un-invert bytes of recognized vector, if necessary. */
9446 for (i = 0; i < idx; i++)
9447 bytes[i] ^= invmask;
9451 /* FIXME: Broken on 32-bit H_W_I hosts. */
9452 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
9454 for (i = 0; i < 8; i++)
9455 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
9456 << (i * BITS_PER_UNIT);
9458 *modconst = GEN_INT (imm);
9462 unsigned HOST_WIDE_INT imm = 0;
9464 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
9465 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
9467 *modconst = GEN_INT (imm);
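/* Editorial worked example: a V4SImode CONST_VECTOR whose elements are
   all 0xff splats (little-endian) into the byte pattern ff 00 00 00
   repeated four times, so the first CHECK above fires: the return
   value is variant 0 (vmov i32), *elementwidth is 32, and *modconst is
   set to GEN_INT (0xff).  */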
9475 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
9476 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
9477 float elements), and a modified constant (whatever should be output for a
9478 VMOV) in *MODCONST. */
9481 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
9482 rtx *modconst, int *elementwidth)
9486 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
9492 *modconst = tmpconst;
9495 *elementwidth = tmpwidth;
9500 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
9501 the immediate is valid, write a constant suitable for using as an operand
9502 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
9503 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
9506 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
9507 rtx *modconst, int *elementwidth)
9511 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
9513 if (retval < 0 || retval > 5)
9517 *modconst = tmpconst;
9520 *elementwidth = tmpwidth;
9525 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
9526 the immediate is valid, write a constant suitable for using as an operand
9527 to VSHR/VSHL to *MODCONST and the corresponding element width to
9528 *ELEMENTWIDTH. ISLEFTSHIFT says whether this is a left shift rather than a
9529 right shift, because the two have different limitations. */
9532 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
9533 rtx *modconst, int *elementwidth,
9536 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9537 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
9538 unsigned HOST_WIDE_INT last_elt = 0;
9539 unsigned HOST_WIDE_INT maxshift;
9541 /* Extract the shift amount from each element; all elements must be equal. */
9542 for (i = 0; i < n_elts; i++)
9544 rtx el = CONST_VECTOR_ELT (op, i);
9545 unsigned HOST_WIDE_INT elpart;
9547 if (CONST_INT_P (el))
9548 elpart = INTVAL (el);
9549 else if (CONST_DOUBLE_P (el))
9554 if (i != 0 && elpart != last_elt)
9560 /* Shift less than element size. */
9561 maxshift = innersize * 8;
9565 /* Left shift immediate value can be from 0 to <size>-1. */
9566 if (last_elt >= maxshift)
9571 /* Right shift immediate value can be from 1 to <size>. */
9572 if (last_elt == 0 || last_elt > maxshift)
9577 *elementwidth = innersize * 8;
9580 *modconst = CONST_VECTOR_ELT (op, 0);
9585 /* Return a string suitable for output of Neon immediate logic operation MNEM. */
9589 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9590 int inverse, int quad)
9592 int width, is_valid;
9593 static char templ[40];
9595 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9597 gcc_assert (is_valid != 0);
9600 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9602 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
9607 /* Return a string suitable for output of Neon immediate shift operation
9608 (VSHR or VSHL) MNEM. */
9611 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9612 enum machine_mode mode, int quad,
9615 int width, is_valid;
9616 static char templ[40];
9618 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9619 gcc_assert (is_valid != 0);
9622 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9624 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9629 /* Output a sequence of pairwise operations to implement a reduction.
9630 NOTE: We do "too much work" here, because pairwise operations work on two
9631 registers-worth of operands in one go. Unfortunately we can't exploit those
9632 extra calculations to do the full operation in fewer steps, I don't think.
9633 Although all vector elements of the result but the first are ignored, we
9634 actually calculate the same result in each of the elements. An alternative
9635 such as initially loading a vector with zero to use as each of the second
9636 operands would use up an additional register and take an extra instruction,
9637 for no particular gain. */
9640 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9641 rtx (*reduc) (rtx, rtx, rtx))
9643 enum machine_mode inner = GET_MODE_INNER (mode);
9644 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9647 for (i = parts / 2; i >= 1; i /= 2)
9649 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
9650 emit_insn (reduc (dest, tmpsum, tmpsum));
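/* Editorial worked example, with the elided setup assumed to start
   TMPSUM at OP1: for a 4-element vector {a, b, c, d} and a
   pairwise-add REDUC callback, the loop emits two operations:

     step 1: {a+b, c+d, a+b, c+d}     (into a fresh pseudo)
     step 2: {a+b+c+d, ...}           (into OP0)

   Only element 0 of the final result is meaningful, as the comment
   above explains.  */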
9655 /* If VALS is a vector constant that can be loaded into a register
9656 using VDUP, generate instructions to do so and return an RTX to
9657 assign to the register. Otherwise return NULL_RTX. */
9660 neon_vdup_constant (rtx vals)
9662 enum machine_mode mode = GET_MODE (vals);
9663 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9664 int n_elts = GET_MODE_NUNITS (mode);
9665 bool all_same = true;
9669 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
9672 for (i = 0; i < n_elts; ++i)
9674 x = XVECEXP (vals, 0, i);
9675 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9680 /* The elements are not all the same. We could handle repeating
9681 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9682 {0, C, 0, C, 0, C, 0, C} which can be loaded using vdup.i16). */
9686 /* We can load this constant by using VDUP and a constant in a
9687 single ARM register. This will be cheaper than a vector load. */
9690 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9691 return gen_rtx_VEC_DUPLICATE (mode, x);
9694 /* Generate code to load VALS, which is a PARALLEL containing only
9695 constants (for vec_init) or CONST_VECTOR, efficiently into a
9696 register. Returns an RTX to copy into the register, or NULL_RTX
9697 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
9700 neon_make_constant (rtx vals)
9702 enum machine_mode mode = GET_MODE (vals);
9704 rtx const_vec = NULL_RTX;
9705 int n_elts = GET_MODE_NUNITS (mode);
9709 if (GET_CODE (vals) == CONST_VECTOR)
9711 else if (GET_CODE (vals) == PARALLEL)
9713 /* A CONST_VECTOR must contain only CONST_INTs and
9714 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9715 Only store valid constants in a CONST_VECTOR. */
9716 for (i = 0; i < n_elts; ++i)
9718 rtx x = XVECEXP (vals, 0, i);
9719 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
9722 if (n_const == n_elts)
9723 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9728 if (const_vec != NULL
9729 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9730 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9732 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9733 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9734 pipeline cycle; creating the constant takes one or two ARM
9737 else if (const_vec != NULL_RTX)
9738 /* Load from constant pool. On Cortex-A8 this takes two cycles
9739 (for either double or quad vectors). We cannot take advantage
9740 of single-cycle VLD1 because we need a PC-relative addressing mode. */
9744 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9745 We cannot construct an initializer. */
9749 /* Initialize vector TARGET to VALS. */
9752 neon_expand_vector_init (rtx target, rtx vals)
9754 enum machine_mode mode = GET_MODE (target);
9755 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9756 int n_elts = GET_MODE_NUNITS (mode);
9757 int n_var = 0, one_var = -1;
9758 bool all_same = true;
9762 for (i = 0; i < n_elts; ++i)
9764 x = XVECEXP (vals, 0, i);
9765 if (!CONSTANT_P (x))
9766 ++n_var, one_var = i;
9768 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9774 rtx constant = neon_make_constant (vals);
9775 if (constant != NULL_RTX)
9777 emit_move_insn (target, constant);
9782 /* Splat a single non-constant element if we can. */
9783 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9785 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9786 emit_insn (gen_rtx_SET (VOIDmode, target,
9787 gen_rtx_VEC_DUPLICATE (mode, x)));
9791 /* One field is non-constant. Load constant then overwrite varying
9792 field. This is more efficient than using the stack. */
9795 rtx copy = copy_rtx (vals);
9796 rtx index = GEN_INT (one_var);
9798 /* Load constant part of vector, substitute neighboring value for varying element. */
9800 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9801 neon_expand_vector_init (target, copy);
9803 /* Insert variable. */
9804 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9808 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9811 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9814 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9817 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9820 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9823 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9826 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9829 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9832 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9840 /* Construct the vector in memory one field at a time
9841 and load the whole vector. */
9842 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
9843 for (i = 0; i < n_elts; i++)
9844 emit_move_insn (adjust_address_nv (mem, inner_mode,
9845 i * GET_MODE_SIZE (inner_mode)),
9846 XVECEXP (vals, 0, i));
9847 emit_move_insn (target, mem);
9850 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9851 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9852 reported source locations are bogus. */
9855 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9860 gcc_assert (CONST_INT_P (operand));
9862 lane = INTVAL (operand);
9864 if (lane < low || lane >= high)
9868 /* Bounds-check lanes. */
9871 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9873 bounds_check (operand, low, high, "lane out of range");
9876 /* Bounds-check constants. */
9879 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9881 bounds_check (operand, low, high, "constant out of range");
9885 neon_element_bits (enum machine_mode mode)
9888 return GET_MODE_BITSIZE (mode);
9890 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9894 /* Predicates for `match_operand' and `match_operator'. */
9896 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9897 WB is true if full writeback address modes are allowed and is false
9898 if limited writeback address modes (POST_INC and PRE_DEC) are allowed. */
9902 arm_coproc_mem_operand (rtx op, bool wb)
9906 /* Reject eliminable registers. */
9907 if (! (reload_in_progress || reload_completed)
9908 && ( reg_mentioned_p (frame_pointer_rtx, op)
9909 || reg_mentioned_p (arg_pointer_rtx, op)
9910 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9911 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9912 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9913 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9916 /* Constants are converted into offsets from labels. */
9922 if (reload_completed
9923 && (GET_CODE (ind) == LABEL_REF
9924 || (GET_CODE (ind) == CONST
9925 && GET_CODE (XEXP (ind, 0)) == PLUS
9926 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9927 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
9930 /* Match: (mem (reg)). */
9932 return arm_address_register_rtx_p (ind, 0);
9934 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
9935 acceptable in any case (subject to verification by
9936 arm_address_register_rtx_p). We need WB to be true to accept
9937 PRE_INC and POST_DEC. */
9938 if (GET_CODE (ind) == POST_INC
9939 || GET_CODE (ind) == PRE_DEC
9941 && (GET_CODE (ind) == PRE_INC
9942 || GET_CODE (ind) == POST_DEC)))
9943 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9946 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9947 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9948 && GET_CODE (XEXP (ind, 1)) == PLUS
9949 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9950 ind = XEXP (ind, 1);
9955 if (GET_CODE (ind) == PLUS
9956 && REG_P (XEXP (ind, 0))
9957 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9958 && CONST_INT_P (XEXP (ind, 1))
9959 && INTVAL (XEXP (ind, 1)) > -1024
9960 && INTVAL (XEXP (ind, 1)) < 1024
9961 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
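/* Editorial examples for the offset test above:
   (mem (plus (reg) (const_int 1020))) is accepted (in range and a
   multiple of 4, matching the coprocessor load/store offset encoding),
   while (const_int 1024) is out of range and (const_int 2) is rejected
   for not being word-aligned.  */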
9967 /* Return TRUE if OP is a memory operand which we can load or store a vector
9968 to/from. TYPE is one of the following values:
9969 0 - Vector load/store (vldr)
9970 1 - Core registers (ldm)
9971 2 - Element/structure loads (vld1)
9974 neon_vector_mem_operand (rtx op, int type)
9978 /* Reject eliminable registers. */
9979 if (! (reload_in_progress || reload_completed)
9980 && ( reg_mentioned_p (frame_pointer_rtx, op)
9981 || reg_mentioned_p (arg_pointer_rtx, op)
9982 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9983 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9984 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9985 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9988 /* Constants are converted into offsets from labels. */
9994 if (reload_completed
9995 && (GET_CODE (ind) == LABEL_REF
9996 || (GET_CODE (ind) == CONST
9997 && GET_CODE (XEXP (ind, 0)) == PLUS
9998 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9999 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
10002 /* Match: (mem (reg)). */
10004 return arm_address_register_rtx_p (ind, 0);
10006 /* Allow post-increment with Neon registers. */
10007 if ((type != 1 && GET_CODE (ind) == POST_INC)
10008 || (type == 0 && GET_CODE (ind) == PRE_DEC))
10009 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
10011 /* FIXME: vld1 allows register post-modify. */
10017 && GET_CODE (ind) == PLUS
10018 && REG_P (XEXP (ind, 0))
10019 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
10020 && CONST_INT_P (XEXP (ind, 1))
10021 && INTVAL (XEXP (ind, 1)) > -1024
10022 /* For quad modes, we restrict the constant offset to be slightly less
10023 than what the instruction format permits. We have no such constraint
10024 on double mode offsets. (This must match arm_legitimate_index_p.) */
10025 && (INTVAL (XEXP (ind, 1))
10026 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
10027 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
10033 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct mode. */
10036 neon_struct_mem_operand (rtx op)
10040 /* Reject eliminable registers. */
10041 if (! (reload_in_progress || reload_completed)
10042 && ( reg_mentioned_p (frame_pointer_rtx, op)
10043 || reg_mentioned_p (arg_pointer_rtx, op)
10044 || reg_mentioned_p (virtual_incoming_args_rtx, op)
10045 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
10046 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
10047 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
10050 /* Constants are converted into offsets from labels. */
10054 ind = XEXP (op, 0);
10056 if (reload_completed
10057 && (GET_CODE (ind) == LABEL_REF
10058 || (GET_CODE (ind) == CONST
10059 && GET_CODE (XEXP (ind, 0)) == PLUS
10060 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
10061 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
10064 /* Match: (mem (reg)). */
10066 return arm_address_register_rtx_p (ind, 0);
10068 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
10069 if (GET_CODE (ind) == POST_INC
10070 || GET_CODE (ind) == PRE_DEC)
10071 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
10076 /* Return true if X is a register that will be eliminated later on. */
10078 arm_eliminable_register (rtx x)
10080 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
10081 || REGNO (x) == ARG_POINTER_REGNUM
10082 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
10083 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
10086 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
10087 coprocessor registers. Otherwise return NO_REGS. */
10090 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
10092 if (mode == HFmode)
10094 if (!TARGET_NEON_FP16)
10095 return GENERAL_REGS;
10096 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
10098 return GENERAL_REGS;
10101 /* The neon move patterns handle all legitimate vector and struct
10104 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
10105 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
10106 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
10107 || VALID_NEON_STRUCT_MODE (mode)))
10110 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
10113 return GENERAL_REGS;
10116 /* Values which must be returned in the most-significant end of the return register. */
10120 arm_return_in_msb (const_tree valtype)
10122 return (TARGET_AAPCS_BASED
10123 && BYTES_BIG_ENDIAN
10124 && (AGGREGATE_TYPE_P (valtype)
10125 || TREE_CODE (valtype) == COMPLEX_TYPE
10126 || FIXED_POINT_TYPE_P (valtype)));
10129 /* Return TRUE if X references a SYMBOL_REF. */
10131 symbol_mentioned_p (rtx x)
10136 if (GET_CODE (x) == SYMBOL_REF)
10139 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
10140 are constant offsets, not symbols. */
10141 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
10144 fmt = GET_RTX_FORMAT (GET_CODE (x));
10146 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10152 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10153 if (symbol_mentioned_p (XVECEXP (x, i, j)))
10156 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
10163 /* Return TRUE if X references a LABEL_REF. */
10165 label_mentioned_p (rtx x)
10170 if (GET_CODE (x) == LABEL_REF)
10173 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
10174 instruction, but they are constant offsets, not symbols. */
10175 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
10178 fmt = GET_RTX_FORMAT (GET_CODE (x));
10179 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10185 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10186 if (label_mentioned_p (XVECEXP (x, i, j)))
10189 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
10197 tls_mentioned_p (rtx x)
10199 switch (GET_CODE (x))
10202 return tls_mentioned_p (XEXP (x, 0));
10205 if (XINT (x, 1) == UNSPEC_TLS)
10213 /* Must not copy any rtx that uses a pc-relative address. */
10216 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
10218 if (GET_CODE (*x) == UNSPEC
10219 && (XINT (*x, 1) == UNSPEC_PIC_BASE
10220 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
10226 arm_cannot_copy_insn_p (rtx insn)
10228 /* The tls call insn cannot be copied, as it is paired with a data word. */
10230 if (recog_memoized (insn) == CODE_FOR_tlscall)
10233 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
10237 minmax_code (rtx x)
10239 enum rtx_code code = GET_CODE (x);
10252 gcc_unreachable ();
10256 /* Match pair of min/max operators that can be implemented via usat/ssat. */
10259 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
10260 int *mask, bool *signed_sat)
10262 /* The high bound must be a power of two minus one. */
10263 int log = exact_log2 (INTVAL (hi_bound) + 1);
10267 /* The low bound is either zero (for usat) or one less than the
10268 negation of the high bound (for ssat). */
10269 if (INTVAL (lo_bound) == 0)
10274 *signed_sat = false;
10279 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
10284 *signed_sat = true;
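/* Editorial worked examples, assuming the elided assignments store
   LOG and LOG + 1 into *mask for the two branches above: bounds
   [0, 255] give log = exact_log2 (256) = 8, *mask = 8 and
   *signed_sat = false (a usat #8 pattern); bounds [-256, 255] satisfy
   lo == -hi - 1, giving *mask = 9 and *signed_sat = true (ssat #9).  */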
10292 /* Return 1 if memory locations are adjacent. */
10294 adjacent_mem_locations (rtx a, rtx b)
10296 /* We don't guarantee to preserve the order of these memory refs. */
10297 if (volatile_refs_p (a) || volatile_refs_p (b))
10300 if ((REG_P (XEXP (a, 0))
10301 || (GET_CODE (XEXP (a, 0)) == PLUS
10302 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
10303 && (REG_P (XEXP (b, 0))
10304 || (GET_CODE (XEXP (b, 0)) == PLUS
10305 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
10307 HOST_WIDE_INT val0 = 0, val1 = 0;
10311 if (GET_CODE (XEXP (a, 0)) == PLUS)
10313 reg0 = XEXP (XEXP (a, 0), 0);
10314 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
10317 reg0 = XEXP (a, 0);
10319 if (GET_CODE (XEXP (b, 0)) == PLUS)
10321 reg1 = XEXP (XEXP (b, 0), 0);
10322 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
10325 reg1 = XEXP (b, 0);
10327 /* Don't accept any offset that will require multiple
10328 instructions to handle, since this would cause the
10329 arith_adjacentmem pattern to output an overlong sequence. */
10330 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
10333 /* Don't allow an eliminable register: register elimination can make
10334 the offset too large. */
10335 if (arm_eliminable_register (reg0))
10338 val_diff = val1 - val0;
10342 /* If the target has load delay slots, then there's no benefit
10343 to using an ldm instruction unless the offset is zero and
10344 we are optimizing for size. */
10345 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
10346 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
10347 && (val_diff == 4 || val_diff == -4));
10350 return ((REGNO (reg0) == REGNO (reg1))
10351 && (val_diff == 4 || val_diff == -4));
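/* Editorial examples: (mem (reg r4)) and (mem (plus (reg r4)
   (const_int 4))) are adjacent (val_diff == 4), as are offsets 8 and 4
   from the same base (val_diff == -4).  Different base registers, or a
   difference other than +/-4, fail; on arm_ld_sched cores the extra
   optimize_size restriction above also applies.  */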
10357 /* Return true if OP is a valid load or store multiple operation. LOAD is true
10358 for load operations, false for store operations. CONSECUTIVE is true
10359 if the register numbers in the operation must be consecutive in the register
10360 bank. RETURN_PC is true if value is to be loaded in PC.
10361 The pattern we are trying to match for load is:
10362 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
10363 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
10366 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
10369 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
10370 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
10371 3. If consecutive is TRUE, then for kth register being loaded,
10372 REGNO (R_dk) = REGNO (R_d0) + k.
10373 The pattern for store is similar. */
10375 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
10376 bool consecutive, bool return_pc)
10378 HOST_WIDE_INT count = XVECLEN (op, 0);
10379 rtx reg, mem, addr;
10381 unsigned first_regno;
10382 HOST_WIDE_INT i = 1, base = 0, offset = 0;
10384 bool addr_reg_in_reglist = false;
10385 bool update = false;
10390 /* If not in SImode, then registers must be consecutive
10391 (e.g., VLDM instructions for DFmode). */
10392 gcc_assert ((mode == SImode) || consecutive);
10393 /* Setting return_pc for stores is illegal. */
10394 gcc_assert (!return_pc || load);
10396 /* Set up the increments and the regs per val based on the mode. */
10397 reg_increment = GET_MODE_SIZE (mode);
10398 regs_per_val = reg_increment / 4;
10399 offset_adj = return_pc ? 1 : 0;
10402 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
10403 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
10406 /* Check if this is a write-back. */
10407 elt = XVECEXP (op, 0, offset_adj);
10408 if (GET_CODE (SET_SRC (elt)) == PLUS)
10414 /* The offset adjustment must be the number of registers being
10415 popped times the size of a single register. */
10416 if (!REG_P (SET_DEST (elt))
10417 || !REG_P (XEXP (SET_SRC (elt), 0))
10418 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
10419 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
10420 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
10421 ((count - 1 - offset_adj) * reg_increment))
10425 i = i + offset_adj;
10426 base = base + offset_adj;
10427 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
10428 success depends on the type: VLDM can do just one reg,
10429 LDM must do at least two. */
10430 if ((count <= i) && (mode == SImode))
10433 elt = XVECEXP (op, 0, i - 1);
10434 if (GET_CODE (elt) != SET)
10439 reg = SET_DEST (elt);
10440 mem = SET_SRC (elt);
10444 reg = SET_SRC (elt);
10445 mem = SET_DEST (elt);
10448 if (!REG_P (reg) || !MEM_P (mem))
10451 regno = REGNO (reg);
10452 first_regno = regno;
10453 addr = XEXP (mem, 0);
10454 if (GET_CODE (addr) == PLUS)
10456 if (!CONST_INT_P (XEXP (addr, 1)))
10459 offset = INTVAL (XEXP (addr, 1));
10460 addr = XEXP (addr, 0);
10466 /* Don't allow SP to be loaded unless it is also the base register. It
10467 guarantees that SP is reset correctly when an LDM instruction
10468 is interrupted. Otherwise, we might end up with a corrupt stack. */
10469 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10472 for (; i < count; i++)
10474 elt = XVECEXP (op, 0, i);
10475 if (GET_CODE (elt) != SET)
10480 reg = SET_DEST (elt);
10481 mem = SET_SRC (elt);
10485 reg = SET_SRC (elt);
10486 mem = SET_DEST (elt);
10490 || GET_MODE (reg) != mode
10491 || REGNO (reg) <= regno
10494 (unsigned int) (first_regno + regs_per_val * (i - base))))
10495 /* Don't allow SP to be loaded unless it is also the base register. It
10496 guarantees that SP is reset correctly when an LDM instruction
10497 is interrupted. Otherwise, we might end up with a corrupt stack. */
10498 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10500 || GET_MODE (mem) != mode
10501 || ((GET_CODE (XEXP (mem, 0)) != PLUS
10502 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
10503 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
10504 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
10505 offset + (i - base) * reg_increment))
10506 && (!REG_P (XEXP (mem, 0))
10507 || offset + (i - base) * reg_increment != 0)))
10510 regno = REGNO (reg);
10511 if (regno == REGNO (addr))
10512 addr_reg_in_reglist = true;
10517 if (update && addr_reg_in_reglist)
10520 /* For Thumb-1, address register is always modified - either by write-back
10521 or by explicit load. If the pattern does not describe an update,
10522 then the address register must be in the list of loaded registers. */
10524 return update || addr_reg_in_reglist;
10530 /* Return true iff it would be profitable to turn a sequence of NOPS loads
10531 or stores (depending on IS_STORE) into a load-multiple or store-multiple
10532 instruction. ADD_OFFSET is nonzero if the base address register needs
10533 to be modified with an add instruction before we can use it. */
10536 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
10537 int nops, HOST_WIDE_INT add_offset)
10539 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
10540 if the offset isn't small enough. The reason 2 ldrs are faster
10541 is because these ARMs are able to do more than one cache access
10542 in a single cycle. The ARM9 and StrongARM have Harvard caches,
10543 whilst the ARM8 has a double bandwidth cache. This means that
10544 these cores can do both an instruction fetch and a data fetch in
10545 a single cycle, so the trick of calculating the address into a
10546 scratch register (one of the result regs) and then doing a load
10547 multiple actually becomes slower (and no smaller in code size).
10548 That is the transformation
10550 ldr rd1, [rbase + offset]
10551 ldr rd2, [rbase + offset + 4]
10555 add rd1, rbase, offset
10556 ldmia rd1, {rd1, rd2}
10558 produces worse code -- '3 cycles + any stalls on rd2' instead of
10559 '2 cycles + any stalls on rd2'. On ARMs with only one cache
10560 access per cycle, the first sequence could never complete in less
10561 than 6 cycles, whereas the ldm sequence would only take 5 and
10562 would make better use of sequential accesses if not hitting the
10565 We cheat here and test 'arm_ld_sched' which we currently know to
10566 only be true for the ARM8, ARM9 and StrongARM. If this ever
10567 changes, then the test below needs to be reworked. */
10568 if (nops == 2 && arm_ld_sched && add_offset != 0)
10571 /* XScale has load-store double instructions, but they have stricter
10572 alignment requirements than load-store multiple, so we cannot use them.
10575 For XScale ldm requires 2 + NREGS cycles to complete and blocks
10576 the pipeline until completion.
10584 An ldr instruction takes 1-3 cycles, but does not block the pipeline.
10593 Best case ldr will always win. However, the more ldr instructions
10594 we issue, the less likely we are to be able to schedule them well.
10595 Using ldr instructions also increases code size.
10597 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10598 for counts of 3 or 4 regs. */
10599 if (nops <= 2 && arm_tune_xscale && !optimize_size)
10604 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10605 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10606 an array ORDER which describes the sequence to use when accessing the
10607 offsets that produces an ascending order. In this sequence, each
10608 offset must be larger by exactly 4 than the previous one. ORDER[0]
10609 must have been filled in with the lowest offset by the caller.
10610 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10611 we use to verify that ORDER produces an ascending order of registers.
10612 Return true if it was possible to construct such an order, false if not. */
10616 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10617 int *unsorted_regs)
10620 for (i = 1; i < nops; i++)
10624 order[i] = order[i - 1];
10625 for (j = 0; j < nops; j++)
10626 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10628 /* We must find exactly one offset that is higher than the
10629 previous one by 4. */
10630 if (order[i] != order[i - 1])
10634 if (order[i] == order[i - 1])
10636 /* The register numbers must be ascending. */
10637 if (unsorted_regs != NULL
10638 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
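/* Editorial worked example: with unsorted_offsets = {8, 0, 12, 4} and
   order[0] preset to 1 by the caller (index of the lowest offset, 0),
   successive iterations find offsets 4, 8 and 12 at indices 3, 0 and
   2, yielding order = {1, 3, 0, 2}.  With {8, 0, 16, 4} instead, no
   offset equals 8 + 4, order[3] never advances, and the function
   returns false.  */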
10644 /* Used to determine in a peephole whether a sequence of load
10645 instructions can be changed into a load-multiple instruction.
10646 NOPS is the number of separate load instructions we are examining. The
10647 first NOPS entries in OPERANDS are the destination registers, the
10648 next NOPS entries are memory operands. If this function is
10649 successful, *BASE is set to the common base register of the memory
10650 accesses; *LOAD_OFFSET is set to the first memory location's offset
10651 from that base register.
10652 REGS is an array filled in with the destination register numbers.
10653 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
10654 insn numbers to an ascending order of stores. If CHECK_REGS is true,
10655 the sequence of registers in REGS matches the loads from ascending memory
10656 locations, and the function verifies that the register numbers are
10657 themselves ascending. If CHECK_REGS is false, the register numbers
10658 are stored in the order they are found in the operands. */
10660 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10661 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10663 int unsorted_regs[MAX_LDM_STM_OPS];
10664 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10665 int order[MAX_LDM_STM_OPS];
10666 rtx base_reg_rtx = NULL;
10670 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10671 easily extended if required. */
10672 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10674 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10676 /* Loop over the operands and check that the memory references are
10677 suitable (i.e. immediate offsets from the same base register). At
10678 the same time, extract the target register, and the memory offsets. */
10680 for (i = 0; i < nops; i++)
10685 /* Convert a subreg of a mem into the mem itself. */
10686 if (GET_CODE (operands[nops + i]) == SUBREG)
10687 operands[nops + i] = alter_subreg (operands + (nops + i), true);
10689 gcc_assert (MEM_P (operands[nops + i]));
10691 /* Don't reorder volatile memory references; it doesn't seem worth
10692 looking for the case where the order is ok anyway. */
10693 if (MEM_VOLATILE_P (operands[nops + i]))
10696 offset = const0_rtx;
10698 if ((REG_P (reg = XEXP (operands[nops + i], 0))
10699 || (GET_CODE (reg) == SUBREG
10700 && REG_P (reg = SUBREG_REG (reg))))
10701 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10702 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
10703 || (GET_CODE (reg) == SUBREG
10704 && REG_P (reg = SUBREG_REG (reg))))
10705 && (CONST_INT_P (offset
10706 = XEXP (XEXP (operands[nops + i], 0), 1)))))
10710 base_reg = REGNO (reg);
10711 base_reg_rtx = reg;
10712 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10715 else if (base_reg != (int) REGNO (reg))
10716 /* Not addressed from the same base register. */
10719 unsorted_regs[i] = (REG_P (operands[i])
10720 ? REGNO (operands[i])
10721 : REGNO (SUBREG_REG (operands[i])));
10723 /* If it isn't an integer register, or if it overwrites the
10724 base register but isn't the last insn in the list, then
10725 we can't do this. */
10726 if (unsorted_regs[i] < 0
10727 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10728 || unsorted_regs[i] > 14
10729 || (i != nops - 1 && unsorted_regs[i] == base_reg))
10732 /* Don't allow SP to be loaded unless it is also the base
10733 register. It guarantees that SP is reset correctly when
10734 an LDM instruction is interrupted. Otherwise, we might
10735 end up with a corrupt stack. */
10736 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
10739 unsorted_offsets[i] = INTVAL (offset);
10740 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10744 /* Not a suitable memory address. */
10748 /* All the useful information has now been extracted from the
10749 operands into unsorted_regs and unsorted_offsets; additionally,
10750 order[0] has been set to the lowest offset in the list. Sort
10751 the offsets into order, verifying that they are adjacent, and
10752 check that the register numbers are ascending. */
10753 if (!compute_offset_order (nops, unsorted_offsets, order,
10754 check_regs ? unsorted_regs : NULL))
10758 memcpy (saved_order, order, sizeof order);
10764 for (i = 0; i < nops; i++)
10765 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10767 *load_offset = unsorted_offsets[order[0]];
10771 && !peep2_reg_dead_p (nops, base_reg_rtx))
10774 if (unsorted_offsets[order[0]] == 0)
10775 ldm_case = 1; /* ldmia */
10776 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10777 ldm_case = 2; /* ldmib */
10778 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10779 ldm_case = 3; /* ldmda */
10780 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10781 ldm_case = 4; /* ldmdb */
10782 else if (const_ok_for_arm (unsorted_offsets[order[0]])
10783 || const_ok_for_arm (-unsorted_offsets[order[0]]))
10788 if (!multiple_operation_profitable_p (false, nops,
10790 ? unsorted_offsets[order[0]] : 0))
10796 /* Used to determine in a peephole whether a sequence of store instructions can
10797 be changed into a store-multiple instruction.
10798 NOPS is the number of separate store instructions we are examining.
10799 NOPS_TOTAL is the total number of instructions recognized by the peephole pattern.
10801 The first NOPS entries in OPERANDS are the source registers, the next
10802 NOPS entries are memory operands. If this function is successful, *BASE is
10803 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10804 to the first memory location's offset from that base register. REGS is an
10805 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10806 likewise filled with the corresponding rtx's.
10807 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
10808 numbers to an ascending order of stores.
10809 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10810 from ascending memory locations, and the function verifies that the register
10811 numbers are themselves ascending. If CHECK_REGS is false, the register
10812 numbers are stored in the order they are found in the operands. */
10814 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10815 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10816 HOST_WIDE_INT *load_offset, bool check_regs)
10818 int unsorted_regs[MAX_LDM_STM_OPS];
10819 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10820 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10821 int order[MAX_LDM_STM_OPS];
10823 rtx base_reg_rtx = NULL;
10826 /* Write back of base register is currently only supported for Thumb 1. */
10827 int base_writeback = TARGET_THUMB1;
10829 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10830 easily extended if required. */
10831 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10833 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10835 /* Loop over the operands and check that the memory references are
10836 suitable (i.e. immediate offsets from the same base register). At
10837 the same time, extract the target register, and the memory offsets. */
10839 for (i = 0; i < nops; i++)
10844 /* Convert a subreg of a mem into the mem itself. */
10845 if (GET_CODE (operands[nops + i]) == SUBREG)
10846 operands[nops + i] = alter_subreg (operands + (nops + i), true);
10848 gcc_assert (MEM_P (operands[nops + i]));
10850 /* Don't reorder volatile memory references; it doesn't seem worth
10851 looking for the case where the order is ok anyway. */
10852 if (MEM_VOLATILE_P (operands[nops + i]))
10855 offset = const0_rtx;
10857 if ((REG_P (reg = XEXP (operands[nops + i], 0))
10858 || (GET_CODE (reg) == SUBREG
10859 && REG_P (reg = SUBREG_REG (reg))))
10860 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10861 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
10862 || (GET_CODE (reg) == SUBREG
10863 && REG_P (reg = SUBREG_REG (reg))))
10864 && (CONST_INT_P (offset
10865 = XEXP (XEXP (operands[nops + i], 0), 1)))))
10867 unsorted_reg_rtxs[i] = (REG_P (operands[i])
10868 ? operands[i] : SUBREG_REG (operands[i]));
10869 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10873 base_reg = REGNO (reg);
10874 base_reg_rtx = reg;
10875 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10878 else if (base_reg != (int) REGNO (reg))
10879 /* Not addressed from the same base register. */
10882 /* If it isn't an integer register, then we can't do this. */
10883 if (unsorted_regs[i] < 0
10884 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10885 /* The effects are unpredictable if the base register is
10886 both updated and stored. */
10887 || (base_writeback && unsorted_regs[i] == base_reg)
10888 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10889 || unsorted_regs[i] > 14)
10892 unsorted_offsets[i] = INTVAL (offset);
10893 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10897 /* Not a suitable memory address. */
10901 /* All the useful information has now been extracted from the
10902 operands into unsorted_regs and unsorted_offsets; additionally,
10903 order[0] has been set to the lowest offset in the list. Sort
10904 the offsets into order, verifying that they are adjacent, and
10905 check that the register numbers are ascending. */
10906 if (!compute_offset_order (nops, unsorted_offsets, order,
10907 check_regs ? unsorted_regs : NULL))
10911 memcpy (saved_order, order, sizeof order);
10917 for (i = 0; i < nops; i++)
10919 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10921 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
10924 *load_offset = unsorted_offsets[order[0]];
10928 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
10931 if (unsorted_offsets[order[0]] == 0)
10932 stm_case = 1; /* stmia */
10933 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10934 stm_case = 2; /* stmib */
10935 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10936 stm_case = 3; /* stmda */
10937 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10938 stm_case = 4; /* stmdb */
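/* Illustration (not in the original source): for two registers {r0, r1}
   stored relative to a base register r2, the four cases above correspond
   to the following offset patterns:

	stmia	r2, {r0, r1}	@ offsets  0,  4  (lowest offset == 0)
	stmib	r2, {r0, r1}	@ offsets  4,  8  (lowest offset == 4)
	stmda	r2, {r0, r1}	@ offsets -4,  0  (highest offset == 0)
	stmdb	r2, {r0, r1}	@ offsets -8, -4  (highest offset == -4)  */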
10942 if (!multiple_operation_profitable_p (false, nops, 0))
10948 /* Routines for use in generating RTL. */
10950 /* Generate a load-multiple instruction. COUNT is the number of loads in
10951 the instruction; REGS and MEMS are arrays containing the operands.
10952 BASEREG is the base register to be used in addressing the memory operands.
WBACK_OFFSET is nonzero if the instruction should update the base
register.  */
10957 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10958 HOST_WIDE_INT wback_offset)
10963 if (!multiple_operation_profitable_p (false, count, 0))
10969 for (i = 0; i < count; i++)
10970 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
10972 if (wback_offset != 0)
10973 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
10975 seq = get_insns ();
10981 result = gen_rtx_PARALLEL (VOIDmode,
10982 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10983 if (wback_offset != 0)
10985 XVECEXP (result, 0, 0)
10986 = gen_rtx_SET (VOIDmode, basereg,
10987 plus_constant (Pmode, basereg, wback_offset));
10992 for (j = 0; i < count; i++, j++)
10993 XVECEXP (result, 0, i)
10994 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
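/* Illustrative sketch (assumed operands, not from the original source):
   with COUNT == 2, REGS == {0, 1}, BASEREG == r2 and WBACK_OFFSET == 8,
   the PARALLEL built above is roughly

	(parallel [(set (reg:SI r2) (plus:SI (reg:SI r2) (const_int 8)))
		   (set (reg:SI r0) (mem:SI (reg:SI r2)))
		   (set (reg:SI r1) (mem:SI (plus:SI (reg:SI r2)
						     (const_int 4))))])

   which corresponds to the single instruction "ldmia r2!, {r0, r1}".  */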
10999 /* Generate a store-multiple instruction. COUNT is the number of stores in
11000 the instruction; REGS and MEMS are arrays containing the operands.
11001 BASEREG is the base register to be used in addressing the memory operands.
WBACK_OFFSET is nonzero if the instruction should update the base
register.  */
11006 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
11007 HOST_WIDE_INT wback_offset)
11012 if (GET_CODE (basereg) == PLUS)
11013 basereg = XEXP (basereg, 0);
11015 if (!multiple_operation_profitable_p (false, count, 0))
11021 for (i = 0; i < count; i++)
11022 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
11024 if (wback_offset != 0)
11025 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
11027 seq = get_insns ();
11033 result = gen_rtx_PARALLEL (VOIDmode,
11034 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
11035 if (wback_offset != 0)
11037 XVECEXP (result, 0, 0)
11038 = gen_rtx_SET (VOIDmode, basereg,
11039 plus_constant (Pmode, basereg, wback_offset));
11044 for (j = 0; i < count; i++, j++)
11045 XVECEXP (result, 0, i)
11046 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
11051 /* Generate either a load-multiple or a store-multiple instruction. This
11052 function can be used in situations where we can start with a single MEM
11053 rtx and adjust its address upwards.
11054 COUNT is the number of operations in the instruction, not counting a
possible update of the base register.  REGS is an array containing the
register operands.
11057 BASEREG is the base register to be used in addressing the memory operands,
11058 which are constructed from BASEMEM.
11059 WRITE_BACK specifies whether the generated instruction should include an
11060 update of the base register.
11061 OFFSETP is used to pass an offset to and from this function; this offset
11062 is not used when constructing the address (instead BASEMEM should have an
11063 appropriate offset in its address), it is used only for setting
MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
11067 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
11068 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
11070 rtx mems[MAX_LDM_STM_OPS];
11071 HOST_WIDE_INT offset = *offsetp;
11074 gcc_assert (count <= MAX_LDM_STM_OPS);
11076 if (GET_CODE (basereg) == PLUS)
11077 basereg = XEXP (basereg, 0);
11079 for (i = 0; i < count; i++)
11081 rtx addr = plus_constant (Pmode, basereg, i * 4);
11082 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
11090 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
11091 write_back ? 4 * count : 0);
11093 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
11094 write_back ? 4 * count : 0);
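/* Usage sketch (register numbers and variable names assumed): to load four
   consecutive words from the address held in a base register and step that
   register past them, a caller could write

     int regs[4] = { 0, 1, 2, 3 };
     emit_insn (arm_gen_load_multiple (regs, 4, base_reg, TRUE,
				       basemem, &offset));

   which yields "ldmia rB!, {r0-r3}" for base register rB and advances
   OFFSET by 16.  */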
11098 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
11099 rtx basemem, HOST_WIDE_INT *offsetp)
11101 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
11106 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
11107 rtx basemem, HOST_WIDE_INT *offsetp)
11109 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
11113 /* Called from a peephole2 expander to turn a sequence of loads into an
11114 LDM instruction. OPERANDS are the operands found by the peephole matcher;
11115 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
is true if we can reorder the registers because they are subsequently
used commutatively.
11118 Returns true iff we could generate a new instruction. */
11121 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
11123 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
11124 rtx mems[MAX_LDM_STM_OPS];
11125 int i, j, base_reg;
11127 HOST_WIDE_INT offset;
11128 int write_back = FALSE;
11132 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
11133 &base_reg, &offset, !sort_regs);
11139 for (i = 0; i < nops - 1; i++)
11140 for (j = i + 1; j < nops; j++)
11141 if (regs[i] > regs[j])
11147 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
11151 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
11152 gcc_assert (ldm_case == 1 || ldm_case == 5);
11158 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
11159 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
11161 if (!TARGET_THUMB1)
11163 base_reg = regs[0];
11164 base_reg_rtx = newbase;
11168 for (i = 0; i < nops; i++)
11170 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
11171 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
11174 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
11175 write_back ? offset + i * 4 : 0));
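/* Hypothetical peephole input for illustration: given the two loads

	ldr	r1, [r3, #4]
	ldr	r0, [r3]

   with SORT_REGS true the register list is sorted to {r0, r1}, MEM_ORDER
   puts the offset-0 access first, and the pair is replaced by

	ldmia	r3, {r0, r1}  */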
11179 /* Called from a peephole2 expander to turn a sequence of stores into an
11180 STM instruction. OPERANDS are the operands found by the peephole matcher;
11181 NOPS indicates how many separate stores we are trying to combine.
11182 Returns true iff we could generate a new instruction. */
11185 gen_stm_seq (rtx *operands, int nops)
11188 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
11189 rtx mems[MAX_LDM_STM_OPS];
11192 HOST_WIDE_INT offset;
11193 int write_back = FALSE;
11196 bool base_reg_dies;
11198 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
11199 mem_order, &base_reg, &offset, true);
11204 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
11206 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
11209 gcc_assert (base_reg_dies);
11215 gcc_assert (base_reg_dies);
11216 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
11220 addr = plus_constant (Pmode, base_reg_rtx, offset);
11222 for (i = 0; i < nops; i++)
11224 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
11225 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
11228 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
11229 write_back ? offset + i * 4 : 0));
11233 /* Called from a peephole2 expander to turn a sequence of stores that are
11234 preceded by constant loads into an STM instruction. OPERANDS are the
11235 operands found by the peephole matcher; NOPS indicates how many
11236 separate stores we are trying to combine; there are 2 * NOPS
11237 instructions in the peephole.
11238 Returns true iff we could generate a new instruction. */
11241 gen_const_stm_seq (rtx *operands, int nops)
11243 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
11244 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
11245 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
11246 rtx mems[MAX_LDM_STM_OPS];
11249 HOST_WIDE_INT offset;
11250 int write_back = FALSE;
11253 bool base_reg_dies;
11255 HARD_REG_SET allocated;
11257 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
11258 mem_order, &base_reg, &offset, false);
11263 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
/* If the same register is used more than once, try to find a free
   register.  */
11267 CLEAR_HARD_REG_SET (allocated);
11268 for (i = 0; i < nops; i++)
11270 for (j = i + 1; j < nops; j++)
11271 if (regs[i] == regs[j])
11273 rtx t = peep2_find_free_register (0, nops * 2,
11274 TARGET_THUMB1 ? "l" : "r",
11275 SImode, &allocated);
11279 regs[i] = REGNO (t);
/* Compute an ordering that maps the register numbers to an ascending
   sequence.  */
11286 for (i = 0; i < nops; i++)
11287 if (regs[i] < regs[reg_order[0]])
11290 for (i = 1; i < nops; i++)
11292 int this_order = reg_order[i - 1];
11293 for (j = 0; j < nops; j++)
11294 if (regs[j] > regs[reg_order[i - 1]]
11295 && (this_order == reg_order[i - 1]
11296 || regs[j] < regs[this_order]))
11298 reg_order[i] = this_order;
11301 /* Ensure that registers that must be live after the instruction end
11302 up with the correct value. */
11303 for (i = 0; i < nops; i++)
11305 int this_order = reg_order[i];
11306 if ((this_order != mem_order[i]
11307 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
11308 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
11312 /* Load the constants. */
11313 for (i = 0; i < nops; i++)
11315 rtx op = operands[2 * nops + mem_order[i]];
11316 sorted_regs[i] = regs[reg_order[i]];
11317 emit_move_insn (reg_rtxs[reg_order[i]], op);
11320 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
11322 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
11325 gcc_assert (base_reg_dies);
11331 gcc_assert (base_reg_dies);
11332 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
11336 addr = plus_constant (Pmode, base_reg_rtx, offset);
11338 for (i = 0; i < nops; i++)
11340 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
11341 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
11344 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
11345 write_back ? offset + i * 4 : 0));
11349 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
11350 unaligned copies on processors which support unaligned semantics for those
11351 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
11352 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
11353 An interleave factor of 1 (the minimum) will perform no interleaving.
11354 Load/store multiple are used for aligned addresses where possible. */
11357 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
11358 HOST_WIDE_INT length,
11359 unsigned int interleave_factor)
11361 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
11362 int *regnos = XALLOCAVEC (int, interleave_factor);
11363 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
11364 HOST_WIDE_INT i, j;
11365 HOST_WIDE_INT remaining = length, words;
11366 rtx halfword_tmp = NULL, byte_tmp = NULL;
11368 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
11369 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
11370 HOST_WIDE_INT srcoffset, dstoffset;
11371 HOST_WIDE_INT src_autoinc, dst_autoinc;
11374 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
11376 /* Use hard registers if we have aligned source or destination so we can use
11377 load/store multiple with contiguous registers. */
11378 if (dst_aligned || src_aligned)
11379 for (i = 0; i < interleave_factor; i++)
11380 regs[i] = gen_rtx_REG (SImode, i);
11382 for (i = 0; i < interleave_factor; i++)
11383 regs[i] = gen_reg_rtx (SImode);
11385 dst = copy_addr_to_reg (XEXP (dstbase, 0));
11386 src = copy_addr_to_reg (XEXP (srcbase, 0));
11388 srcoffset = dstoffset = 0;
11390 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
11391 For copying the last bytes we want to subtract this offset again. */
11392 src_autoinc = dst_autoinc = 0;
11394 for (i = 0; i < interleave_factor; i++)
11397 /* Copy BLOCK_SIZE_BYTES chunks. */
11399 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
11402 if (src_aligned && interleave_factor > 1)
11404 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
11405 TRUE, srcbase, &srcoffset));
11406 src_autoinc += UNITS_PER_WORD * interleave_factor;
11410 for (j = 0; j < interleave_factor; j++)
11412 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
11414 mem = adjust_automodify_address (srcbase, SImode, addr,
11415 srcoffset + j * UNITS_PER_WORD);
11416 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11418 srcoffset += block_size_bytes;
11422 if (dst_aligned && interleave_factor > 1)
11424 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
11425 TRUE, dstbase, &dstoffset));
11426 dst_autoinc += UNITS_PER_WORD * interleave_factor;
11430 for (j = 0; j < interleave_factor; j++)
11432 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
11434 mem = adjust_automodify_address (dstbase, SImode, addr,
11435 dstoffset + j * UNITS_PER_WORD);
11436 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11438 dstoffset += block_size_bytes;
11441 remaining -= block_size_bytes;
11444 /* Copy any whole words left (note these aren't interleaved with any
11445 subsequent halfword/byte load/stores in the interests of simplicity). */
11447 words = remaining / UNITS_PER_WORD;
11449 gcc_assert (words < interleave_factor);
11451 if (src_aligned && words > 1)
11453 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
11455 src_autoinc += UNITS_PER_WORD * words;
11459 for (j = 0; j < words; j++)
11461 addr = plus_constant (Pmode, src,
11462 srcoffset + j * UNITS_PER_WORD - src_autoinc);
11463 mem = adjust_automodify_address (srcbase, SImode, addr,
11464 srcoffset + j * UNITS_PER_WORD);
11465 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11467 srcoffset += words * UNITS_PER_WORD;
11470 if (dst_aligned && words > 1)
11472 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
11474 dst_autoinc += words * UNITS_PER_WORD;
11478 for (j = 0; j < words; j++)
11480 addr = plus_constant (Pmode, dst,
11481 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
11482 mem = adjust_automodify_address (dstbase, SImode, addr,
11483 dstoffset + j * UNITS_PER_WORD);
11484 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11486 dstoffset += words * UNITS_PER_WORD;
11489 remaining -= words * UNITS_PER_WORD;
11491 gcc_assert (remaining < 4);
11493 /* Copy a halfword if necessary. */
11495 if (remaining >= 2)
11497 halfword_tmp = gen_reg_rtx (SImode);
11499 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11500 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
11501 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
11503 /* Either write out immediately, or delay until we've loaded the last
11504 byte, depending on interleave factor. */
11505 if (interleave_factor == 1)
11507 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11508 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11509 emit_insn (gen_unaligned_storehi (mem,
11510 gen_lowpart (HImode, halfword_tmp)));
11511 halfword_tmp = NULL;
11519 gcc_assert (remaining < 2);
11521 /* Copy last byte. */
11523 if ((remaining & 1) != 0)
11525 byte_tmp = gen_reg_rtx (SImode);
11527 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11528 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
11529 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
11531 if (interleave_factor == 1)
11533 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11534 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11535 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11544 /* Store last halfword if we haven't done so already. */
11548 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11549 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11550 emit_insn (gen_unaligned_storehi (mem,
11551 gen_lowpart (HImode, halfword_tmp)));
11555 /* Likewise for last byte. */
11559 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11560 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11561 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11565 gcc_assert (remaining == 0 && srcoffset == dstoffset);
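/* Worked example (assumed parameters): for LENGTH == 7 with
   INTERLEAVE_FACTOR == 1 and both ends unaligned, the code above emits one
   4-byte chunk (unaligned ldr/str), no whole-word tail, then one halfword
   copy (ldrh/strh) and one byte copy, leaving REMAINING == 0.  */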
11568 /* From mips_adjust_block_mem:
11570 Helper function for doing a loop-based block operation on memory
reference MEM.  Each iteration of the loop will operate on LENGTH
bytes of MEM.
11574 Create a new base register for use within the loop and point it to
11575 the start of MEM. Create a new memory reference that uses this
11576 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
11579 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
11582 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
11584 /* Although the new mem does not refer to a known location,
11585 it does keep up to LENGTH bytes of alignment. */
11586 *loop_mem = change_address (mem, BLKmode, *loop_reg);
11587 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
11590 /* From mips_block_move_loop:
11592 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
11593 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
11594 the memory regions do not overlap. */
11597 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
11598 unsigned int interleave_factor,
11599 HOST_WIDE_INT bytes_per_iter)
11601 rtx label, src_reg, dest_reg, final_src, test;
11602 HOST_WIDE_INT leftover;
11604 leftover = length % bytes_per_iter;
11605 length -= leftover;
11607 /* Create registers and memory references for use within the loop. */
11608 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
11609 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
/* Calculate the value that SRC_REG should have after the last iteration of
   the loop.  */
11613 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
11614 0, 0, OPTAB_WIDEN);
11616 /* Emit the start of the loop. */
11617 label = gen_label_rtx ();
11618 emit_label (label);
11620 /* Emit the loop body. */
11621 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
11622 interleave_factor);
11624 /* Move on to the next block. */
11625 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
11626 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
11628 /* Emit the loop condition. */
11629 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
11630 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
11632 /* Mop up any left-over bytes. */
11634 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
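/* Worked example (assumed parameters): for LENGTH == 37 and
   BYTES_PER_ITER == 8, LEFTOVER == 5, so the loop copies 32 bytes in four
   iterations and the straight-line call above mops up the last 5 bytes.  */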
11637 /* Emit a block move when either the source or destination is unaligned (not
11638 aligned to a four-byte boundary). This may need further tuning depending on
11639 core type, optimize_size setting, etc. */
11642 arm_movmemqi_unaligned (rtx *operands)
11644 HOST_WIDE_INT length = INTVAL (operands[2]);
11648 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
11649 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
11650 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
11651 size of code if optimizing for size. We'll use ldm/stm if src_aligned
11652 or dst_aligned though: allow more interleaving in those cases since the
11653 resulting code can be smaller. */
11654 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
11655 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
11658 arm_block_move_unaligned_loop (operands[0], operands[1], length,
11659 interleave_factor, bytes_per_iter);
11661 arm_block_move_unaligned_straight (operands[0], operands[1], length,
11662 interleave_factor);
11666 /* Note that the loop created by arm_block_move_unaligned_loop may be
subject to loop unrolling, which makes tuning this condition a little
awkward.  */
11670 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
11672 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
11679 arm_gen_movmemqi (rtx *operands)
11681 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
11682 HOST_WIDE_INT srcoffset, dstoffset;
11684 rtx src, dst, srcbase, dstbase;
11685 rtx part_bytes_reg = NULL;
11688 if (!CONST_INT_P (operands[2])
11689 || !CONST_INT_P (operands[3])
11690 || INTVAL (operands[2]) > 64)
11693 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
11694 return arm_movmemqi_unaligned (operands);
11696 if (INTVAL (operands[3]) & 3)
11699 dstbase = operands[0];
11700 srcbase = operands[1];
11702 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
11703 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
11705 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
11706 out_words_to_go = INTVAL (operands[2]) / 4;
11707 last_bytes = INTVAL (operands[2]) & 3;
11708 dstoffset = srcoffset = 0;
11710 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
11711 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
for (i = 0; in_words_to_go >= 2; i += 4)
11715 if (in_words_to_go > 4)
11716 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
11717 TRUE, srcbase, &srcoffset));
11719 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
11720 src, FALSE, srcbase,
11723 if (out_words_to_go)
11725 if (out_words_to_go > 4)
11726 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
11727 TRUE, dstbase, &dstoffset));
11728 else if (out_words_to_go != 1)
11729 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
11730 out_words_to_go, dst,
11733 dstbase, &dstoffset));
11736 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11737 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
11738 if (last_bytes != 0)
11740 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
11746 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
11747 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
11750 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
11751 if (out_words_to_go)
11755 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11756 sreg = copy_to_reg (mem);
11758 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11759 emit_move_insn (mem, sreg);
11762 gcc_assert (!in_words_to_go); /* Sanity check */
11765 if (in_words_to_go)
11767 gcc_assert (in_words_to_go > 0);
11769 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11770 part_bytes_reg = copy_to_mode_reg (SImode, mem);
11773 gcc_assert (!last_bytes || part_bytes_reg);
11775 if (BYTES_BIG_ENDIAN && last_bytes)
11777 rtx tmp = gen_reg_rtx (SImode);
11779 /* The bytes we want are in the top end of the word. */
11780 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
11781 GEN_INT (8 * (4 - last_bytes))));
11782 part_bytes_reg = tmp;
11786 mem = adjust_automodify_address (dstbase, QImode,
11787 plus_constant (Pmode, dst,
11789 dstoffset + last_bytes - 1);
11790 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11794 tmp = gen_reg_rtx (SImode);
11795 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
11796 part_bytes_reg = tmp;
11803 if (last_bytes > 1)
11805 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
11806 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
11810 rtx tmp = gen_reg_rtx (SImode);
11811 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
11812 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
11813 part_bytes_reg = tmp;
11820 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
11821 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11828 /* Select a dominance comparison mode if possible for a test of the general
11829 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
11830 COND_OR == DOM_CC_X_AND_Y => (X && Y)
11831 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
11832 COND_OR == DOM_CC_X_OR_Y => (X || Y)
11833 In all cases OP will be either EQ or NE, but we don't need to know which
11834 here. If we are unable to support a dominance comparison we return
11835 CC mode. This will then fail to match for the RTL expressions that
11836 generate this call. */
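/* For example (illustrative only): with DOM_CC_X_AND_Y and both
   comparisons EQ, the function below returns CC_DEQmode, which describes
   the ARM conditional-compare sequence

	cmp	r0, r1
	cmpeq	r2, r3

   where the final EQ condition holds only if both tests passed.  */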
11838 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
11840 enum rtx_code cond1, cond2;
11843 /* Currently we will probably get the wrong result if the individual
11844 comparisons are not simple. This also ensures that it is safe to
11845 reverse a comparison if necessary. */
11846 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
11848 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
11852 /* The if_then_else variant of this tests the second condition if the
11853 first passes, but is true if the first fails. Reverse the first
11854 condition to get a true "inclusive-or" expression. */
11855 if (cond_or == DOM_CC_NX_OR_Y)
11856 cond1 = reverse_condition (cond1);
11858 /* If the comparisons are not equal, and one doesn't dominate the other,
11859 then we can't do this. */
11861 && !comparison_dominates_p (cond1, cond2)
11862 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
11867 enum rtx_code temp = cond1;
11875 if (cond_or == DOM_CC_X_AND_Y)
11880 case EQ: return CC_DEQmode;
11881 case LE: return CC_DLEmode;
11882 case LEU: return CC_DLEUmode;
11883 case GE: return CC_DGEmode;
11884 case GEU: return CC_DGEUmode;
11885 default: gcc_unreachable ();
11889 if (cond_or == DOM_CC_X_AND_Y)
11901 gcc_unreachable ();
11905 if (cond_or == DOM_CC_X_AND_Y)
11917 gcc_unreachable ();
11921 if (cond_or == DOM_CC_X_AND_Y)
11922 return CC_DLTUmode;
11927 return CC_DLTUmode;
11929 return CC_DLEUmode;
11933 gcc_unreachable ();
11937 if (cond_or == DOM_CC_X_AND_Y)
11938 return CC_DGTUmode;
11943 return CC_DGTUmode;
11945 return CC_DGEUmode;
11949 gcc_unreachable ();
/* The remaining cases only occur when both comparisons are the
   same.  */
11955 gcc_assert (cond1 == cond2);
11959 gcc_assert (cond1 == cond2);
11963 gcc_assert (cond1 == cond2);
11967 gcc_assert (cond1 == cond2);
11968 return CC_DLEUmode;
11971 gcc_assert (cond1 == cond2);
11972 return CC_DGEUmode;
11975 gcc_unreachable ();
11980 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
11982 /* All floating point compares return CCFP if it is an equality
11983 comparison, and CCFPE otherwise. */
11984 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12007 gcc_unreachable ();
12011 /* A compare with a shifted operand. Because of canonicalization, the
12012 comparison will have to be swapped when we emit the assembler. */
12013 if (GET_MODE (y) == SImode
12014 && (REG_P (y) || (GET_CODE (y) == SUBREG))
12015 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
12016 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
12017 || GET_CODE (x) == ROTATERT))
12020 /* This operation is performed swapped, but since we only rely on the Z
12021 flag we don't need an additional mode. */
12022 if (GET_MODE (y) == SImode
12023 && (REG_P (y) || (GET_CODE (y) == SUBREG))
12024 && GET_CODE (x) == NEG
12025 && (op == EQ || op == NE))
12028 /* This is a special case that is used by combine to allow a
12029 comparison of a shifted byte load to be split into a zero-extend
12030 followed by a comparison of the shifted integer (only valid for
12031 equalities and unsigned inequalities). */
12032 if (GET_MODE (x) == SImode
12033 && GET_CODE (x) == ASHIFT
12034 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
12035 && GET_CODE (XEXP (x, 0)) == SUBREG
12036 && MEM_P (SUBREG_REG (XEXP (x, 0)))
12037 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
12038 && (op == EQ || op == NE
12039 || op == GEU || op == GTU || op == LTU || op == LEU)
12040 && CONST_INT_P (y))
/* A construct for a conditional compare: if the false arm contains
   0, then both conditions must be true; otherwise either condition
   must be true.  Not all conditions are possible, so CCmode is
12046 returned if it can't be done. */
12047 if (GET_CODE (x) == IF_THEN_ELSE
12048 && (XEXP (x, 2) == const0_rtx
12049 || XEXP (x, 2) == const1_rtx)
12050 && COMPARISON_P (XEXP (x, 0))
12051 && COMPARISON_P (XEXP (x, 1)))
12052 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
12053 INTVAL (XEXP (x, 2)));
12055 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
12056 if (GET_CODE (x) == AND
12057 && (op == EQ || op == NE)
12058 && COMPARISON_P (XEXP (x, 0))
12059 && COMPARISON_P (XEXP (x, 1)))
12060 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
12063 if (GET_CODE (x) == IOR
12064 && (op == EQ || op == NE)
12065 && COMPARISON_P (XEXP (x, 0))
12066 && COMPARISON_P (XEXP (x, 1)))
12067 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
12070 /* An operation (on Thumb) where we want to test for a single bit.
12071 This is done by shifting that bit up into the top bit of a
12072 scratch register; we can then branch on the sign bit. */
12074 && GET_MODE (x) == SImode
12075 && (op == EQ || op == NE)
12076 && GET_CODE (x) == ZERO_EXTRACT
12077 && XEXP (x, 1) == const1_rtx)
/* An operation that sets the condition codes as a side-effect; the
   V flag is not set correctly, so we can only use comparisons where
   this doesn't matter.  (For LT and GE we can use "mi" and "pl"
   respectively.)  */
12084 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
12085 if (GET_MODE (x) == SImode
12087 && (op == EQ || op == NE || op == LT || op == GE)
12088 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
12089 || GET_CODE (x) == AND || GET_CODE (x) == IOR
12090 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
12091 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
12092 || GET_CODE (x) == LSHIFTRT
12093 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
12094 || GET_CODE (x) == ROTATERT
12095 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
12096 return CC_NOOVmode;
12098 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
12101 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
12102 && GET_CODE (x) == PLUS
12103 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
12106 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
12112 /* A DImode comparison against zero can be implemented by
12113 or'ing the two halves together. */
12114 if (y == const0_rtx)
12117 /* We can do an equality test in three Thumb instructions. */
12127 /* DImode unsigned comparisons can be implemented by cmp +
cmpeq without a scratch register.  Not worth doing in
Thumb-2.  */
12139 /* DImode signed and unsigned comparisons can be implemented
12140 by cmp + sbcs with a scratch register, but that does not
12141 set the Z flag - we must reverse GT/LE/GTU/LEU. */
12142 gcc_assert (op != EQ && op != NE);
12146 gcc_unreachable ();
12150 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
12151 return GET_MODE (x);
12156 /* X and Y are two things to compare using CODE. Emit the compare insn and
12157 return the rtx for register 0 in the proper mode. FP means this is a
12158 floating point compare: I don't think that it is needed on the arm. */
12160 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
12162 enum machine_mode mode;
12164 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
12166 /* We might have X as a constant, Y as a register because of the predicates
12167 used for cmpdi. If so, force X to a register here. */
12168 if (dimode_comparison && !REG_P (x))
12169 x = force_reg (DImode, x);
12171 mode = SELECT_CC_MODE (code, x, y);
12172 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
12174 if (dimode_comparison
12175 && mode != CC_CZmode)
12179 /* To compare two non-zero values for equality, XOR them and
12180 then compare against zero. Not used for ARM mode; there
12181 CC_CZmode is cheaper. */
12182 if (mode == CC_Zmode && y != const0_rtx)
12184 gcc_assert (!reload_completed);
12185 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
12189 /* A scratch register is required. */
12190 if (reload_completed)
12191 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
12193 scratch = gen_rtx_SCRATCH (SImode);
12195 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
12196 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
12197 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
12200 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
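/* Illustrative example (assumed registers): comparing two nonzero DImode
   values X and Y for equality in Thumb mode is rewritten above as

	t = x ^ y	(via expand_binop (DImode, xor_optab, ...))
	(compare:CC_Z t (const_int 0))

   so that only a comparison against zero remains.  */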
12205 /* Generate a sequence of insns that will generate the correct return
address mask depending on the physical architecture that the program
is running on.  */
12209 arm_gen_return_addr_mask (void)
12211 rtx reg = gen_reg_rtx (Pmode);
12213 emit_insn (gen_return_addr_mask (reg));
12218 arm_reload_in_hi (rtx *operands)
12220 rtx ref = operands[1];
12222 HOST_WIDE_INT offset = 0;
12224 if (GET_CODE (ref) == SUBREG)
12226 offset = SUBREG_BYTE (ref);
12227 ref = SUBREG_REG (ref);
12232 /* We have a pseudo which has been spilt onto the stack; there
12233 are two cases here: the first where there is a simple
12234 stack-slot replacement and a second where the stack-slot is
12235 out of range, or is used as a subreg. */
12236 if (reg_equiv_mem (REGNO (ref)))
12238 ref = reg_equiv_mem (REGNO (ref));
12239 base = find_replacement (&XEXP (ref, 0));
12242 /* The slot is out of range, or was dressed up in a SUBREG. */
12243 base = reg_equiv_address (REGNO (ref));
12246 base = find_replacement (&XEXP (ref, 0));
12248 /* Handle the case where the address is too complex to be offset by 1. */
12249 if (GET_CODE (base) == MINUS
12250 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
12252 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12254 emit_set_insn (base_plus, base);
12257 else if (GET_CODE (base) == PLUS)
12259 /* The addend must be CONST_INT, or we would have dealt with it above. */
12260 HOST_WIDE_INT hi, lo;
12262 offset += INTVAL (XEXP (base, 1));
12263 base = XEXP (base, 0);
12265 /* Rework the address into a legal sequence of insns. */
12266 /* Valid range for lo is -4095 -> 4095 */
12269 : -((-offset) & 0xfff));
12271 /* Corner case, if lo is the max offset then we would be out of range
12272 once we have added the additional 1 below, so bump the msb into the
12273 pre-loading insn(s). */
12277 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
12278 ^ (HOST_WIDE_INT) 0x80000000)
12279 - (HOST_WIDE_INT) 0x80000000);
12281 gcc_assert (hi + lo == offset);
12285 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12287 /* Get the base address; addsi3 knows how to handle constants
12288 that require more than one insn. */
12289 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
12295 /* Operands[2] may overlap operands[0] (though it won't overlap
12296 operands[1]), that's why we asked for a DImode reg -- so we can
12297 use the bit that does not overlap. */
12298 if (REGNO (operands[2]) == REGNO (operands[0]))
12299 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12301 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
12303 emit_insn (gen_zero_extendqisi2 (scratch,
12304 gen_rtx_MEM (QImode,
12305 plus_constant (Pmode, base,
12307 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
12308 gen_rtx_MEM (QImode,
12309 plus_constant (Pmode, base,
12311 if (!BYTES_BIG_ENDIAN)
12312 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
12313 gen_rtx_IOR (SImode,
12316 gen_rtx_SUBREG (SImode, operands[0], 0),
12320 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
12321 gen_rtx_IOR (SImode,
12322 gen_rtx_ASHIFT (SImode, scratch,
12324 gen_rtx_SUBREG (SImode, operands[0], 0)));
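/* Assembly sketch of the little-endian case above (register names
   assumed):

	ldrb	rS, [base, #1]	@ high byte into the scratch
	ldrb	rD, [base]	@ low byte, zero-extended, into the output
	orr	rD, rD, rS, lsl #8

   The big-endian case instead shifts the byte at the lower address into
   bits 8-15.  */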
12327 /* Handle storing a half-word to memory during reload by synthesizing as two
12328 byte stores. Take care not to clobber the input values until after we
12329 have moved them somewhere safe. This code assumes that if the DImode
12330 scratch in operands[2] overlaps either the input value or output address
12331 in some way, then that value must die in this insn (we absolutely need
12332 two scratch registers for some corner cases). */
12334 arm_reload_out_hi (rtx *operands)
12336 rtx ref = operands[0];
12337 rtx outval = operands[1];
12339 HOST_WIDE_INT offset = 0;
12341 if (GET_CODE (ref) == SUBREG)
12343 offset = SUBREG_BYTE (ref);
12344 ref = SUBREG_REG (ref);
12349 /* We have a pseudo which has been spilt onto the stack; there
12350 are two cases here: the first where there is a simple
12351 stack-slot replacement and a second where the stack-slot is
12352 out of range, or is used as a subreg. */
12353 if (reg_equiv_mem (REGNO (ref)))
12355 ref = reg_equiv_mem (REGNO (ref));
12356 base = find_replacement (&XEXP (ref, 0));
12359 /* The slot is out of range, or was dressed up in a SUBREG. */
12360 base = reg_equiv_address (REGNO (ref));
12363 base = find_replacement (&XEXP (ref, 0));
12365 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
12367 /* Handle the case where the address is too complex to be offset by 1. */
12368 if (GET_CODE (base) == MINUS
12369 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
12371 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12373 /* Be careful not to destroy OUTVAL. */
12374 if (reg_overlap_mentioned_p (base_plus, outval))
12376 /* Updating base_plus might destroy outval, see if we can
12377 swap the scratch and base_plus. */
12378 if (!reg_overlap_mentioned_p (scratch, outval))
12381 scratch = base_plus;
12386 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12388 /* Be conservative and copy OUTVAL into the scratch now,
12389 this should only be necessary if outval is a subreg
12390 of something larger than a word. */
12391 /* XXX Might this clobber base? I can't see how it can,
12392 since scratch is known to overlap with OUTVAL, and
12393 must be wider than a word. */
12394 emit_insn (gen_movhi (scratch_hi, outval));
12395 outval = scratch_hi;
12399 emit_set_insn (base_plus, base);
12402 else if (GET_CODE (base) == PLUS)
12404 /* The addend must be CONST_INT, or we would have dealt with it above. */
12405 HOST_WIDE_INT hi, lo;
12407 offset += INTVAL (XEXP (base, 1));
12408 base = XEXP (base, 0);
12410 /* Rework the address into a legal sequence of insns. */
12411 /* Valid range for lo is -4095 -> 4095 */
12414 : -((-offset) & 0xfff));
12416 /* Corner case, if lo is the max offset then we would be out of range
12417 once we have added the additional 1 below, so bump the msb into the
12418 pre-loading insn(s). */
12422 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
12423 ^ (HOST_WIDE_INT) 0x80000000)
12424 - (HOST_WIDE_INT) 0x80000000);
12426 gcc_assert (hi + lo == offset);
12430 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12432 /* Be careful not to destroy OUTVAL. */
12433 if (reg_overlap_mentioned_p (base_plus, outval))
12435 /* Updating base_plus might destroy outval, see if we
12436 can swap the scratch and base_plus. */
12437 if (!reg_overlap_mentioned_p (scratch, outval))
12440 scratch = base_plus;
12445 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12447 /* Be conservative and copy outval into scratch now,
12448 this should only be necessary if outval is a
12449 subreg of something larger than a word. */
12450 /* XXX Might this clobber base? I can't see how it
can, since scratch is known to overlap with
OUTVAL.  */
12453 emit_insn (gen_movhi (scratch_hi, outval));
12454 outval = scratch_hi;
12458 /* Get the base address; addsi3 knows how to handle constants
12459 that require more than one insn. */
12460 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
12466 if (BYTES_BIG_ENDIAN)
12468 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12469 plus_constant (Pmode, base,
12471 gen_lowpart (QImode, outval)));
12472 emit_insn (gen_lshrsi3 (scratch,
12473 gen_rtx_SUBREG (SImode, outval, 0),
12475 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12477 gen_lowpart (QImode, scratch)));
12481 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12483 gen_lowpart (QImode, outval)));
12484 emit_insn (gen_lshrsi3 (scratch,
12485 gen_rtx_SUBREG (SImode, outval, 0),
12487 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12488 plus_constant (Pmode, base,
12490 gen_lowpart (QImode, scratch)));
12494 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
12495 (padded to the size of a word) should be passed in a register. */
12498 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
12500 if (TARGET_AAPCS_BASED)
12501 return must_pass_in_stack_var_size (mode, type);
12503 return must_pass_in_stack_var_size_or_pad (mode, type);
12507 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
12508 Return true if an argument passed on the stack should be padded upwards,
12509 i.e. if the least-significant byte has useful data.
12510 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
12511 aggregate types are placed in the lowest memory address. */
12514 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
12516 if (!TARGET_AAPCS_BASED)
12517 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
12519 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
12526 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
12527 Return !BYTES_BIG_ENDIAN if the least significant byte of the
12528 register has useful data, and return the opposite if the most
12529 significant byte does. */
12532 arm_pad_reg_upward (enum machine_mode mode,
12533 tree type, int first ATTRIBUTE_UNUSED)
12535 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
12537 /* For AAPCS, small aggregates, small fixed-point types,
12538 and small complex types are always padded upwards. */
12541 if ((AGGREGATE_TYPE_P (type)
12542 || TREE_CODE (type) == COMPLEX_TYPE
12543 || FIXED_POINT_TYPE_P (type))
12544 && int_size_in_bytes (type) <= 4)
12549 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
12550 && GET_MODE_SIZE (mode) <= 4)
12555 /* Otherwise, use default padding. */
12556 return !BYTES_BIG_ENDIAN;
12559 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
12560 assuming that the address in the base register is word aligned. */
12562 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
12564 HOST_WIDE_INT max_offset;
12566 /* Offset must be a multiple of 4 in Thumb mode. */
12567 if (TARGET_THUMB2 && ((offset & 3) != 0))
12572 else if (TARGET_ARM)
12577 return ((offset <= max_offset) && (offset >= -max_offset));
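/* Worked examples, assuming the usual limits of 1020 (Thumb-2) and 255
   (ARM state): in Thumb-2 an offset of 1020 is accepted but 1022 is not
   (not a multiple of 4), nor is 1024 (out of range); in ARM state any
   byte offset in -255..255 is accepted.  */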
12580 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
12581 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
12582 Assumes that the address in the base register RN is word aligned. Pattern
12583 guarantees that both memory accesses use the same base register,
12584 the offsets are constants within the range, and the gap between the offsets is 4.
If reload has completed, check that the registers are legal.  WBACK
indicates whether the address is updated.  LOAD indicates whether the
memory access is a load or a store.  */
12588 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
12589 bool wback, bool load)
12591 unsigned int t, t2, n;
12593 if (!reload_completed)
12596 if (!offset_ok_for_ldrd_strd (offset))
12603 if ((TARGET_THUMB2)
12604 && ((wback && (n == t || n == t2))
12605 || (t == SP_REGNUM)
12606 || (t == PC_REGNUM)
12607 || (t2 == SP_REGNUM)
12608 || (t2 == PC_REGNUM)
12609 || (!load && (n == PC_REGNUM))
12610 || (load && (t == t2))
12611 /* Triggers Cortex-M3 LDRD errata. */
12612 || (!wback && load && fix_cm3_ldrd && (n == t))))
12616 && ((wback && (n == t || n == t2))
12617 || (t2 == PC_REGNUM)
12618 || (t % 2 != 0) /* First destination register is not even. */
12620 /* PC can be used as base register (for offset addressing only),
but it is deprecated.  */
12622 || (n == PC_REGNUM)))
12629 /* Print a symbolic form of X to the debug file, F. */
12631 arm_print_value (FILE *f, rtx x)
12633 switch (GET_CODE (x))
12636 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
12640 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
12648 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
12650 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
12651 if (i < (CONST_VECTOR_NUNITS (x) - 1))
12659 fprintf (f, "\"%s\"", XSTR (x, 0));
12663 fprintf (f, "`%s'", XSTR (x, 0));
12667 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
12671 arm_print_value (f, XEXP (x, 0));
12675 arm_print_value (f, XEXP (x, 0));
12677 arm_print_value (f, XEXP (x, 1));
12685 fprintf (f, "????");
12690 /* Routines for manipulation of the constant pool. */
12692 /* Arm instructions cannot load a large constant directly into a
12693 register; they have to come from a pc relative load. The constant
12694 must therefore be placed in the addressable range of the pc
12695 relative load. Depending on the precise pc relative load
12696 instruction the range is somewhere between 256 bytes and 4k. This
12697 means that we often have to dump a constant inside a function, and
12698 generate code to branch around it.
12700 It is important to minimize this, since the branches will slow
12701 things down and make the code larger.
12703 Normally we can hide the table after an existing unconditional
12704 branch so that there is no interruption of the flow, but in the
worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long	value
	L2:
	...
12723 We fix this by performing a scan after scheduling, which notices
12724 which instructions need to have their operands fetched from the
12725 constant table and builds the table.
12727 The algorithm starts by building a table of all the constants that
12728 need fixing up and all the natural barriers in the function (places
12729 where a constant table can be dropped without breaking the flow).
12730 For each fixup we note how far the pc-relative replacement will be
12731 able to reach and the offset of the instruction into the function.
12733 Having built the table we then group the fixes together to form
12734 tables that are as large as possible (subject to addressing
12735 constraints) and emit each table of constants after the last
12736 barrier that is within range of all the instructions in the group.
12737 If a group does not contain a barrier, then we forcibly create one
12738 by inserting a jump instruction into the flow. Once the table has
12739 been inserted, the insns are then modified to reference the
12740 relevant entry in the pool.
12742 Possible enhancements to the algorithm (not implemented) are:
12744 1) For some processors and object formats, there may be benefit in
12745 aligning the pools to the start of cache lines; this alignment
would need to be taken into account when calculating addressability
of a group.  */
12749 /* These typedefs are located at the start of this file, so that
12750 they can be used in the prototypes there. This comment is to
12751 remind readers of that fact so that the following structures
12752 can be understood more easily.
12754 typedef struct minipool_node Mnode;
12755 typedef struct minipool_fixup Mfix; */
12757 struct minipool_node
12759 /* Doubly linked chain of entries. */
12762 /* The maximum offset into the code that this entry can be placed. While
12763 pushing fixes for forward references, all entries are sorted in order
12764 of increasing max_address. */
12765 HOST_WIDE_INT max_address;
12766 /* Similarly for an entry inserted for a backwards ref. */
12767 HOST_WIDE_INT min_address;
12768 /* The number of fixes referencing this entry. This can become zero
12769 if we "unpush" an entry. In this case we ignore the entry when we
12770 come to emit the code. */
12772 /* The offset from the start of the minipool. */
12773 HOST_WIDE_INT offset;
/* The value in the table.  */
rtx value;
/* The mode of the value.  */
12777 enum machine_mode mode;
12778 /* The size of the value. With iWMMXt enabled
sizes > 4 also imply an alignment of 8 bytes.  */
12783 struct minipool_fixup
12787 HOST_WIDE_INT address;
12789 enum machine_mode mode;
12793 HOST_WIDE_INT forwards;
12794 HOST_WIDE_INT backwards;
12797 /* Fixes less than a word need padding out to a word boundary. */
12798 #define MINIPOOL_FIX_SIZE(mode) \
12799 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
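/* For example: MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   are both 4 (padded up to a word), while MINIPOOL_FIX_SIZE (SImode) == 4
   and MINIPOOL_FIX_SIZE (DImode) == 8.  */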
12801 static Mnode * minipool_vector_head;
12802 static Mnode * minipool_vector_tail;
12803 static rtx minipool_vector_label;
12804 static int minipool_pad;
12806 /* The linked list of all minipool fixes required for this function. */
12807 Mfix * minipool_fix_head;
12808 Mfix * minipool_fix_tail;
12809 /* The fix entry for the current minipool, once it has been placed. */
12810 Mfix * minipool_barrier;
12812 /* Determines if INSN is the start of a jump table. Returns the end
12813 of the TABLE or NULL_RTX. */
12815 is_jump_table (rtx insn)
12819 if (jump_to_label_p (insn)
12820 && ((table = next_real_insn (JUMP_LABEL (insn)))
12821 == next_real_insn (insn))
12824 && (GET_CODE (PATTERN (table)) == ADDR_VEC
12825 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
12831 #ifndef JUMP_TABLES_IN_TEXT_SECTION
12832 #define JUMP_TABLES_IN_TEXT_SECTION 0
12835 static HOST_WIDE_INT
12836 get_jump_table_size (rtx insn)
/* ADDR_VECs only take room if read-only data goes into the text
   section.  */
12840 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
12842 rtx body = PATTERN (insn);
12843 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
12844 HOST_WIDE_INT size;
12845 HOST_WIDE_INT modesize;
12847 modesize = GET_MODE_SIZE (GET_MODE (body));
12848 size = modesize * XVECLEN (body, elt);
12852 /* Round up size of TBB table to a halfword boundary. */
12853 size = (size + 1) & ~(HOST_WIDE_INT)1;
12856 /* No padding necessary for TBH. */
12859 /* Add two bytes for alignment on Thumb. */
12864 gcc_unreachable ();
/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */
12875 static HOST_WIDE_INT
12876 get_label_padding (rtx label)
12878 HOST_WIDE_INT align, min_insn_size;
12880 align = 1 << label_to_alignment (label);
12881 min_insn_size = TARGET_THUMB ? 2 : 4;
12882 return align > min_insn_size ? align - min_insn_size : 0;
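/* For example: a label aligned to an 8-byte boundary in Thumb code
   (min_insn_size == 2) can be preceded by at most 8 - 2 == 6 bytes of
   padding; in ARM code (min_insn_size == 4) the same label gets at most
   4 bytes.  */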
12885 /* Move a minipool fix MP from its current location to before MAX_MP.
12886 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
12887 constraints may need updating. */
12889 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
12890 HOST_WIDE_INT max_address)
12892 /* The code below assumes these are different. */
12893 gcc_assert (mp != max_mp);
12895 if (max_mp == NULL)
12897 if (max_address < mp->max_address)
12898 mp->max_address = max_address;
12902 if (max_address > max_mp->max_address - mp->fix_size)
12903 mp->max_address = max_mp->max_address - mp->fix_size;
12905 mp->max_address = max_address;
12907 /* Unlink MP from its current position. Since max_mp is non-null,
12908 mp->prev must be non-null. */
12909 mp->prev->next = mp->next;
12910 if (mp->next != NULL)
12911 mp->next->prev = mp->prev;
12913 minipool_vector_tail = mp->prev;
12915 /* Re-insert it before MAX_MP. */
12917 mp->prev = max_mp->prev;
12920 if (mp->prev != NULL)
12921 mp->prev->next = mp;
12923 minipool_vector_head = mp;
12926 /* Save the new entry. */
/* Scan over the preceding entries and adjust their addresses as
   required.  */
12931 while (mp->prev != NULL
12932 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12934 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12941 /* Add a constant to the minipool for a forward reference. Returns the
12942 node added or NULL if the constant will not fit in this pool. */
12944 add_minipool_forward_ref (Mfix *fix)
12946 /* If set, max_mp is the first pool_entry that has a lower
12947 constraint than the one we are trying to add. */
12948 Mnode * max_mp = NULL;
12949 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
12952 /* If the minipool starts before the end of FIX->INSN then this FIX
cannot be placed into the current pool.  Furthermore, adding the
new constant pool entry may cause the pool to start FIX_SIZE bytes
earlier.  */
12956 if (minipool_vector_head &&
12957 (fix->address + get_attr_length (fix->insn)
12958 >= minipool_vector_head->max_address - fix->fix_size))
12961 /* Scan the pool to see if a constant with the same value has
12962 already been added. While we are doing this, also note the
location where we must insert the constant if it doesn't already
exist.  */
12965 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12967 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12968 && fix->mode == mp->mode
12969 && (!LABEL_P (fix->value)
12970 || (CODE_LABEL_NUMBER (fix->value)
12971 == CODE_LABEL_NUMBER (mp->value)))
12972 && rtx_equal_p (fix->value, mp->value))
12974 /* More than one fix references this entry. */
12976 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
12979 /* Note the insertion point if necessary. */
12981 && mp->max_address > max_address)
/* If we are inserting an 8-byte aligned quantity and
12985 we have not already found an insertion point, then
12986 make sure that all such 8-byte aligned quantities are
12987 placed at the start of the pool. */
12988 if (ARM_DOUBLEWORD_ALIGN
12990 && fix->fix_size >= 8
12991 && mp->fix_size < 8)
12994 max_address = mp->max_address;
12998 /* The value is not currently in the minipool, so we need to create
12999 a new entry for it. If MAX_MP is NULL, the entry will be put on
13000 the end of the list since the placement is less constrained than
13001 any existing entry. Otherwise, we insert the new fix before
MAX_MP and, if necessary, adjust the constraints on the other
entries.  */
mp = XNEW (Mnode);
13005 mp->fix_size = fix->fix_size;
13006 mp->mode = fix->mode;
13007 mp->value = fix->value;
13009 /* Not yet required for a backwards ref. */
13010 mp->min_address = -65536;
13012 if (max_mp == NULL)
13014 mp->max_address = max_address;
13016 mp->prev = minipool_vector_tail;
13018 if (mp->prev == NULL)
13020 minipool_vector_head = mp;
13021 minipool_vector_label = gen_label_rtx ();
13024 mp->prev->next = mp;
13026 minipool_vector_tail = mp;
13030 if (max_address > max_mp->max_address - mp->fix_size)
13031 mp->max_address = max_mp->max_address - mp->fix_size;
13033 mp->max_address = max_address;
13036 mp->prev = max_mp->prev;
13038 if (mp->prev != NULL)
13039 mp->prev->next = mp;
13041 minipool_vector_head = mp;
13044 /* Save the new entry. */
/* Scan over the preceding entries and adjust their addresses as
   required.  */
13049 while (mp->prev != NULL
13050 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
13052 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
13060 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
13061 HOST_WIDE_INT min_address)
13063 HOST_WIDE_INT offset;
13065 /* The code below assumes these are different. */
13066 gcc_assert (mp != min_mp);
13068 if (min_mp == NULL)
13070 if (min_address > mp->min_address)
13071 mp->min_address = min_address;
13075 /* We will adjust this below if it is too loose. */
13076 mp->min_address = min_address;
13078 /* Unlink MP from its current position. Since min_mp is non-null,
13079 mp->next must be non-null. */
13080 mp->next->prev = mp->prev;
13081 if (mp->prev != NULL)
13082 mp->prev->next = mp->next;
13084 minipool_vector_head = mp->next;
13086 /* Reinsert it after MIN_MP. */
13088 mp->next = min_mp->next;
13090 if (mp->next != NULL)
13091 mp->next->prev = mp;
13093 minipool_vector_tail = mp;
13099 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13101 mp->offset = offset;
13102 if (mp->refcount > 0)
13103 offset += mp->fix_size;
13105 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
13106 mp->next->min_address = mp->min_address + mp->fix_size;
13112 /* Add a constant to the minipool for a backward reference. Returns the
13113 node added or NULL if the constant will not fit in this pool.
13115 Note that the code for insertion for a backwards reference can be
13116 somewhat confusing because the calculated offsets for each fix do
not take into account the size of the pool (which is still under
construction).  */
13120 add_minipool_backward_ref (Mfix *fix)
13122 /* If set, min_mp is the last pool_entry that has a lower constraint
13123 than the one we are trying to add. */
13124 Mnode *min_mp = NULL;
13125 /* This can be negative, since it is only a constraint. */
13126 HOST_WIDE_INT min_address = fix->address - fix->backwards;
13129 /* If we can't reach the current pool from this insn, or if we can't
13130 insert this entry at the end of the pool without pushing other
13131 fixes out of range, then we don't try. This ensures that we
13132 can't fail later on. */
13133 if (min_address >= minipool_barrier->address
13134 || (minipool_vector_tail->min_address + fix->fix_size
13135 >= minipool_barrier->address))
13138 /* Scan the pool to see if a constant with the same value has
13139 already been added. While we are doing this, also note the
13140 location where we must insert the constant if it doesn't already exist. */
13142 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
13144 if (GET_CODE (fix->value) == GET_CODE (mp->value)
13145 && fix->mode == mp->mode
13146 && (!LABEL_P (fix->value)
13147 || (CODE_LABEL_NUMBER (fix->value)
13148 == CODE_LABEL_NUMBER (mp->value)))
13149 && rtx_equal_p (fix->value, mp->value)
13150 /* Check that there is enough slack to move this entry to the
13151 end of the table (this is conservative). */
13152 && (mp->max_address
13153 > (minipool_barrier->address
13154 + minipool_vector_tail->offset
13155 + minipool_vector_tail->fix_size)))
13158 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
13161 if (min_mp != NULL)
13162 mp->min_address += fix->fix_size;
13165 /* Note the insertion point if necessary. */
13166 if (mp->min_address < min_address)
13168 /* For now, we do not allow nodes requiring 8-byte alignment to be
13169 inserted anywhere but at the start of the pool. */
13170 if (ARM_DOUBLEWORD_ALIGN
13171 && fix->fix_size >= 8 && mp->fix_size < 8)
13176 else if (mp->max_address
13177 < minipool_barrier->address + mp->offset + fix->fix_size)
13179 /* Inserting before this entry would push the fix beyond
13180 its maximum address (which can happen if we have
13181 re-located a forwards fix); force the new fix to come after it. */
13183 if (ARM_DOUBLEWORD_ALIGN
13184 && fix->fix_size >= 8 && mp->fix_size < 8)
13189 min_address = mp->min_address + fix->fix_size;
13192 /* Do not insert a non-8-byte aligned quantity before 8-byte
13193 aligned quantities. */
13194 else if (ARM_DOUBLEWORD_ALIGN
13195 && fix->fix_size < 8
13196 && mp->fix_size >= 8)
13199 min_address = mp->min_address + fix->fix_size;
13204 /* We need to create a new entry. */
13206 mp->fix_size = fix->fix_size;
13207 mp->mode = fix->mode;
13208 mp->value = fix->value;
13210 mp->max_address = minipool_barrier->address + 65536;
13212 mp->min_address = min_address;
13214 if (min_mp == NULL)
13217 mp->next = minipool_vector_head;
13219 if (mp->next == NULL)
13221 minipool_vector_tail = mp;
13222 minipool_vector_label = gen_label_rtx ();
13225 mp->next->prev = mp;
13227 minipool_vector_head = mp;
13231 mp->next = min_mp->next;
13235 if (mp->next != NULL)
13236 mp->next->prev = mp;
13238 minipool_vector_tail = mp;
13241 /* Save the new entry. */
13249 /* Scan over the following entries and adjust their offsets. */
13250 while (mp->next != NULL)
13252 if (mp->next->min_address < mp->min_address + mp->fix_size)
13253 mp->next->min_address = mp->min_address + mp->fix_size;
13256 mp->next->offset = mp->offset + mp->fix_size;
13258 mp->next->offset = mp->offset;
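/* A sketch of the constraint arithmetic above, using made-up numbers:
   a fix at address 5000 whose insn can reach 1000 bytes backwards gets
   min_address = 5000 - 1000 = 4000, so its pool entry may not end up
   below address 4000.  Inserting it then raises the min_address of
   each following entry to at least min_address + fix_size of its
   predecessor, which is what the loop above propagates down the
   chain.  */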
13267 assign_minipool_offsets (Mfix *barrier)
13269 HOST_WIDE_INT offset = 0;
13272 minipool_barrier = barrier;
13274 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13276 mp->offset = offset;
13278 if (mp->refcount > 0)
13279 offset += mp->fix_size;
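/* Worked example with hypothetical sizes: for live entries of
   fix_size 8, 4 and 4, in pool order, the loop above assigns offsets
   0, 8 and 12.  An entry whose refcount has dropped to zero still
   receives an offset but contributes no space, so the next entry
   simply gets the same offset.  */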
13283 /* Output the literal table. */
13285 dump_minipool (rtx scan)
13291 if (ARM_DOUBLEWORD_ALIGN)
13292 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13293 if (mp->refcount > 0 && mp->fix_size >= 8)
13300 fprintf (dump_file,
13301 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
13302 INSN_UID (scan), (long) minipool_barrier->address, align64 ? 8 : 4);
13304 scan = emit_label_after (gen_label_rtx (), scan);
13305 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
13306 scan = emit_label_after (minipool_vector_label, scan);
13308 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
13310 if (mp->refcount > 0)
13314 fprintf (dump_file,
13315 ";; Offset %u, min %ld, max %ld ",
13316 (unsigned) mp->offset, (long) mp->min_address,
13317 (long) mp->max_address);
13318 arm_print_value (dump_file, mp->value);
13319 fputc ('\n', dump_file);
13322 switch (mp->fix_size)
13324 #ifdef HAVE_consttable_1
13326 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
13330 #ifdef HAVE_consttable_2
13332 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
13336 #ifdef HAVE_consttable_4
13338 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
13342 #ifdef HAVE_consttable_8
13344 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
13348 #ifdef HAVE_consttable_16
13350 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
13355 gcc_unreachable ();
13363 minipool_vector_head = minipool_vector_tail = NULL;
13364 scan = emit_insn_after (gen_consttable_end (), scan);
13365 scan = emit_barrier_after (scan);
13368 /* Return the cost of forcibly inserting a barrier after INSN. */
13370 arm_barrier_cost (rtx insn)
13372 /* Basing the location of the pool on the loop depth is preferable,
13373 but at the moment, the basic block information seems to be
13374 corrupt by this stage of the compilation. */
13375 int base_cost = 50;
13376 rtx next = next_nonnote_insn (insn);
13378 if (next != NULL && LABEL_P (next))
13381 switch (GET_CODE (insn))
13384 /* It will always be better to place the table before the label, rather
13393 return base_cost - 10;
13396 return base_cost + 10;
13400 /* Find the best place in the insn stream in the range
13401 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
13402 Create the barrier by inserting a jump and add a new fix entry for
13405 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
13407 HOST_WIDE_INT count = 0;
13409 rtx from = fix->insn;
13410 /* The instruction after which we will insert the jump. */
13411 rtx selected = NULL;
13413 /* The address at which the jump instruction will be placed. */
13414 HOST_WIDE_INT selected_address;
13416 HOST_WIDE_INT max_count = max_address - fix->address;
13417 rtx label = gen_label_rtx ();
13419 selected_cost = arm_barrier_cost (from);
13420 selected_address = fix->address;
13422 while (from && count < max_count)
13427 /* This code shouldn't have been called if there was a natural barrier within range. */
13429 gcc_assert (!BARRIER_P (from));
13431 /* Count the length of this insn. This must stay in sync with the
13432 code that pushes minipool fixes. */
13433 if (LABEL_P (from))
13434 count += get_label_padding (from);
13436 count += get_attr_length (from);
13438 /* If there is a jump table, add its length. */
13439 tmp = is_jump_table (from);
13442 count += get_jump_table_size (tmp);
13444 /* Jump tables aren't in a basic block, so base the cost on
13445 the dispatch insn. If we select this location, we will
13446 still put the pool after the table. */
13447 new_cost = arm_barrier_cost (from);
13449 if (count < max_count
13450 && (!selected || new_cost <= selected_cost))
13453 selected_cost = new_cost;
13454 selected_address = fix->address + count;
13457 /* Continue after the dispatch table. */
13458 from = NEXT_INSN (tmp);
13462 new_cost = arm_barrier_cost (from);
13464 if (count < max_count
13465 && (!selected || new_cost <= selected_cost))
13468 selected_cost = new_cost;
13469 selected_address = fix->address + count;
13472 from = NEXT_INSN (from);
13475 /* Make sure that we found a place to insert the jump. */
13476 gcc_assert (selected);
13478 /* Make sure we do not split a call and its corresponding
13479 CALL_ARG_LOCATION note. */
13480 if (CALL_P (selected))
13482 rtx next = NEXT_INSN (selected);
13483 if (next && NOTE_P (next)
13484 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
13488 /* Create a new JUMP_INSN that branches around a barrier. */
13489 from = emit_jump_insn_after (gen_jump (label), selected);
13490 JUMP_LABEL (from) = label;
13491 barrier = emit_barrier_after (from);
13492 emit_label_after (label, barrier);
13494 /* Create a minipool barrier entry for the new barrier. */
13495 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
13496 new_fix->insn = barrier;
13497 new_fix->address = selected_address;
13498 new_fix->next = fix->next;
13499 fix->next = new_fix;
13504 /* Record that there is a natural barrier in the insn stream at
13507 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
13509 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13512 fix->address = address;
13515 if (minipool_fix_head != NULL)
13516 minipool_fix_tail->next = fix;
13518 minipool_fix_head = fix;
13520 minipool_fix_tail = fix;
13523 /* Record INSN, which will need fixing up to load a value from the
13524 minipool. ADDRESS is the offset of the insn since the start of the
13525 function; LOC is a pointer to the part of the insn which requires
13526 fixing; VALUE is the constant that must be loaded, which is of type
13529 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
13530 enum machine_mode mode, rtx value)
13532 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13535 fix->address = address;
13538 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
13539 fix->value = value;
13540 fix->forwards = get_attr_pool_range (insn);
13541 fix->backwards = get_attr_neg_pool_range (insn);
13542 fix->minipool = NULL;
13544 /* If an insn doesn't have a range defined for it, then it isn't
13545 expecting to be reworked by this code. Better to stop now than
13546 to generate duff assembly code. */
13547 gcc_assert (fix->forwards || fix->backwards);
13549 /* If an entry requires 8-byte alignment then assume all constant pools
13550 require 4 bytes of padding. Trying to do this later on a per-pool
13551 basis is awkward because existing pool entries have to be modified. */
13552 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
13557 fprintf (dump_file,
13558 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
13559 GET_MODE_NAME (mode),
13560 INSN_UID (insn), (unsigned long) address,
13561 -1 * (long)fix->backwards, (long)fix->forwards);
13562 arm_print_value (dump_file, fix->value);
13563 fprintf (dump_file, "\n");
13566 /* Add it to the chain of fixes. */
13569 if (minipool_fix_head != NULL)
13570 minipool_fix_tail->next = fix;
13572 minipool_fix_head = fix;
13574 minipool_fix_tail = fix;
13577 /* Return the cost of synthesizing a 64-bit constant VAL inline.
13578 Returns the number of insns needed, or 99 if we don't know how to
13581 arm_const_double_inline_cost (rtx val)
13583 rtx lowpart, highpart;
13584 enum machine_mode mode;
13586 mode = GET_MODE (val);
13588 if (mode == VOIDmode)
13591 gcc_assert (GET_MODE_SIZE (mode) == 8);
13593 lowpart = gen_lowpart (SImode, val);
13594 highpart = gen_highpart_mode (SImode, mode, val);
13596 gcc_assert (CONST_INT_P (lowpart));
13597 gcc_assert (CONST_INT_P (highpart));
13599 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
13600 NULL_RTX, NULL_RTX, 0, 0)
13601 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
13602 NULL_RTX, NULL_RTX, 0, 0));
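/* For instance, for the DImode value 0x0000000100000001 both halves
   are the immediate 1, which arm_gen_constant can set with a single
   insn each, so the cost computed above would be 2.  (The value is
   illustrative; actual counts depend on the target's immediate
   encodings.)  */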
13605 /* Return true if it is worthwhile to split a 64-bit constant into two
13606 32-bit operations. This is the case if optimizing for size, or
13607 if we have load delay slots, or if one 32-bit part can be done with
13608 a single data operation. */
13610 arm_const_double_by_parts (rtx val)
13612 enum machine_mode mode = GET_MODE (val);
13615 if (optimize_size || arm_ld_sched)
13618 if (mode == VOIDmode)
13621 part = gen_highpart_mode (SImode, mode, val);
13623 gcc_assert (CONST_INT_P (part));
13625 if (const_ok_for_arm (INTVAL (part))
13626 || const_ok_for_arm (~INTVAL (part)))
13629 part = gen_lowpart (SImode, val);
13631 gcc_assert (CONST_INT_P (part));
13633 if (const_ok_for_arm (INTVAL (part))
13634 || const_ok_for_arm (~INTVAL (part)))
13640 /* Return true if it is possible to inline both the high and low parts
13641 of a 64-bit constant into 32-bit data processing instructions. */
13643 arm_const_double_by_immediates (rtx val)
13645 enum machine_mode mode = GET_MODE (val);
13648 if (mode == VOIDmode)
13651 part = gen_highpart_mode (SImode, mode, val);
13653 gcc_assert (CONST_INT_P (part));
13655 if (!const_ok_for_arm (INTVAL (part)))
13658 part = gen_lowpart (SImode, val);
13660 gcc_assert (CONST_INT_P (part));
13662 if (!const_ok_for_arm (INTVAL (part)))
13668 /* Scan INSN and note any of its operands that need fixing.
13669 If DO_PUSHES is false we do not actually push any of the fixups
13672 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
13676 extract_insn (insn);
13678 if (!constrain_operands (1))
13679 fatal_insn_not_found (insn);
13681 if (recog_data.n_alternatives == 0)
13684 /* Fill in recog_op_alt with information about the constraints of
13686 preprocess_constraints ();
13688 for (opno = 0; opno < recog_data.n_operands; opno++)
13690 /* Things we need to fix can only occur in inputs. */
13691 if (recog_data.operand_type[opno] != OP_IN)
13694 /* If this alternative is a memory reference, then any mention
13695 of constants in this alternative is really to fool reload
13696 into allowing us to accept one there. We need to fix them up
13697 now so that we output the right code. */
13698 if (recog_op_alt[opno][which_alternative].memory_ok)
13700 rtx op = recog_data.operand[opno];
13702 if (CONSTANT_P (op))
13705 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
13706 recog_data.operand_mode[opno], op);
13708 else if (MEM_P (op)
13709 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
13710 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
13714 rtx cop = avoid_constant_pool_reference (op);
13716 /* Casting the address of something to a mode narrower
13717 than a word can cause avoid_constant_pool_reference()
13718 to return the pool reference itself. That's no good to
13719 us here. Let's just hope that we can use the
13720 constant pool value directly. */
13722 cop = get_pool_constant (XEXP (op, 0));
13724 push_minipool_fix (insn, address,
13725 recog_data.operand_loc[opno],
13726 recog_data.operand_mode[opno], cop);
13736 /* Rewrite move insn into subtract of 0 if the condition codes will
13737 be useful in next conditional jump insn. */
13740 thumb1_reorg (void)
13746 rtx set, dest, src;
13748 rtx prev, insn = BB_END (bb);
13750 while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn))
13751 insn = PREV_INSN (insn);
13753 /* Find the last cbranchsi4_insn in basic block BB. */
13754 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
13757 /* Find the first non-note insn before INSN in basic block BB. */
13758 gcc_assert (insn != BB_HEAD (bb));
13759 prev = PREV_INSN (insn);
13760 while (prev != BB_HEAD (bb) && (NOTE_P (prev) || DEBUG_INSN_P (prev)))
13761 prev = PREV_INSN (prev);
13763 set = single_set (prev);
13767 dest = SET_DEST (set);
13768 src = SET_SRC (set);
13769 if (!low_register_operand (dest, SImode)
13770 || !low_register_operand (src, SImode))
13773 pat = PATTERN (insn);
13774 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
13775 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
13776 in INSN. Don't need to check dest since the cprop_hardreg pass propagates its copies. */
13778 if (REGNO (op0) == REGNO (src))
13780 dest = copy_rtx (dest);
13781 src = copy_rtx (src);
13782 src = gen_rtx_MINUS (SImode, src, const0_rtx);
13783 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
13784 INSN_CODE (prev) = -1;
13785 /* Set test register in INSN to dest. */
13786 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
13787 INSN_CODE (insn) = -1;
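/* Illustration with hypothetical registers: for the sequence

     movs  r2, r3
     ...
     <cbranchsi4_insn comparing r3 with 0>

   the move is rewritten as "subs r2, r3, #0", which sets the
   condition codes itself, and the branch is redirected to test r2,
   so the separate compare against zero can later be dropped.  */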
13792 /* Convert instructions to their cc-clobbering variant if possible, since
13793 that allows us to use smaller encodings. */
13796 thumb2_reorg (void)
13801 INIT_REG_SET (&live);
13803 /* We are freeing block_for_insn in the toplev to keep compatibility
13804 with old MDEP_REORGS that are not CFG based. Recompute it now. */
13805 compute_bb_for_insn ();
13812 COPY_REG_SET (&live, DF_LR_OUT (bb));
13813 df_simulate_initialize_backwards (bb, &live);
13814 FOR_BB_INSNS_REVERSE (bb, insn)
13816 if (NONJUMP_INSN_P (insn)
13817 && !REGNO_REG_SET_P (&live, CC_REGNUM)
13818 && GET_CODE (PATTERN (insn)) == SET)
13820 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
13821 rtx pat = PATTERN (insn);
13822 rtx dst = XEXP (pat, 0);
13823 rtx src = XEXP (pat, 1);
13824 rtx op0 = NULL_RTX, op1 = NULL_RTX;
13826 if (!OBJECT_P (src))
13827 op0 = XEXP (src, 0);
13829 if (BINARY_P (src))
13830 op1 = XEXP (src, 1);
13832 if (low_register_operand (dst, SImode))
13834 switch (GET_CODE (src))
13837 /* Adding two registers and storing the result
13838 in the first source is already a 16-bit
13840 if (rtx_equal_p (dst, op0)
13841 && register_operand (op1, SImode))
13844 if (low_register_operand (op0, SImode))
13846 /* ADDS <Rd>,<Rn>,<Rm> */
13847 if (low_register_operand (op1, SImode))
13849 /* ADDS <Rdn>,#<imm8> */
13850 /* SUBS <Rdn>,#<imm8> */
13851 else if (rtx_equal_p (dst, op0)
13852 && CONST_INT_P (op1)
13853 && IN_RANGE (INTVAL (op1), -255, 255))
13855 /* ADDS <Rd>,<Rn>,#<imm3> */
13856 /* SUBS <Rd>,<Rn>,#<imm3> */
13857 else if (CONST_INT_P (op1)
13858 && IN_RANGE (INTVAL (op1), -7, 7))
13864 /* RSBS <Rd>,<Rn>,#0
13865 Not handled here: see NEG below. */
13866 /* SUBS <Rd>,<Rn>,#<imm3>
13868 Not handled here: see PLUS above. */
13869 /* SUBS <Rd>,<Rn>,<Rm> */
13870 if (low_register_operand (op0, SImode)
13871 && low_register_operand (op1, SImode))
13876 /* MULS <Rdm>,<Rn>,<Rdm>
13877 As an exception to the rule, this is only used
13878 when optimizing for size since MULS is slow on all
13879 known implementations. We do not even want to use
13880 MULS in cold code, if optimizing for speed, so we
13881 test the global flag here. */
13882 if (!optimize_size)
13884 /* else fall through. */
13888 /* ANDS <Rdn>,<Rm> */
13889 if (rtx_equal_p (dst, op0)
13890 && low_register_operand (op1, SImode))
13892 else if (rtx_equal_p (dst, op1)
13893 && low_register_operand (op0, SImode))
13894 action = SWAP_CONV;
13900 /* ASRS <Rdn>,<Rm> */
13901 /* LSRS <Rdn>,<Rm> */
13902 /* LSLS <Rdn>,<Rm> */
13903 if (rtx_equal_p (dst, op0)
13904 && low_register_operand (op1, SImode))
13906 /* ASRS <Rd>,<Rm>,#<imm5> */
13907 /* LSRS <Rd>,<Rm>,#<imm5> */
13908 /* LSLS <Rd>,<Rm>,#<imm5> */
13909 else if (low_register_operand (op0, SImode)
13910 && CONST_INT_P (op1)
13911 && IN_RANGE (INTVAL (op1), 0, 31))
13916 /* RORS <Rdn>,<Rm> */
13917 if (rtx_equal_p (dst, op0)
13918 && low_register_operand (op1, SImode))
13924 /* MVNS <Rd>,<Rm> */
13925 /* NEGS <Rd>,<Rm> (a.k.a. RSBS) */
13926 if (low_register_operand (op0, SImode))
13931 /* MOVS <Rd>,#<imm8> */
13932 if (CONST_INT_P (src)
13933 && IN_RANGE (INTVAL (src), 0, 255))
13938 /* MOVS and MOV<c> with registers have different
13939 encodings, so are not relevant here. */
13947 if (action != SKIP)
13949 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13950 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13953 if (action == SWAP_CONV)
13955 src = copy_rtx (src);
13956 XEXP (src, 0) = op1;
13957 XEXP (src, 1) = op0;
13958 pat = gen_rtx_SET (VOIDmode, dst, src);
13959 vec = gen_rtvec (2, pat, clobber);
13961 else /* action == CONV */
13962 vec = gen_rtvec (2, pat, clobber);
13964 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13965 INSN_CODE (insn) = -1;
13969 if (NONDEBUG_INSN_P (insn))
13970 df_simulate_one_insn_backwards (bb, insn, &live);
13974 CLEAR_REG_SET (&live);
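/* Illustration with hypothetical registers: a plain 32-bit
   "add r0, r0, r1" whose flags result is dead is rewritten into its
   flag-setting form "adds r0, r0, r1", for which a 16-bit Thumb-2
   encoding exists.  */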
13977 /* GCC puts the pool in the wrong place for ARM, since we can only
13978 load addresses a limited distance around the pc. We do some
13979 special munging to move the constant pool values to the correct
13980 point in the code. */
13985 HOST_WIDE_INT address = 0;
13990 else if (TARGET_THUMB2)
13993 /* Ensure all insns that must be split have been split at this point.
13994 Otherwise, the pool placement code below may compute incorrect
13995 insn lengths. Note that when optimizing, all insns have already
13996 been split at this point. */
13998 split_all_insns_noflow ();
14000 minipool_fix_head = minipool_fix_tail = NULL;
14002 /* The first insn must always be a note, or the code below won't
14003 scan it properly. */
14004 insn = get_insns ();
14005 gcc_assert (NOTE_P (insn));
14008 /* Scan all the insns and record the operands that will need fixing. */
14009 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
14011 if (BARRIER_P (insn))
14012 push_minipool_barrier (insn, address);
14013 else if (INSN_P (insn))
14017 note_invalid_constants (insn, address, true);
14018 address += get_attr_length (insn);
14020 /* If the insn is a vector jump, add the size of the table
14021 and skip the table. */
14022 if ((table = is_jump_table (insn)) != NULL)
14024 address += get_jump_table_size (table);
14028 else if (LABEL_P (insn))
14029 /* Add the worst-case padding due to alignment. We don't add
14030 the _current_ padding because the minipool insertions
14031 themselves might change it. */
14032 address += get_label_padding (insn);
14035 fix = minipool_fix_head;
14037 /* Now scan the fixups and perform the required changes. */
14042 Mfix * last_added_fix;
14043 Mfix * last_barrier = NULL;
14046 /* Skip any further barriers before the next fix. */
14047 while (fix && BARRIER_P (fix->insn))
14050 /* No more fixes. */
14054 last_added_fix = NULL;
14056 for (ftmp = fix; ftmp; ftmp = ftmp->next)
14058 if (BARRIER_P (ftmp->insn))
14060 if (ftmp->address >= minipool_vector_head->max_address)
14063 last_barrier = ftmp;
14065 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
14068 last_added_fix = ftmp; /* Keep track of the last fix added. */
14071 /* If we found a barrier, drop back to that; any fixes that we
14072 could have reached but come after the barrier will now go in
14073 the next mini-pool. */
14074 if (last_barrier != NULL)
14076 /* Reduce the refcount for those fixes that won't go into this
14078 for (fdel = last_barrier->next;
14079 fdel && fdel != ftmp;
14082 fdel->minipool->refcount--;
14083 fdel->minipool = NULL;
14086 ftmp = last_barrier;
14090 /* ftmp is the first fix that we can't fit into this pool and
14091 there are no natural barriers that we could use. Insert a
14092 new barrier in the code somewhere between the previous
14093 fix and this one, and arrange to jump around it. */
14094 HOST_WIDE_INT max_address;
14096 /* The last item on the list of fixes must be a barrier, so
14097 we can never run off the end of the list of fixes without
14098 last_barrier being set. */
14101 max_address = minipool_vector_head->max_address;
14102 /* Check that there isn't another fix that is in range that
14103 we couldn't fit into this pool because the pool was
14104 already too large: we need to put the pool before such an
14105 instruction. The pool itself may come just after the
14106 fix because create_fix_barrier also allows space for a
14107 jump instruction. */
14108 if (ftmp->address < max_address)
14109 max_address = ftmp->address + 1;
14111 last_barrier = create_fix_barrier (last_added_fix, max_address);
14114 assign_minipool_offsets (last_barrier);
14118 if (!BARRIER_P (ftmp->insn)
14119 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
14126 /* Scan over the fixes we have identified for this pool, fixing them
14127 up and adding the constants to the pool itself. */
14128 for (this_fix = fix; this_fix && ftmp != this_fix;
14129 this_fix = this_fix->next)
14130 if (!BARRIER_P (this_fix->insn))
14133 = plus_constant (Pmode,
14134 gen_rtx_LABEL_REF (VOIDmode,
14135 minipool_vector_label),
14136 this_fix->minipool->offset);
14137 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
14140 dump_minipool (last_barrier->insn);
14144 /* From now on we must synthesize any constants that we can't handle
14145 directly. This can happen if the RTL gets split during final
14146 instruction generation. */
14147 after_arm_reorg = 1;
14149 /* Free the minipool memory. */
14150 obstack_free (&minipool_obstack, minipool_startobj);
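/* As a rough illustration of the ranges involved: an ARM-mode
   PC-relative load can only address a constant within about 4KB of
   the instruction (considerably less in Thumb mode), so the loop
   above repeatedly dumps the accumulated minipool and starts a new
   one before any pending fix would fall out of range.  The precise
   limits come from the pool_range / neg_pool_range insn
   attributes.  */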
14153 /* Routines to output assembly language. */
14155 /* If the rtx is the correct value then return the string of the number.
14156 In this way we can ensure that valid double constants are generated even
14157 when cross compiling. */
14159 fp_immediate_constant (rtx x)
14163 if (!fp_consts_inited)
14166 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14168 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
14172 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
14173 static const char *
14174 fp_const_from_val (REAL_VALUE_TYPE *r)
14176 if (!fp_consts_inited)
14179 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
14183 /* OPERANDS[0] is the entire list of insns that constitute the pop,
14184 OPERANDS[1] is the base register, RETURN_PC is true iff the return
14185 insn is in the list, and UPDATE is true iff the list contains an
14186 explicit update of the base register. */
14188 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
14194 const char *conditional;
14195 int num_saves = XVECLEN (operands[0], 0);
14196 unsigned int regno;
14197 unsigned int regno_base = REGNO (operands[1]);
14200 offset += update ? 1 : 0;
14201 offset += return_pc ? 1 : 0;
14203 /* Is the base register in the list? */
14204 for (i = offset; i < num_saves; i++)
14206 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
14207 /* If SP is in the list, then the base register must be SP. */
14208 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
14209 /* If base register is in the list, there must be no explicit update. */
14210 if (regno == regno_base)
14211 gcc_assert (!update);
14214 conditional = reverse ? "%?%D0" : "%?%d0";
14215 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
14217 /* Output pop (not stmfd) because it has a shorter encoding. */
14218 gcc_assert (update);
14219 sprintf (pattern, "pop%s\t{", conditional);
14223 /* Output ldmfd when the base register is SP, otherwise output ldmia.
14224 It's just a convention; their semantics are identical. */
14225 if (regno_base == SP_REGNUM)
14226 sprintf (pattern, "ldm%sfd\t", conditional);
14227 else if (TARGET_UNIFIED_ASM)
14228 sprintf (pattern, "ldmia%s\t", conditional);
14230 sprintf (pattern, "ldm%sia\t", conditional);
14232 strcat (pattern, reg_names[regno_base]);
14234 strcat (pattern, "!, {");
14236 strcat (pattern, ", {");
14239 /* Output the first destination register. */
14241 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
14243 /* Output the rest of the destination registers. */
14244 for (i = offset + 1; i < num_saves; i++)
14246 strcat (pattern, ", ");
14248 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
14251 strcat (pattern, "}");
14253 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
14254 strcat (pattern, "^");
14256 output_asm_insn (pattern, &cond);
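/* Examples of the strings built above, for a hypothetical register
   set: with SP as the base register and writeback, unified syntax
   gives "pop {r4, r5, pc}"; with a different base register the
   result has the form "ldmia r7, {r4, r5, pc}".  */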
14260 /* Output the assembly for a store multiple. */
14263 vfp_output_fstmd (rtx * operands)
14270 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
14271 p = strlen (pattern);
14273 gcc_assert (REG_P (operands[1]));
14275 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
14276 for (i = 1; i < XVECLEN (operands[2], 0); i++)
14278 p += sprintf (&pattern[p], ", d%d", base + i);
14280 strcpy (&pattern[p], "}");
14282 output_asm_insn (pattern, operands);
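/* For example (hypothetical operands), pushing three D registers
   starting at d8 produces "fstmfdd sp!, {d8, d9, d10}".  */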
14287 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
14288 number of bytes pushed. */
14291 vfp_emit_fstmd (int base_reg, int count)
14298 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
14299 register pairs are stored by a store multiple insn. We avoid this
14300 by pushing an extra pair. */
14301 if (count == 2 && !arm_arch6)
14303 if (base_reg == LAST_VFP_REGNUM - 3)
14308 /* FSTMD may not store more than 16 doubleword registers at once. Split
14309 larger stores into multiple parts (up to a maximum of two, in practice). */
14314 /* NOTE: base_reg is an internal register number, so each D register counts as 2. */
14316 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
14317 saved += vfp_emit_fstmd (base_reg, 16);
14321 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
14322 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
14324 reg = gen_rtx_REG (DFmode, base_reg);
14327 XVECEXP (par, 0, 0)
14328 = gen_rtx_SET (VOIDmode,
14331 gen_rtx_PRE_MODIFY (Pmode,
14334 (Pmode, stack_pointer_rtx,
14337 gen_rtx_UNSPEC (BLKmode,
14338 gen_rtvec (1, reg),
14339 UNSPEC_PUSH_MULT));
14341 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14342 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
14343 RTX_FRAME_RELATED_P (tmp) = 1;
14344 XVECEXP (dwarf, 0, 0) = tmp;
14346 tmp = gen_rtx_SET (VOIDmode,
14347 gen_frame_mem (DFmode, stack_pointer_rtx),
14349 RTX_FRAME_RELATED_P (tmp) = 1;
14350 XVECEXP (dwarf, 0, 1) = tmp;
14352 for (i = 1; i < count; i++)
14354 reg = gen_rtx_REG (DFmode, base_reg);
14356 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
14358 tmp = gen_rtx_SET (VOIDmode,
14359 gen_frame_mem (DFmode,
14360 plus_constant (Pmode,
14364 RTX_FRAME_RELATED_P (tmp) = 1;
14365 XVECEXP (dwarf, 0, i + 1) = tmp;
14368 par = emit_insn (par);
14369 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14370 RTX_FRAME_RELATED_P (par) = 1;
14375 /* Emit a call instruction with pattern PAT. ADDR is the address of
14376 the call target. */
14379 arm_emit_call_insn (rtx pat, rtx addr)
14383 insn = emit_call_insn (pat);
14385 /* The PIC register is live on entry to VxWorks PIC PLT entries.
14386 If the call might use such an entry, add a use of the PIC register
14387 to the instruction's CALL_INSN_FUNCTION_USAGE. */
14388 if (TARGET_VXWORKS_RTP
14390 && GET_CODE (addr) == SYMBOL_REF
14391 && (SYMBOL_REF_DECL (addr)
14392 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
14393 : !SYMBOL_REF_LOCAL_P (addr)))
14395 require_pic_register ();
14396 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
14400 /* Output a 'call' insn. */
14402 output_call (rtx *operands)
14404 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
14406 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
14407 if (REGNO (operands[0]) == LR_REGNUM)
14409 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
14410 output_asm_insn ("mov%?\t%0, %|lr", operands);
14413 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14415 if (TARGET_INTERWORK || arm_arch4t)
14416 output_asm_insn ("bx%?\t%0", operands);
14418 output_asm_insn ("mov%?\t%|pc, %0", operands);
14423 /* Output a 'call' insn that is a reference in memory. This is
14424 disabled for ARMv5, where we prefer a blx instead, because otherwise
14425 there's a significant performance overhead. */
14427 output_call_mem (rtx *operands)
14429 gcc_assert (!arm_arch5);
14430 if (TARGET_INTERWORK)
14432 output_asm_insn ("ldr%?\t%|ip, %0", operands);
14433 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14434 output_asm_insn ("bx%?\t%|ip", operands);
14436 else if (regno_use_in (LR_REGNUM, operands[0]))
14438 /* LR is used in the memory address. We load the address in the
14439 first instruction. It's safe to use IP as the target of the
14440 load since the call will kill it anyway. */
14441 output_asm_insn ("ldr%?\t%|ip, %0", operands);
14442 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14444 output_asm_insn ("bx%?\t%|ip", operands);
14446 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
14450 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14451 output_asm_insn ("ldr%?\t%|pc, %0", operands);
14458 /* Output a move from ARM registers to ARM registers of a long double.
14459 OPERANDS[0] is the destination.
14460 OPERANDS[1] is the source. */
14462 output_mov_long_double_arm_from_arm (rtx *operands)
14464 /* We have to be careful here because the two might overlap. */
14465 int dest_start = REGNO (operands[0]);
14466 int src_start = REGNO (operands[1]);
14470 if (dest_start < src_start)
14472 for (i = 0; i < 3; i++)
14474 ops[0] = gen_rtx_REG (SImode, dest_start + i);
14475 ops[1] = gen_rtx_REG (SImode, src_start + i);
14476 output_asm_insn ("mov%?\t%0, %1", ops);
14481 for (i = 2; i >= 0; i--)
14483 ops[0] = gen_rtx_REG (SImode, dest_start + i);
14484 ops[1] = gen_rtx_REG (SImode, src_start + i);
14485 output_asm_insn ("mov%?\t%0, %1", ops);
14493 arm_emit_movpair (rtx dest, rtx src)
14495 /* If the src is an immediate, simplify it. */
14496 if (CONST_INT_P (src))
14498 HOST_WIDE_INT val = INTVAL (src);
14499 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
14500 if ((val >> 16) & 0x0000ffff)
14501 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
14503 GEN_INT ((val >> 16) & 0x0000ffff));
14506 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
14507 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
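/* For an immediate such as 0x12345678 (chosen for illustration) this
   emits a movw/movt pair:

     movw  rd, #0x5678   @ low half
     movt  rd, #0x1234   @ high half, via the ZERO_EXTRACT

   while a value whose high half is zero, e.g. 0x42, needs only the
   first insn.  */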
14510 /* Output a move between double words. It must be REG<-MEM or MEM<-REG. */
14513 output_move_double (rtx *operands, bool emit, int *count)
14515 enum rtx_code code0 = GET_CODE (operands[0]);
14516 enum rtx_code code1 = GET_CODE (operands[1]);
14521 /* The only case when this might happen is when
14522 you are looking at the length of a DImode instruction
14523 that has an invalid constant in it. */
14524 if (code0 == REG && code1 != MEM)
14526 gcc_assert (!emit);
14533 unsigned int reg0 = REGNO (operands[0]);
14535 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
14537 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
14539 switch (GET_CODE (XEXP (operands[1], 0)))
14546 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
14547 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
14549 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14554 gcc_assert (TARGET_LDRD);
14556 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
14563 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
14565 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
14573 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
14575 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
14580 gcc_assert (TARGET_LDRD);
14582 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
14587 /* Autoincrement addressing modes should never have overlapping
14588 base and destination registers, and overlapping index registers
14589 are already prohibited, so this doesn't need to worry about
14591 otherops[0] = operands[0];
14592 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
14593 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
14595 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
14597 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
14599 /* Registers overlap so split out the increment. */
14602 output_asm_insn ("add%?\t%1, %1, %2", otherops);
14603 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
14610 /* Use a single insn if we can.
14611 FIXME: IWMMXT allows offsets larger than ldrd can
14612 handle, fix these up with a pair of ldr. */
14614 || !CONST_INT_P (otherops[2])
14615 || (INTVAL (otherops[2]) > -256
14616 && INTVAL (otherops[2]) < 256))
14619 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
14625 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
14626 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14636 /* Use a single insn if we can.
14637 FIXME: IWMMXT allows offsets larger than ldrd can handle,
14638 fix these up with a pair of ldr. */
14640 || !CONST_INT_P (otherops[2])
14641 || (INTVAL (otherops[2]) > -256
14642 && INTVAL (otherops[2]) < 256))
14645 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
14651 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14652 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
14662 /* We might be able to use ldrd %0, %1 here. However, the range is
14663 different from that of ldr/adr, and ldrd is broken on some ARMv7-M
14664 implementations. */
14665 /* Use the second register of the pair to avoid problematic
14667 otherops[1] = operands[1];
14669 output_asm_insn ("adr%?\t%0, %1", otherops);
14670 operands[1] = otherops[0];
14674 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14676 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
14683 /* ??? This needs checking for thumb2. */
14685 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
14686 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
14688 otherops[0] = operands[0];
14689 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
14690 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
14692 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
14694 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
14696 switch ((int) INTVAL (otherops[2]))
14700 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
14706 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
14712 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
14716 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
14717 operands[1] = otherops[0];
14719 && (REG_P (otherops[2])
14721 || (CONST_INT_P (otherops[2])
14722 && INTVAL (otherops[2]) > -256
14723 && INTVAL (otherops[2]) < 256)))
14725 if (reg_overlap_mentioned_p (operands[0],
14729 /* Swap base and index registers over to
14730 avoid a conflict. */
14732 otherops[1] = otherops[2];
14735 /* If both registers conflict, it will usually
14736 have been fixed by a splitter. */
14737 if (reg_overlap_mentioned_p (operands[0], otherops[2])
14738 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
14742 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14743 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14750 otherops[0] = operands[0];
14752 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
14757 if (CONST_INT_P (otherops[2]))
14761 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
14762 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
14764 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14770 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14776 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
14783 return "ldr%(d%)\t%0, [%1]";
14785 return "ldm%(ia%)\t%1, %M0";
14789 otherops[1] = adjust_address (operands[1], SImode, 4);
14790 /* Take care of overlapping base/data reg. */
14791 if (reg_mentioned_p (operands[0], operands[1]))
14795 output_asm_insn ("ldr%?\t%0, %1", otherops);
14796 output_asm_insn ("ldr%?\t%0, %1", operands);
14806 output_asm_insn ("ldr%?\t%0, %1", operands);
14807 output_asm_insn ("ldr%?\t%0, %1", otherops);
14817 /* Constraints should ensure this. */
14818 gcc_assert (code0 == MEM && code1 == REG);
14819 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
14821 switch (GET_CODE (XEXP (operands[0], 0)))
14827 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
14829 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14834 gcc_assert (TARGET_LDRD);
14836 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
14843 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
14845 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
14853 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
14855 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
14860 gcc_assert (TARGET_LDRD);
14862 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
14867 otherops[0] = operands[1];
14868 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
14869 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
14871 /* IWMMXT allows offsets larger than ldrd can handle,
14872 fix these up with a pair of ldr. */
14874 && CONST_INT_P (otherops[2])
14875 && (INTVAL(otherops[2]) <= -256
14876 || INTVAL(otherops[2]) >= 256))
14878 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14882 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
14883 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14892 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14893 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
14899 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14902 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
14907 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
14912 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
14913 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
14915 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
14919 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
14926 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
14933 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
14938 && (REG_P (otherops[2])
14940 || (CONST_INT_P (otherops[2])
14941 && INTVAL (otherops[2]) > -256
14942 && INTVAL (otherops[2]) < 256)))
14944 otherops[0] = operands[1];
14945 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
14947 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
14953 otherops[0] = adjust_address (operands[0], SImode, 4);
14954 otherops[1] = operands[1];
14957 output_asm_insn ("str%?\t%1, %0", operands);
14958 output_asm_insn ("str%?\t%H1, %0", otherops);
14968 /* Output a move, load or store for quad-word vectors in ARM registers. Only
14969 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
14972 output_move_quad (rtx *operands)
14974 if (REG_P (operands[0]))
14976 /* Load, or reg->reg move. */
14978 if (MEM_P (operands[1]))
14980 switch (GET_CODE (XEXP (operands[1], 0)))
14983 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14988 output_asm_insn ("adr%?\t%0, %1", operands);
14989 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
14993 gcc_unreachable ();
15001 gcc_assert (REG_P (operands[1]));
15003 dest = REGNO (operands[0]);
15004 src = REGNO (operands[1]);
15006 /* This seems pretty dumb, but hopefully GCC won't try to do it very often. */
15009 for (i = 0; i < 4; i++)
15011 ops[0] = gen_rtx_REG (SImode, dest + i);
15012 ops[1] = gen_rtx_REG (SImode, src + i);
15013 output_asm_insn ("mov%?\t%0, %1", ops);
15016 for (i = 3; i >= 0; i--)
15018 ops[0] = gen_rtx_REG (SImode, dest + i);
15019 ops[1] = gen_rtx_REG (SImode, src + i);
15020 output_asm_insn ("mov%?\t%0, %1", ops);
15026 gcc_assert (MEM_P (operands[0]));
15027 gcc_assert (REG_P (operands[1]));
15028 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
15030 switch (GET_CODE (XEXP (operands[0], 0)))
15033 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
15037 gcc_unreachable ();
15044 /* Output a VFP load or store instruction. */
15047 output_move_vfp (rtx *operands)
15049 rtx reg, mem, addr, ops[2];
15050 int load = REG_P (operands[0]);
15051 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
15052 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
15055 enum machine_mode mode;
15057 reg = operands[!load];
15058 mem = operands[load];
15060 mode = GET_MODE (reg);
15062 gcc_assert (REG_P (reg));
15063 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
15064 gcc_assert (mode == SFmode
15068 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
15069 gcc_assert (MEM_P (mem));
15071 addr = XEXP (mem, 0);
15073 switch (GET_CODE (addr))
15076 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
15077 ops[0] = XEXP (addr, 0);
15082 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
15083 ops[0] = XEXP (addr, 0);
15088 templ = "f%s%c%%?\t%%%s0, %%1%s";
15094 sprintf (buff, templ,
15095 load ? "ld" : "st",
15098 integer_p ? "\t%@ int" : "");
15099 output_asm_insn (buff, ops);
15104 /* Output a Neon double-word or quad-word load or store, or a load
15105 or store for larger structure modes.
15107 WARNING: The ordering of elements is weird in big-endian mode,
15108 because the EABI requires that vectors stored in memory appear
15109 as though they were stored by a VSTM instruction.
15110 GCC RTL defines element ordering based on in-memory order.
15111 This can be different from the architectural ordering of elements
15112 within a NEON register. The intrinsics defined in arm_neon.h use the
15113 NEON register element ordering, not the GCC RTL element ordering.
15115 For example, the in-memory ordering of a big-endian quadword
15116 vector with 16-bit elements when stored from register pair {d0,d1}
15117 will be (lowest address first, d0[N] is NEON register element N):
15119 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
15121 When necessary, quadword registers (dN, dN+1) are moved to ARM
15122 registers from rN in the order:
15124 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
15126 So that STM/LDM can be used on vectors in ARM registers, and the
15127 same memory layout will result as if VSTM/VLDM were used.
15129 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
15130 possible, which allows use of appropriate alignment tags.
15131 Note that the choice of "64" is independent of the actual vector
15132 element size; this size simply ensures that the behavior is
15133 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
15135 Due to limitations of those instructions, use of VST1.64/VLD1.64
15136 is not possible if:
15137 - the address contains PRE_DEC, or
15138 - the mode refers to more than 4 double-word registers
15140 In those cases, it would be possible to replace VSTM/VLDM by a
15141 sequence of instructions; this is not currently implemented since
15142 this is not certain to actually improve performance. */
15145 output_move_neon (rtx *operands)
15147 rtx reg, mem, addr, ops[2];
15148 int regno, nregs, load = REG_P (operands[0]);
15151 enum machine_mode mode;
15153 reg = operands[!load];
15154 mem = operands[load];
15156 mode = GET_MODE (reg);
15158 gcc_assert (REG_P (reg));
15159 regno = REGNO (reg);
15160 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
15161 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
15162 || NEON_REGNO_OK_FOR_QUAD (regno));
15163 gcc_assert (VALID_NEON_DREG_MODE (mode)
15164 || VALID_NEON_QREG_MODE (mode)
15165 || VALID_NEON_STRUCT_MODE (mode));
15166 gcc_assert (MEM_P (mem));
15168 addr = XEXP (mem, 0);
15170 /* Strip off const from addresses like (const (plus (...))). */
15171 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
15172 addr = XEXP (addr, 0);
15174 switch (GET_CODE (addr))
15177 /* We have to use vldm / vstm for too-large modes. */
15180 templ = "v%smia%%?\t%%0!, %%h1";
15181 ops[0] = XEXP (addr, 0);
15185 templ = "v%s1.64\t%%h1, %%A0";
15192 /* We have to use vldm / vstm in this case, since there is no
15193 pre-decrement form of the vld1 / vst1 instructions. */
15194 templ = "v%smdb%%?\t%%0!, %%h1";
15195 ops[0] = XEXP (addr, 0);
15200 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
15201 gcc_unreachable ();
15208 for (i = 0; i < nregs; i++)
15210 /* We're only using DImode here because it's a convenient size. */
15211 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
15212 ops[1] = adjust_address (mem, DImode, 8 * i);
15213 if (reg_overlap_mentioned_p (ops[0], mem))
15215 gcc_assert (overlap == -1);
15220 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
15221 output_asm_insn (buff, ops);
15226 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
15227 ops[1] = adjust_address (mem, SImode, 8 * overlap);
15228 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
15229 output_asm_insn (buff, ops);
15236 /* We have to use vldm / vstm for too-large modes. */
15238 templ = "v%smia%%?\t%%m0, %%h1";
15240 templ = "v%s1.64\t%%h1, %%A0";
15246 sprintf (buff, templ, load ? "ld" : "st");
15247 output_asm_insn (buff, ops);
15252 /* Compute and return the length of neon_mov<mode>, where <mode> is
15253 one of the VSTRUCT modes: EI, OI, CI or XI. */
15255 arm_attr_length_move_neon (rtx insn)
15257 rtx reg, mem, addr;
15259 enum machine_mode mode;
15261 extract_insn_cached (insn);
15263 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
15265 mode = GET_MODE (recog_data.operand[0]);
15276 gcc_unreachable ();
15280 load = REG_P (recog_data.operand[0]);
15281 reg = recog_data.operand[!load];
15282 mem = recog_data.operand[load];
15284 gcc_assert (MEM_P (mem));
15286 mode = GET_MODE (reg);
15287 addr = XEXP (mem, 0);
15289 /* Strip off const from addresses like (const (plus (...))). */
15290 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
15291 addr = XEXP (addr, 0);
15293 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
15295 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
15302 /* Return nonzero if the offset in the address is an immediate. Otherwise, return zero. */
15306 arm_address_offset_is_imm (rtx insn)
15310 extract_insn_cached (insn);
15312 if (REG_P (recog_data.operand[0]))
15315 mem = recog_data.operand[0];
15317 gcc_assert (MEM_P (mem));
15319 addr = XEXP (mem, 0);
15322 || (GET_CODE (addr) == PLUS
15323 && REG_P (XEXP (addr, 0))
15324 && CONST_INT_P (XEXP (addr, 1))))
15330 /* Output an ADD r, s, #n where n may be too big for one instruction.
15331 If adding zero to one register, output nothing. */
15333 output_add_immediate (rtx *operands)
15335 HOST_WIDE_INT n = INTVAL (operands[2]);
15337 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
15340 output_multi_immediate (operands,
15341 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
15344 output_multi_immediate (operands,
15345 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
15352 /* Output a multiple immediate operation.
15353 OPERANDS is the vector of operands referred to in the output patterns.
15354 INSTR1 is the output pattern to use for the first constant.
15355 INSTR2 is the output pattern to use for subsequent constants.
15356 IMMED_OP is the index of the constant slot in OPERANDS.
15357 N is the constant value. */
15358 static const char *
15359 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
15360 int immed_op, HOST_WIDE_INT n)
15362 #if HOST_BITS_PER_WIDE_INT > 32
15368 /* Quick and easy output. */
15369 operands[immed_op] = const0_rtx;
15370 output_asm_insn (instr1, operands);
15375 const char * instr = instr1;
15377 /* Note that n is never zero here (which would give no output). */
15378 for (i = 0; i < 32; i += 2)
15382 operands[immed_op] = GEN_INT (n & (255 << i));
15383 output_asm_insn (instr, operands);
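/* Worked example with hypothetical operands: for n = 0x10004 the
   loop above emits two chunks, "add rd, rn, #4" followed by
   "add rd, rd, #65536"; each chunk is an 8-bit field at an even bit
   position and therefore a valid ARM immediate.  */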
15393 /* Return the name of a shifter operation. */
15394 static const char *
15395 arm_shift_nmem(enum rtx_code code)
15400 return ARM_LSL_NAME;
15416 /* Return the appropriate ARM instruction for the operation code.
15417 The returned result should not be overwritten. OP is the rtx of the
15418 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
15421 arithmetic_instr (rtx op, int shift_first_arg)
15423 switch (GET_CODE (op))
15429 return shift_first_arg ? "rsb" : "sub";
15444 return arm_shift_nmem(GET_CODE(op));
15447 gcc_unreachable ();
15451 /* Ensure valid constant shifts and return the appropriate shift mnemonic
15452 for the operation code. The returned result should not be overwritten.
15453 OP is the rtx code of the shift.
15454 On exit, *AMOUNTP will be -1 if the shift is by a register, or the constant amount otherwise. */
15456 static const char *
15457 shift_op (rtx op, HOST_WIDE_INT *amountp)
15460 enum rtx_code code = GET_CODE (op);
15465 if (!CONST_INT_P (XEXP (op, 1)))
15467 output_operand_lossage ("invalid shift operand");
15472 *amountp = 32 - INTVAL (XEXP (op, 1));
15480 mnem = arm_shift_nmem(code);
15481 if (CONST_INT_P (XEXP (op, 1)))
15483 *amountp = INTVAL (XEXP (op, 1));
15485 else if (REG_P (XEXP (op, 1)))
15492 output_operand_lossage ("invalid shift operand");
15498 /* We never have to worry about the amount being other than a
15499 power of 2, since this case can never be reloaded from a reg. */
15500 if (!CONST_INT_P (XEXP (op, 1)))
15502 output_operand_lossage ("invalid shift operand");
15506 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
15508 /* Amount must be a power of two. */
15509 if (*amountp & (*amountp - 1))
15511 output_operand_lossage ("invalid shift operand");
15515 *amountp = int_log2 (*amountp);
15516 return ARM_LSL_NAME;
15519 output_operand_lossage ("invalid shift operand");
15523 /* This is not 100% correct, but follows from the desire to merge
15524 multiplication by a power of 2 with the recognizer for a
15525 shift. >=32 is not a valid shift for "lsl", so we must try to
15526 output a shift that produces the correct arithmetical result.
15527 Using lsr #32 is identical except for the fact that the carry bit
15528 is not set correctly if we set the flags; but we never use the
15529 carry bit from such an operation, so we can ignore that. */
15530 if (code == ROTATERT)
15531 /* Rotate is just modulo 32. */
15533 else if (*amountp != (*amountp & 31))
15535 if (code == ASHIFT)
15540 /* Shifts of 0 are no-ops. */
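/* E.g. (illustrative): for (mult x 8) the code above sets *amountp
   to int_log2 (8) == 3 and returns ARM_LSL_NAME, so the caller
   prints the multiplication as a left shift by 3.  */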
15547 /* Obtain the shift from the POWER of two. */
15549 static HOST_WIDE_INT
15550 int_log2 (HOST_WIDE_INT power)
15552 HOST_WIDE_INT shift = 0;
15554 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
15556 gcc_assert (shift <= 31);
15563 /* Output a .ascii pseudo-op, keeping track of lengths. This is
15564 because /bin/as is horribly restrictive. The judgement about
15565 whether or not each character is 'printable' (and can be output as
15566 is) or not (and must be printed with an octal escape) must be made
15567 with reference to the *host* character set -- the situation is
15568 similar to that discussed in the comments above pp_c_char in
15569 c-pretty-print.c. */
15571 #define MAX_ASCII_LEN 51
15574 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
15577 int len_so_far = 0;
15579 fputs ("\t.ascii\t\"", stream);
15581 for (i = 0; i < len; i++)
15585 if (len_so_far >= MAX_ASCII_LEN)
15587 fputs ("\"\n\t.ascii\t\"", stream);
15593 if (c == '\\' || c == '\"')
15595 putc ('\\', stream);
15603 fprintf (stream, "\\%03o", c);
15608 fputs ("\"\n", stream);
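/* Example of the output, for a hypothetical four-byte input
   "Hi\"\n":

	.ascii	"Hi\"\012"

   Printable characters pass through, backslash and double quote are
   backslash-escaped, and anything else becomes a three-digit octal
   escape.  */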
15611 /* Compute the register save mask for registers 0 through 12
15612 inclusive. This code is used by arm_compute_save_reg_mask. */
15614 static unsigned long
15615 arm_compute_save_reg0_reg12_mask (void)
15617 unsigned long func_type = arm_current_func_type ();
15618 unsigned long save_reg_mask = 0;
15621 if (IS_INTERRUPT (func_type))
15623 unsigned int max_reg;
15624 /* Interrupt functions must not corrupt any registers,
15625 even call clobbered ones. If this is a leaf function
15626 we can just examine the registers used by the RTL, but
15627 otherwise we have to assume that whatever function is
15628 called might clobber anything, and so we have to save
15629 all the call-clobbered registers as well. */
15630 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
15631 /* FIQ handlers have registers r8 - r12 banked, so
15632 we only need to check r0 - r7. Normal ISRs only
15633 bank r14 and r15, so we must check up to r12.
15634 r13 is the stack pointer which is always preserved,
15635 so we do not need to consider it here. */
15640 for (reg = 0; reg <= max_reg; reg++)
15641 if (df_regs_ever_live_p (reg)
15642 || (! crtl->is_leaf && call_used_regs[reg]))
15643 save_reg_mask |= (1 << reg);
15645 /* Also save the pic base register if necessary. */
15647 && !TARGET_SINGLE_PIC_BASE
15648 && arm_pic_register != INVALID_REGNUM
15649 && crtl->uses_pic_offset_table)
15650 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15652 else if (IS_VOLATILE(func_type))
15654 /* For noreturn functions we historically omitted register saves
15655 altogether. However, this really messes up debugging. As a
15656 compromise save just the frame pointers. Combined with the link
15657 register saved elsewhere, this should be sufficient to get a backtrace. */
15659 if (frame_pointer_needed)
15660 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15661 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
15662 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15663 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
15664 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
15668 /* In the normal case we only need to save those registers
15669 which are call saved and which are used by this function. */
15670 for (reg = 0; reg <= 11; reg++)
15671 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15672 save_reg_mask |= (1 << reg);
15674 /* Handle the frame pointer as a special case. */
15675 if (frame_pointer_needed)
15676 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15678 /* If we aren't loading the PIC register,
15679 don't stack it even though it may be live. */
15681 && !TARGET_SINGLE_PIC_BASE
15682 && arm_pic_register != INVALID_REGNUM
15683 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
15684 || crtl->uses_pic_offset_table))
15685 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15687 /* The prologue will copy SP into R0, so save it. */
15688 if (IS_STACKALIGN (func_type))
15689 save_reg_mask |= 1;
15692 /* Save registers so the exception handler can modify them. */
15693 if (crtl->calls_eh_return)
15699 reg = EH_RETURN_DATA_REGNO (i);
15700 if (reg == INVALID_REGNUM)
15702 save_reg_mask |= 1 << reg;
15706 return save_reg_mask;
15710 /* Compute the number of bytes used to store the static chain register on the
15711 stack, above the stack frame. We need to know this accurately to get the
15712 alignment of the rest of the stack frame correct. */
15714 static int
arm_compute_static_chain_stack_bytes (void)
15716 unsigned long func_type = arm_current_func_type ();
15717 int static_chain_stack_bytes = 0;
15719 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
15720 && IS_NESTED (func_type)
15721 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
15722 static_chain_stack_bytes = 4;
15724 return static_chain_stack_bytes;
15728 /* Compute a bit mask of which registers need to be
15729 saved on the stack for the current function.
15730 This is used by arm_get_frame_offsets, which may add extra registers. */
15732 static unsigned long
15733 arm_compute_save_reg_mask (void)
15735 unsigned int save_reg_mask = 0;
15736 unsigned long func_type = arm_current_func_type ();
15739 if (IS_NAKED (func_type))
15740 /* This should never really happen. */
15741 return 0;
15743 /* If we are creating a stack frame, then we must save the frame pointer,
15744 IP (which will hold the old stack pointer), LR and the PC. */
15745 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15746 save_reg_mask |=
15747 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
15748 | (1 << IP_REGNUM)
15749 | (1 << LR_REGNUM)
15750 | (1 << PC_REGNUM);
15752 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
15754 /* Decide if we need to save the link register.
15755 Interrupt routines have their own banked link register,
15756 so they never need to save it.
15757 Otherwise if we do not use the link register we do not need to save
15758 it. If we are pushing other registers onto the stack however, we
15759 can save an instruction in the epilogue by pushing the link register
15760 now and then popping it back into the PC. This incurs extra memory
15761 accesses though, so we only do it when optimizing for size, and only
15762 if we know that we will not need a fancy return sequence. */
15763 if (df_regs_ever_live_p (LR_REGNUM)
15764 || (save_reg_mask
15765 && optimize_size
15766 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15767 && !crtl->calls_eh_return))
15768 save_reg_mask |= 1 << LR_REGNUM;
15770 if (cfun->machine->lr_save_eliminated)
15771 save_reg_mask &= ~ (1 << LR_REGNUM);
15773 if (TARGET_REALLY_IWMMXT
15774 && ((bit_count (save_reg_mask)
15775 + ARM_NUM_INTS (crtl->args.pretend_args_size +
15776 arm_compute_static_chain_stack_bytes ())
15777 ) % 2) != 0)
15779 /* The total number of registers that are going to be pushed
15780 onto the stack is odd. We need to ensure that the stack
15781 is 64-bit aligned before we start to save iWMMXt registers,
15782 and also before we start to create locals. (A local variable
15783 might be a double or long long which we will load/store using
15784 an iWMMXt instruction). Therefore we need to push another
15785 ARM register, so that the stack will be 64-bit aligned. We
15786 try to avoid using the arg registers (r0 - r3) as they might be
15787 used to pass values in a tail call. */
15788 for (reg = 4; reg <= 12; reg++)
15789 if ((save_reg_mask & (1 << reg)) == 0)
15790 break;
15792 if (reg <= 12)
15793 save_reg_mask |= (1 << reg);
15794 else
15796 cfun->machine->sibcall_blocked = 1;
15797 save_reg_mask |= (1 << 3);
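15798 /* Worked example (editorial): pushing {r4, r5, r6, r7, lr} is five words, an
15799 odd count, so a spare register (r8, say, if free) is added to the mask to
15800 keep the stack 64-bit aligned for the iWMMXt saves that follow. */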
15801 /* We may need to push an additional register for use initializing the
15802 PIC base register. */
15803 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
15804 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
15806 reg = thumb_find_work_register (1 << 4);
15807 if (!call_used_regs[reg])
15808 save_reg_mask |= (1 << reg);
15811 return save_reg_mask;
15815 /* Compute a bit mask of which registers need to be
15816 saved on the stack for the current function. */
15817 static unsigned long
15818 thumb1_compute_save_reg_mask (void)
15820 unsigned long mask;
15821 int reg;
15823 mask = 0;
15824 for (reg = 0; reg < 12; reg ++)
15825 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15826 mask |= 1 << reg;
15828 if (flag_pic
15829 && !TARGET_SINGLE_PIC_BASE
15830 && arm_pic_register != INVALID_REGNUM
15831 && crtl->uses_pic_offset_table)
15832 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15834 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
15835 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
15836 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15838 /* LR will also be pushed if any lo regs are pushed. */
15839 if (mask & 0xff || thumb_force_lr_save ())
15840 mask |= (1 << LR_REGNUM);
15842 /* Make sure we have a low work register if we need one.
15843 We will need one if we are going to push a high register,
15844 but we are not currently intending to push a low register. */
15845 if ((mask & 0xff) == 0
15846 && ((mask & 0x0f00) || TARGET_BACKTRACE))
15848 /* Use thumb_find_work_register to choose which register
15849 we will use. If the register is live then we will
15850 have to push it. Use LAST_LO_REGNUM as our fallback
15851 choice for the register to select. */
15852 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
15853 /* Make sure the register returned by thumb_find_work_register is
15854 not part of the return value. */
15855 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
15856 reg = LAST_LO_REGNUM;
15858 if (! call_used_regs[reg])
15859 mask |= 1 << reg;
15862 /* The 504 below is 8 bytes less than 512 because there are two possible
15863 alignment words. We can't tell here if they will be present or not so we
15864 have to play it safe and assume that they are. */
15865 if ((CALLER_INTERWORKING_SLOT_SIZE
15866 + ROUND_UP_WORD (get_frame_size ())
15867 + crtl->outgoing_args_size) >= 504)
15869 /* This is the same as the code in thumb1_expand_prologue() which
15870 determines which register to use for stack decrement. */
15871 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
15872 if (mask & (1 << reg))
15873 break;
15875 if (reg > LAST_LO_REGNUM)
15877 /* Make sure we have a register available for stack decrement. */
15878 mask |= 1 << LAST_LO_REGNUM;
15882 return mask;
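15883 /* Editorial note on the 504 cutoff above: the Thumb-1 prologue's immediate
15884 stack decrement reaches just under 512 bytes, and up to two 4-byte alignment
15885 words may be added later, hence 512 - 8 = 504 as the safe limit. */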
15886 /* Return the number of bytes required to save VFP registers. */
15887 static int
15888 arm_get_vfp_saved_size (void)
15890 unsigned int regno;
15895 /* Space for saved VFP registers. */
15896 if (TARGET_HARD_FLOAT && TARGET_VFP)
15899 for (regno = FIRST_VFP_REGNUM;
15900 regno < LAST_VFP_REGNUM;
15901 regno += 2)
15903 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
15904 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
15908 /* Workaround ARM10 VFPr1 bug. */
15909 if (count == 2 && !arm_arch6)
15910 count++;
15911 saved += count * 8;
15920 if (count == 2 && !arm_arch6)
15921 count++;
15922 saved += count * 8;
15925 return saved;
15929 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
15930 everything bar the final return instruction. If SIMPLE_RETURN is true,
15931 then do not output the epilogue, because it has already been emitted in RTL. */
15932 const char *
15933 output_return_instruction (rtx operand, bool really_return, bool reverse,
15934 bool simple_return)
15936 char conditional[10];
15937 char instr[100];
15938 unsigned reg;
15939 unsigned long live_regs_mask;
15940 unsigned long func_type;
15941 arm_stack_offsets *offsets;
15943 func_type = arm_current_func_type ();
15945 if (IS_NAKED (func_type))
15946 return "";
15948 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15950 /* If this function was declared non-returning, and we have
15951 found a tail call, then we have to trust that the called
15952 function won't return. */
15957 /* Otherwise, trap an attempted return by aborting. */
15958 ops[0] = operand;
15959 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
15960 : "abort");
15961 assemble_external_libcall (ops[1]);
15962 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
15968 gcc_assert (!cfun->calls_alloca || really_return);
15970 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
15972 cfun->machine->return_used_this_function = 1;
15974 offsets = arm_get_frame_offsets ();
15975 live_regs_mask = offsets->saved_regs_mask;
15977 if (!simple_return && live_regs_mask)
15979 const char * return_reg;
15981 /* If we do not have any special requirements for function exit
15982 (e.g. interworking) then we can load the return address
15983 directly into the PC. Otherwise we must load it into LR. */
15984 if (really_return
15985 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
15986 return_reg = reg_names[PC_REGNUM];
15987 else
15988 return_reg = reg_names[LR_REGNUM];
15990 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
15992 /* There are three possible reasons for the IP register
15993 being saved. 1) a stack frame was created, in which case
15994 IP contains the old stack pointer, or 2) an ISR routine
15995 corrupted it, or 3) it was saved to align the stack on
15996 iWMMXt. In case 1, restore IP into SP, otherwise just
15997 pop it. */
15998 if (frame_pointer_needed)
16000 live_regs_mask &= ~ (1 << IP_REGNUM);
16001 live_regs_mask |= (1 << SP_REGNUM);
16004 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
16007 /* On some ARM architectures it is faster to use LDR rather than
16008 LDM to load a single register. On other architectures, the
16009 cost is the same. In 26 bit mode, or for exception handlers,
16010 we have to use LDM to load the PC so that the CPSR is also
16011 restored. */
16012 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
16013 if (live_regs_mask == (1U << reg))
16014 break;
16016 if (reg <= LAST_ARM_REGNUM
16017 && (reg != LR_REGNUM
16018 || ! really_return
16019 || ! IS_INTERRUPT (func_type)))
16021 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
16022 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
16029 /* Generate the load multiple instruction to restore the
16030 registers. Note we can get here, even if
16031 frame_pointer_needed is true, but only if sp already
16032 points to the base of the saved core registers. */
16033 if (live_regs_mask & (1 << SP_REGNUM))
16035 unsigned HOST_WIDE_INT stack_adjust;
16037 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
16038 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
16040 if (stack_adjust && arm_arch5 && TARGET_ARM)
16041 if (TARGET_UNIFIED_ASM)
16042 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
16044 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
16047 /* If we can't use ldmib (SA110 bug),
16048 then try to pop r3 instead. */
16049 if (stack_adjust)
16050 live_regs_mask |= 1 << 3;
16052 if (TARGET_UNIFIED_ASM)
16053 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
16055 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
16059 if (TARGET_UNIFIED_ASM)
16060 sprintf (instr, "pop%s\t{", conditional);
16062 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
16064 p = instr + strlen (instr);
16066 for (reg = 0; reg <= SP_REGNUM; reg++)
16067 if (live_regs_mask & (1 << reg))
16069 int l = strlen (reg_names[reg]);
16075 memcpy (p, ", ", 2);
16079 memcpy (p, "%|", 2);
16080 memcpy (p + 2, reg_names[reg], l);
16084 if (live_regs_mask & (1 << LR_REGNUM))
16086 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
16087 /* If returning from an interrupt, restore the CPSR. */
16088 if (IS_INTERRUPT (func_type))
16089 strcat (p, "^");
16095 output_asm_insn (instr, & operand);
16097 /* See if we need to generate an extra instruction to
16098 perform the actual function return. */
16099 if (really_return
16100 && func_type != ARM_FT_INTERWORKED
16101 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
16103 /* The return has already been handled
16104 by loading the LR into the PC. */
16105 return "";
16111 switch ((int) ARM_FUNC_TYPE (func_type))
16113 case ARM_FT_ISR:
16114 case ARM_FT_FIQ:
16115 /* ??? This is wrong for unified assembly syntax. */
16116 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
16117 break;
16119 case ARM_FT_INTERWORKED:
16120 sprintf (instr, "bx%s\t%%|lr", conditional);
16121 break;
16123 case ARM_FT_EXCEPTION:
16124 /* ??? This is wrong for unified assembly syntax. */
16125 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
16126 break;
16128 default:
16129 /* Use bx if it's available. */
16130 if (arm_arch5 || arm_arch4t)
16131 sprintf (instr, "bx%s\t%%|lr", conditional);
16132 else
16133 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
16134 break;
16137 output_asm_insn (instr, & operand);
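16138 /* Editorial illustration: a plain function on ARMv5 returns with "bx lr",
16139 while an ISR or FIQ handler uses "subs pc, lr, #4", which also copies
16140 SPSR back into CPSR as the handler exits. */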
16143 /* Write the function name into the code section, directly preceding
16144 the function prologue.
16146 Code will be output similar to this:
16147 t0
16148 .ascii "arm_poke_function_name", 0
16149 .align
16150 t1
16151 .word 0xff000000 + (t1 - t0)
16152 arm_poke_function_name
16153 mov ip, sp
16154 stmfd sp!, {fp, ip, lr, pc}
16155 sub fp, ip, #4
16157 When performing a stack backtrace, code can inspect the value
16158 of 'pc' stored at 'fp' + 0. If the trace function then looks
16159 at location pc - 12 and the top 8 bits are set, then we know
16160 that there is a function name embedded immediately preceding this
16161 location, whose length is ((pc[-3]) & 0x00ffffff).
16163 We assume that pc is declared as a pointer to an unsigned long.
16165 It is of no benefit to output the function name if we are assembling
16166 a leaf function. These function types will not contain a stack
16167 backtrace structure, therefore it is not possible to determine the
16168 function name. */
16169 void
16170 arm_poke_function_name (FILE *stream, const char *name)
16172 unsigned long alignlength;
16173 unsigned long length;
16174 rtx x;
16176 length = strlen (name) + 1;
16177 alignlength = ROUND_UP_WORD (length);
16179 ASM_OUTPUT_ASCII (stream, name, length);
16180 ASM_OUTPUT_ALIGN (stream, 2);
16181 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
16182 assemble_aligned_integer (UNITS_PER_WORD, x);
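16183 /* Editorial note: with pc declared as unsigned long *, the "pc - 12" in the
16184 comment above is exactly pc[-3], i.e. the 0xff000000 + length marker word. */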
16185 /* Place some comments into the assembler stream
16186 describing the current function. */
16187 static void
16188 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
16190 unsigned long func_type;
16192 /* ??? Do we want to print some of the below anyway? */
16193 if (TARGET_THUMB1)
16194 return;
16196 /* Sanity check. */
16197 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
16199 func_type = arm_current_func_type ();
16201 switch ((int) ARM_FUNC_TYPE (func_type))
16204 case ARM_FT_NORMAL:
16205 break;
16206 case ARM_FT_INTERWORKED:
16207 asm_fprintf (f, "\t%@ Function supports interworking.\n");
16208 break;
16209 case ARM_FT_ISR:
16210 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
16211 break;
16212 case ARM_FT_FIQ:
16213 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
16214 break;
16215 case ARM_FT_EXCEPTION:
16216 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
16217 break;
16220 if (IS_NAKED (func_type))
16221 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
16223 if (IS_VOLATILE (func_type))
16224 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
16226 if (IS_NESTED (func_type))
16227 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
16228 if (IS_STACKALIGN (func_type))
16229 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
16231 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
16233 crtl->args.pretend_args_size, frame_size);
16235 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
16236 frame_pointer_needed,
16237 cfun->machine->uses_anonymous_args);
16239 if (cfun->machine->lr_save_eliminated)
16240 asm_fprintf (f, "\t%@ link register save eliminated.\n");
16242 if (crtl->calls_eh_return)
16243 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
16247 static void
16248 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
16249 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
16251 arm_stack_offsets *offsets;
16253 if (TARGET_THUMB1)
16255 int regno;
16257 /* Emit any call-via-reg trampolines that are needed for v4t support
16258 of call_reg and call_value_reg type insns. */
16259 for (regno = 0; regno < LR_REGNUM; regno++)
16261 rtx label = cfun->machine->call_via[regno];
16263 if (label != NULL)
16265 switch_to_section (function_section (current_function_decl));
16266 targetm.asm_out.internal_label (asm_out_file, "L",
16267 CODE_LABEL_NUMBER (label));
16268 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
16272 /* ??? Probably not safe to set this here, since it assumes that a
16273 function will be emitted as assembly immediately after we generate
16274 RTL for it. This does not happen for inline functions. */
16275 cfun->machine->return_used_this_function = 0;
16277 else /* TARGET_32BIT */
16279 /* We need to take into account any stack-frame rounding. */
16280 offsets = arm_get_frame_offsets ();
16282 gcc_assert (!use_return_insn (FALSE, NULL)
16283 || (cfun->machine->return_used_this_function != 0)
16284 || offsets->saved_regs == offsets->outgoing_args
16285 || frame_pointer_needed);
16287 /* Reset the ARM-specific per-function variables. */
16288 after_arm_reorg = 0;
16292 /* Generate and emit a pattern that will be recognized as an STRD pattern. If an
16293 even number of registers is being pushed, multiple STRD patterns are created
16294 for all register pairs. If an odd number of registers is pushed, emit a
16295 combination of STRDs and one STR for the prologue saves. */
16296 static void
16297 thumb2_emit_strd_push (unsigned long saved_regs_mask)
16299 int num_regs = 0;
16300 int i, j;
16301 rtx par = NULL_RTX;
16302 rtx insn = NULL_RTX;
16303 rtx dwarf = NULL_RTX;
16304 rtx tmp, reg, tmp1;
16306 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16307 if (saved_regs_mask & (1 << i))
16308 num_regs++;
16310 gcc_assert (num_regs && num_regs <= 16);
16312 /* Pre-decrement the stack pointer, based on there being num_regs 4-byte
16313 registers to push. */
16314 tmp = gen_rtx_SET (VOIDmode,
16315 stack_pointer_rtx,
16316 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
16317 RTX_FRAME_RELATED_P (tmp) = 1;
16318 insn = emit_insn (tmp);
16320 /* Create sequence for DWARF info. */
16321 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
16323 /* RTLs cannot be shared, hence create new copy for dwarf. */
16324 tmp1 = gen_rtx_SET (VOIDmode,
16325 stack_pointer_rtx,
16326 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
16327 RTX_FRAME_RELATED_P (tmp1) = 1;
16328 XVECEXP (dwarf, 0, 0) = tmp1;
16330 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
16331 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
16333 /* Var j iterates over all the registers to gather all the registers in
16334 saved_regs_mask. Var i gives index of register R_j in stack frame.
16335 A PARALLEL RTX of register-pair is created here, so that pattern for
16336 STRD can be matched. If num_regs is odd, 1st register will be pushed
16337 using STR and remaining registers will be pushed with STRD in pairs.
16338 If num_regs is even, all registers are pushed with STRD in pairs.
16339 Hence, skip first element for odd num_regs. */
16340 for (i = num_regs - 1, j = LAST_ARM_REGNUM; i >= (num_regs % 2); j--)
16341 if (saved_regs_mask & (1 << j))
16343 /* Create RTX for store. New RTX is created for dwarf as
16344 they are not sharable. */
16345 reg = gen_rtx_REG (SImode, j);
16346 tmp = gen_rtx_SET (SImode,
16347 gen_frame_mem
16348 (SImode,
16349 plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
16350 reg);
16352 tmp1 = gen_rtx_SET (SImode,
16353 gen_frame_mem
16354 (SImode,
16355 plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
16356 reg);
16357 RTX_FRAME_RELATED_P (tmp) = 1;
16358 RTX_FRAME_RELATED_P (tmp1) = 1;
16360 if (((i - (num_regs % 2)) % 2) == 1)
16361 /* When (i - (num_regs % 2)) is odd, the RTX to be emitted is yet to
16362 be created. Hence create it first. The STRD pattern we are
16364 [ (SET (MEM (PLUS (SP) (NUM))) (reg_t1))
16365 (SET (MEM (PLUS (SP) (NUM + 4))) (reg_t2)) ]
16366 where the target registers need not be consecutive. */
16367 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
16369 /* Register R_j is added in PARALLEL RTX. If (i - (num_regs % 2)) is
16370 even, the reg_j is added as 0th element and if it is odd, reg_i is
16371 added as 1st element of STRD pattern shown above. */
16372 XVECEXP (par, 0, ((i - (num_regs % 2)) % 2)) = tmp;
16373 XVECEXP (dwarf, 0, (i + 1)) = tmp1;
16375 if (((i - (num_regs % 2)) % 2) == 0)
16376 /* When (i - (num_regs % 2)) is even, RTXs for both the registers
16377 to be loaded are generated in above given STRD pattern, and the
16378 pattern can be emitted now. */
16384 if ((num_regs % 2) == 1)
16386 /* If an odd number of registers is pushed, generate an STR pattern to store
16387 the lone register. */
16388 for (; (saved_regs_mask & (1 << j)) == 0; j--);
16390 tmp1 = gen_frame_mem (SImode, plus_constant (Pmode,
16391 stack_pointer_rtx, 4 * i));
16392 reg = gen_rtx_REG (SImode, j);
16393 tmp = gen_rtx_SET (SImode, tmp1, reg);
16394 RTX_FRAME_RELATED_P (tmp) = 1;
16395 emit_insn (tmp);
16398 tmp1 = gen_rtx_SET (SImode,
16399 gen_frame_mem
16400 (SImode,
16401 plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
16402 reg);
16403 RTX_FRAME_RELATED_P (tmp1) = 1;
16404 XVECEXP (dwarf, 0, (i + 1)) = tmp1;
16407 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16408 RTX_FRAME_RELATED_P (insn) = 1;
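16409 /* Worked example (editorial): saved_regs_mask covering {r4,r5,r6,r7,r8} gives
16410 num_regs = 5, so r4 is stored alone with STR at [sp] and the pairs (r5,r6)
16411 and (r7,r8) are stored with STRD at offsets 4 and 12. */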
16412 /* Generate and emit an insn that we will recognize as a push_multi.
16413 Unfortunately, since this insn does not reflect very well the actual
16414 semantics of the operation, we need to annotate the insn for the benefit
16415 of DWARF2 frame unwind information. */
16416 static rtx
16417 emit_multi_reg_push (unsigned long mask)
16419 int num_regs = 0;
16420 int num_dwarf_regs;
16421 int i, j;
16422 rtx par;
16423 rtx dwarf;
16424 int dwarf_par_index;
16425 rtx tmp, reg;
16427 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16428 if (mask & (1 << i))
16431 gcc_assert (num_regs && num_regs <= 16);
16433 /* We don't record the PC in the dwarf frame information. */
16434 num_dwarf_regs = num_regs;
16435 if (mask & (1 << PC_REGNUM))
16436 num_dwarf_regs--;
16438 /* For the body of the insn we are going to generate an UNSPEC in
16439 parallel with several USEs. This allows the insn to be recognized
16440 by the push_multi pattern in the arm.md file.
16442 The body of the insn looks something like this:
16445 (set (mem:BLK (pre_modify:SI (reg:SI sp)
16446 (const_int:SI <num>)))
16447 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
16453 For the frame note however, we try to be more explicit and actually
16454 show each register being stored into the stack frame, plus a (single)
16455 decrement of the stack pointer. We do it this way in order to be
16456 friendly to the stack unwinding code, which only wants to see a single
16457 stack decrement per instruction. The RTL we generate for the note looks
16458 something like this:
16461 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
16462 (set (mem:SI (reg:SI sp)) (reg:SI r4))
16463 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
16464 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
16468 FIXME:: In an ideal world the PRE_MODIFY would not exist and
16469 instead we'd have a parallel expression detailing all
16470 the stores to the various memory addresses so that debug
16471 information is more up-to-date. Remember however while writing
16472 this to take care of the constraints with the push instruction.
16474 Note also that this has to be taken care of for the VFP registers.
16476 For more see PR43399. */
16478 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
16479 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
16480 dwarf_par_index = 1;
16482 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16484 if (mask & (1 << i))
16486 reg = gen_rtx_REG (SImode, i);
16488 XVECEXP (par, 0, 0)
16489 = gen_rtx_SET (VOIDmode,
16490 gen_frame_mem
16491 (BLKmode,
16492 gen_rtx_PRE_MODIFY (Pmode,
16493 stack_pointer_rtx,
16494 plus_constant
16495 (Pmode, stack_pointer_rtx,
16496 -4 * num_regs))
16497 ),
16498 gen_rtx_UNSPEC (BLKmode,
16499 gen_rtvec (1, reg),
16500 UNSPEC_PUSH_MULT));
16502 if (i != PC_REGNUM)
16504 tmp = gen_rtx_SET (VOIDmode,
16505 gen_frame_mem (SImode, stack_pointer_rtx),
16506 reg);
16507 RTX_FRAME_RELATED_P (tmp) = 1;
16508 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
16509 dwarf_par_index++;
16512 break;
16516 for (j = 1, i++; j < num_regs; i++)
16518 if (mask & (1 << i))
16520 reg = gen_rtx_REG (SImode, i);
16522 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
16524 if (i != PC_REGNUM)
16526 tmp
16527 = gen_rtx_SET (VOIDmode,
16528 gen_frame_mem
16529 (SImode,
16530 plus_constant (Pmode, stack_pointer_rtx,
16531 4 * j)),
16532 reg);
16533 RTX_FRAME_RELATED_P (tmp) = 1;
16534 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
16537 j++;
16541 par = emit_insn (par);
16543 tmp = gen_rtx_SET (VOIDmode,
16545 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
16546 RTX_FRAME_RELATED_P (tmp) = 1;
16547 XVECEXP (dwarf, 0, 0) = tmp;
16549 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
16551 return par;
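16552 /* Editorial illustration: for mask = {r4, r5, lr} the insn is one PRE_MODIFY
16553 block store; the note instead lists three word stores and one SP -= 12. */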
16554 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
16555 SAVED_REGS_MASK shows which registers need to be restored.
16557 Unfortunately, since this insn does not reflect very well the actual
16558 semantics of the operation, we need to annotate the insn for the benefit
16559 of DWARF2 frame unwind information. */
16560 static void
16561 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
16563 int num_regs = 0, i, j, offset_adj, emit_update;
16564 rtx par, tmp, reg;
16566 rtx dwarf = NULL_RTX;
16567 bool return_in_pc;
16572 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
16573 offset_adj = return_in_pc ? 1 : 0;
16574 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16575 if (saved_regs_mask & (1 << i))
16576 num_regs++;
16578 gcc_assert (num_regs && num_regs <= 16);
16580 /* If SP is in the register list, then we don't emit the SP update insn. */
16581 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
16583 /* The parallel needs to hold num_regs SETs
16584 and one SET for the stack update. */
16585 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
16587 if (return_in_pc)
16589 tmp = ret_rtx;
16590 XVECEXP (par, 0, 0) = tmp;
16593 if (emit_update)
16595 /* Increment the stack pointer, based on there being
16596 num_regs 4-byte registers to restore. */
16597 tmp = gen_rtx_SET (VOIDmode,
16598 stack_pointer_rtx,
16599 plus_constant (Pmode,
16600 stack_pointer_rtx,
16601 4 * num_regs));
16602 RTX_FRAME_RELATED_P (tmp) = 1;
16603 XVECEXP (par, 0, offset_adj) = tmp;
16606 /* Now restore every reg, which may include PC. */
16607 for (j = 0, i = 0; j < num_regs; i++)
16608 if (saved_regs_mask & (1 << i))
16610 reg = gen_rtx_REG (SImode, i);
16611 tmp = gen_rtx_SET (VOIDmode,
16612 reg,
16613 gen_frame_mem
16614 (SImode,
16615 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
16616 RTX_FRAME_RELATED_P (tmp) = 1;
16617 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
16619 /* We need to maintain a sequence for DWARF info too. As dwarf info
16620 should not have PC, skip PC. */
16621 if (i != PC_REGNUM)
16622 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
16624 j++;
16627 if (return_in_pc)
16628 par = emit_jump_insn (par);
16629 else
16630 par = emit_insn (par);
16632 REG_NOTES (par) = dwarf;
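16633 /* Editorial example: pop {r4, r5, pc} builds a parallel of ret_rtx, the SP
16634 increment, and one load per register, emitted as a jump insn; the
16634 REG_CFA_RESTORE notes cover r4 and r5 but not PC. */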
16635 /* Generate and emit an insn pattern that we will recognize as a pop_multi
16636 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
16638 Unfortunately, since this insn does not reflect very well the actual
16639 semantics of the operation, we need to annotate the insn for the benefit
16640 of DWARF2 frame unwind information. */
16641 static void
16642 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
16643 int i, j;
16644 rtx par;
16646 rtx dwarf = NULL_RTX;
16647 rtx tmp, reg;
16649 gcc_assert (num_regs && num_regs <= 32);
16651 /* Workaround ARM10 VFPr1 bug. */
16652 if (num_regs == 2 && !arm_arch6)
16654 if (first_reg == 15)
16655 first_reg--;
16657 num_regs++;
16660 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
16661 there could be up to 32 D-registers to restore.
16662 If there are more than 16 D-registers, make two recursive calls,
16663 each of which emits one pop_multi instruction. */
16664 if (num_regs > 16)
16666 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
16667 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
16668 return;
16671 /* The parallel needs to hold num_regs SETs
16672 and one SET for the stack update. */
16673 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
16675 /* Increment the stack pointer, based on there being
16676 num_regs 8-byte registers to restore. */
16677 tmp = gen_rtx_SET (VOIDmode,
16678 base_reg,
16679 plus_constant (Pmode, base_reg, 8 * num_regs));
16680 RTX_FRAME_RELATED_P (tmp) = 1;
16681 XVECEXP (par, 0, 0) = tmp;
16683 /* Now show every reg that will be restored, using a SET for each. */
16684 for (j = 0, i=first_reg; j < num_regs; i += 2)
16686 reg = gen_rtx_REG (DFmode, i);
16688 tmp = gen_rtx_SET (VOIDmode,
16689 reg,
16690 gen_frame_mem
16691 (DFmode,
16692 plus_constant (Pmode, base_reg, 8 * j)));
16693 RTX_FRAME_RELATED_P (tmp) = 1;
16694 XVECEXP (par, 0, j + 1) = tmp;
16696 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
16698 j++;
16701 par = emit_insn (par);
16702 REG_NOTES (par) = dwarf;
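16703 /* Editorial example: restoring d8-d27 (20 registers) takes the recursive
16704 path above, emitting one pop_multi of 16 D-registers and another of 4. */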
16705 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
16706 even number of registers is being popped, multiple LDRD patterns are created
16707 for all register pairs. If an odd number of registers is popped, the last
16708 register is loaded using an LDR pattern. */
16709 static void
16710 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
16712 int num_regs = 0;
16713 int i = 0, j = 0;
16714 rtx par = NULL_RTX;
16715 rtx dwarf = NULL_RTX;
16716 rtx tmp, reg, tmp1;
16717 bool return_in_pc;
16719 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
16720 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16721 if (saved_regs_mask & (1 << i))
16722 num_regs++;
16724 gcc_assert (num_regs && num_regs <= 16);
16726 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
16727 to be popped. So, if num_regs is even, now it will become odd,
16728 and we can generate pop with PC. If num_regs is odd, it will be
16729 even now, and ldr with return can be generated for PC. */
16730 if (saved_regs_mask & (1 << PC_REGNUM))
16731 num_regs--;
16733 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
16735 /* Var j iterates over all the registers to gather all the registers in
16736 saved_regs_mask. Var i gives index of saved registers in stack frame.
16737 A PARALLEL RTX of register-pair is created here, so that pattern for
16738 LDRD can be matched. As PC is always last register to be popped, and
16739 we have already decremented num_regs if PC, we don't have to worry
16740 about PC in this loop. */
16741 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
16742 if (saved_regs_mask & (1 << j))
16744 /* Create RTX for memory load. */
16745 reg = gen_rtx_REG (SImode, j);
16746 tmp = gen_rtx_SET (SImode,
16747 reg,
16748 gen_frame_mem (SImode,
16749 plus_constant (Pmode,
16750 stack_pointer_rtx, 4 * i)));
16751 RTX_FRAME_RELATED_P (tmp) = 1;
16753 if ((i % 2) == 0)
16755 /* When saved-register index (i) is even, the RTX to be emitted is
16756 yet to be created. Hence create it first. The LDRD pattern we
16757 are generating is :
16758 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
16759 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
16760 where target registers need not be consecutive. */
16761 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
16765 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
16766 added as 0th element and if i is odd, reg_i is added as 1st element
16767 of LDRD pattern shown above. */
16768 XVECEXP (par, 0, (i % 2)) = tmp;
16769 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
16771 if ((i % 2) == 1)
16773 /* When saved-register index (i) is odd, RTXs for both the registers
16774 to be loaded are generated in above given LDRD pattern, and the
16775 pattern can be emitted now. */
16776 par = emit_insn (par);
16777 REG_NOTES (par) = dwarf;
16778 dwarf = NULL_RTX;
16781 i++;
16783 /* If the number of registers popped is odd and return_in_pc is false, or the
16784 number of registers is even and return_in_pc is true, the last register is
16785 popped using LDR. It can be PC as well. Hence, adjust the stack first and
16786 then LDR with post increment. */
16788 /* Increment the stack pointer, based on there being
16789 num_regs 4-byte registers to restore. */
16790 tmp = gen_rtx_SET (VOIDmode,
16791 stack_pointer_rtx,
16792 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
16793 RTX_FRAME_RELATED_P (tmp) = 1;
16794 emit_insn (tmp);
16798 if (((num_regs % 2) == 1 && !return_in_pc)
16799 || ((num_regs % 2) == 0 && return_in_pc))
16801 /* Scan for the single register to be popped. Skip until the saved
16802 register is found. */
16803 for (; (saved_regs_mask & (1 << j)) == 0; j++);
16805 /* Gen LDR with post increment here. */
16806 tmp1 = gen_rtx_MEM (SImode,
16807 gen_rtx_POST_INC (SImode,
16808 stack_pointer_rtx));
16809 set_mem_alias_set (tmp1, get_frame_alias_set ());
16811 reg = gen_rtx_REG (SImode, j);
16812 tmp = gen_rtx_SET (SImode, reg, tmp1);
16813 RTX_FRAME_RELATED_P (tmp) = 1;
16814 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
16816 if (return_in_pc)
16818 /* If return_in_pc, j must be PC_REGNUM. */
16819 gcc_assert (j == PC_REGNUM);
16820 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
16821 XVECEXP (par, 0, 0) = ret_rtx;
16822 XVECEXP (par, 0, 1) = tmp;
16823 par = emit_jump_insn (par);
16826 else
16827 par = emit_insn (tmp);
16830 REG_NOTES (par) = dwarf;
16832 else if ((num_regs % 2) == 1 && return_in_pc)
16834 /* There are 2 registers to be popped. So, generate the pattern
16835 pop_multiple_with_stack_update_and_return to pop in PC. */
16836 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
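16837 /* Worked example (editorial): popping {r4, r5, r6} loads (r4,r5) with one
16838 LDRD, bumps SP by 8, then pops r6 with "ldr r6, [sp], #4"; popping
16839 {r4, r5, pc} likewise ends with "ldr pc, [sp], #4" emitted as a jump. */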
16842 /* Calculate the size of the return value that is passed in registers. */
16843 static unsigned
16844 arm_size_return_regs (void)
16846 enum machine_mode mode;
16848 if (crtl->return_rtx != 0)
16849 mode = GET_MODE (crtl->return_rtx);
16851 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16853 return GET_MODE_SIZE (mode);
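16854 /* Editorial example: a DImode return value yields 8 here, so callers such as
16855 thumb1_compute_save_reg_mask avoid picking r0 or r1 as a work register. */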
16856 /* Return true if the current function needs to save/restore LR. */
16857 static bool
16858 thumb_force_lr_save (void)
16860 return !cfun->machine->lr_save_eliminated
16861 && (!leaf_function_p ()
16862 || thumb_far_jump_used_p ()
16863 || df_regs_ever_live_p (LR_REGNUM));
16867 /* Return true if r3 is used by any of the tail call insns in the
16868 current function. */
16869 static bool
16870 any_sibcall_uses_r3 (void)
16872 edge e;
16873 edge_iterator ei;
16875 if (!crtl->tail_call_emit)
16876 return false;
16877 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16878 if (e->flags & EDGE_SIBCALL)
16880 rtx call = BB_END (e->src);
16881 if (!CALL_P (call))
16882 call = prev_nonnote_nondebug_insn (call);
16883 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
16884 if (find_regno_fusage (call, USE, 3))
16885 return true;
16888 return false;
16891 /* Compute the distance from register FROM to register TO.
16892 These can be the arg pointer (26), the soft frame pointer (25),
16893 the stack pointer (13) or the hard frame pointer (11).
16894 In thumb mode r7 is used as the soft frame pointer, if needed.
16895 Typical stack layout, with regions listed from higher to lower addresses:
16897 old stack pointer -> top of saved arguments for vararg functions
16904 hard FP & arg pointer -> top of the stack frame
16912 soft frame pointer -> bottom of the call-saved register block
16917 locals base pointer -> bottom of the local variables
16922 current stack pointer -> bottom of the outgoing arguments
16925 For a given function some or all of these stack components
16926 may not be needed, giving rise to the possibility of
16927 eliminating some of the registers.
16929 The values returned by this function must reflect the behavior
16930 of arm_expand_prologue() and arm_compute_save_reg_mask().
16932 The sign of the number returned reflects the direction of stack
16933 growth, so the values are positive for all eliminations except
16934 from the soft frame pointer to the hard frame pointer.
16936 SFP may point just inside the local variables block to ensure correct
16937 alignment. */
16940 /* Calculate stack offsets. These are used to calculate register elimination
16941 offsets and in prologue/epilogue code. Also calculates which registers
16942 should be saved. */
16944 static arm_stack_offsets *
16945 arm_get_frame_offsets (void)
16947 struct arm_stack_offsets *offsets;
16948 unsigned long func_type;
16949 int leaf;
16950 int saved;
16951 int core_saved;
16952 HOST_WIDE_INT frame_size;
16953 int i;
16955 offsets = &cfun->machine->stack_offsets;
16957 /* We need to know if we are a leaf function. Unfortunately, it
16958 is possible to be called after start_sequence has been called,
16959 which causes get_insns to return the insns for the sequence,
16960 not the function, which will cause leaf_function_p to return
16961 the incorrect result. We only need
16963 to know about leaf functions once reload has completed, and the
16964 frame size cannot be changed after that time, so we can safely
16965 use the cached value. */
16967 if (reload_completed)
16968 return offsets;
16970 /* Initially this is the size of the local variables. It will be translated
16971 into an offset once we have determined the size of preceding data. */
16972 frame_size = ROUND_UP_WORD (get_frame_size ());
16974 leaf = leaf_function_p ();
16976 /* Space for variadic functions. */
16977 offsets->saved_args = crtl->args.pretend_args_size;
16979 /* In Thumb mode this is incorrect, but never used. */
16980 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
16981 arm_compute_static_chain_stack_bytes();
16983 if (TARGET_32BIT)
16985 unsigned int regno;
16987 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
16988 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16989 saved = core_saved;
16991 /* We know that SP will be doubleword aligned on entry, and we must
16992 preserve that condition at any subroutine call. We also require the
16993 soft frame pointer to be doubleword aligned. */
16995 if (TARGET_REALLY_IWMMXT)
16997 /* Check for the call-saved iWMMXt registers. */
16998 for (regno = FIRST_IWMMXT_REGNUM;
16999 regno <= LAST_IWMMXT_REGNUM;
17000 regno++)
17001 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
17002 saved += 8;
17005 func_type = arm_current_func_type ();
17006 /* Space for saved VFP registers. */
17007 if (! IS_VOLATILE (func_type)
17008 && TARGET_HARD_FLOAT && TARGET_VFP)
17009 saved += arm_get_vfp_saved_size ();
17011 else /* TARGET_THUMB1 */
17013 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
17014 core_saved = bit_count (offsets->saved_regs_mask) * 4;
17015 saved = core_saved;
17016 if (TARGET_BACKTRACE)
17017 saved += 16;
17020 /* Saved registers include the stack frame. */
17021 offsets->saved_regs = offsets->saved_args + saved +
17022 arm_compute_static_chain_stack_bytes();
17023 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
17024 /* A leaf function does not need any stack alignment if it has nothing
17025 on the stack. */
17026 if (leaf && frame_size == 0
17027 /* However if it calls alloca(), we have a dynamically allocated
17028 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
17029 && ! cfun->calls_alloca)
17031 offsets->outgoing_args = offsets->soft_frame;
17032 offsets->locals_base = offsets->soft_frame;
17033 return offsets;
17036 /* Ensure SFP has the correct alignment. */
17037 if (ARM_DOUBLEWORD_ALIGN
17038 && (offsets->soft_frame & 7))
17040 offsets->soft_frame += 4;
17041 /* Try to align stack by pushing an extra reg. Don't bother doing this
17042 when there is a stack frame as the alignment will be rolled into
17043 the normal stack adjustment. */
17044 if (frame_size + crtl->outgoing_args_size == 0)
17046 int reg = -1;
17048 /* If it is safe to use r3, then do so. This sometimes
17049 generates better code on Thumb-2 by avoiding the need to
17050 use 32-bit push/pop instructions. */
17051 if (! any_sibcall_uses_r3 ()
17052 && arm_size_return_regs () <= 12
17053 && (offsets->saved_regs_mask & (1 << 3)) == 0)
17055 reg = 3;
17056 else
17058 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
17060 /* Avoid fixed registers; they may be changed at
17061 arbitrary times so it's unsafe to restore them
17062 during the epilogue. */
17063 if (!fixed_regs[i]
17064 && (offsets->saved_regs_mask & (1 << i)) == 0)
17066 reg = i;
17067 break;
17071 if (reg != -1)
17073 offsets->saved_regs += 4;
17074 offsets->saved_regs_mask |= (1 << reg);
17079 offsets->locals_base = offsets->soft_frame + frame_size;
17080 offsets->outgoing_args = (offsets->locals_base
17081 + crtl->outgoing_args_size);
17083 if (ARM_DOUBLEWORD_ALIGN)
17085 /* Ensure SP remains doubleword aligned. */
17086 if (offsets->outgoing_args & 7)
17087 offsets->outgoing_args += 4;
17088 gcc_assert (!(offsets->outgoing_args & 7));
17091 return offsets;
17095 /* Calculate the relative offsets for the different stack pointers. Positive
17096 offsets are in the direction of stack growth. */
17098 unsigned int
17099 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
17101 arm_stack_offsets *offsets;
17103 offsets = arm_get_frame_offsets ();
17105 /* OK, now we have enough information to compute the distances.
17106 There must be an entry in these switch tables for each pair
17107 of registers in ELIMINABLE_REGS, even if some of the entries
17108 seem to be redundant or useless. */
17109 switch (from)
17111 case ARG_POINTER_REGNUM:
17112 switch (to)
17114 case THUMB_HARD_FRAME_POINTER_REGNUM:
17115 return 0;
17117 case FRAME_POINTER_REGNUM:
17118 /* This is the reverse of the soft frame pointer
17119 to hard frame pointer elimination below. */
17120 return offsets->soft_frame - offsets->saved_args;
17122 case ARM_HARD_FRAME_POINTER_REGNUM:
17123 /* This is only non-zero in the case where the static chain register
17124 is stored above the frame. */
17125 return offsets->frame - offsets->saved_args - 4;
17127 case STACK_POINTER_REGNUM:
17128 /* If nothing has been pushed on the stack at all
17129 then this will return -4. This *is* correct! */
17130 return offsets->outgoing_args - (offsets->saved_args + 4);
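17131 /* Editorial check: with nothing pushed, outgoing_args equals saved_args, so
17132 the expression above is indeed -4, matching the comment. */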
17133 gcc_unreachable ();
17135 gcc_unreachable ();
17137 case FRAME_POINTER_REGNUM:
17138 switch (to)
17140 case THUMB_HARD_FRAME_POINTER_REGNUM:
17141 return 0;
17143 case ARM_HARD_FRAME_POINTER_REGNUM:
17144 /* The hard frame pointer points to the top entry in the
17145 stack frame. The soft frame pointer to the bottom entry
17146 in the stack frame. If there is no stack frame at all,
17147 then they are identical. */
17149 return offsets->frame - offsets->soft_frame;
17151 case STACK_POINTER_REGNUM:
17152 return offsets->outgoing_args - offsets->soft_frame;
17155 gcc_unreachable ();
17157 gcc_unreachable ();
17160 /* You cannot eliminate from the stack pointer.
17161 In theory you could eliminate from the hard frame
17162 pointer to the stack pointer, but this will never
17163 happen, since if a stack frame is not needed the
17164 hard frame pointer will never be used. */
17165 gcc_unreachable ();
17169 /* Given FROM and TO register numbers, say whether this elimination is
17170 allowed. Frame pointer elimination is automatically handled.
17172 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
17173 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
17174 pointer, we must eliminate FRAME_POINTER_REGNUM into
17175 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
17176 ARG_POINTER_REGNUM. */
17179 arm_can_eliminate (const int from, const int to)
17181 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
17182 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
17183 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
17184 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
17185 true);
17188 /* Emit RTL to save coprocessor registers on function entry. Returns the
17189 number of bytes pushed. */
17191 static int
17192 arm_save_coproc_regs(void)
17194 int saved_size = 0;
17196 unsigned start_reg;
17199 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
17200 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
17202 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
17203 insn = gen_rtx_MEM (V2SImode, insn);
17204 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
17205 RTX_FRAME_RELATED_P (insn) = 1;
17206 saved_size += 8;
17209 if (TARGET_HARD_FLOAT && TARGET_VFP)
17211 start_reg = FIRST_VFP_REGNUM;
17213 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
17215 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
17216 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
17218 if (start_reg != reg)
17219 saved_size += vfp_emit_fstmd (start_reg,
17220 (reg - start_reg) / 2);
17221 start_reg = reg + 2;
17224 if (start_reg != reg)
17225 saved_size += vfp_emit_fstmd (start_reg,
17226 (reg - start_reg) / 2);
17229 return saved_size;
17232 /* Set the Thumb frame pointer from the stack pointer. */
17234 static void
17235 thumb_set_frame_pointer (arm_stack_offsets *offsets)
17237 HOST_WIDE_INT amount;
17238 rtx insn, dwarf;
17240 amount = offsets->outgoing_args - offsets->locals_base;
17241 if (amount < 1024)
17242 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
17243 stack_pointer_rtx, GEN_INT (amount)));
17244 else
17246 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
17247 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
17248 expects the first two operands to be the same. */
17249 if (TARGET_THUMB2)
17251 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
17252 stack_pointer_rtx,
17253 hard_frame_pointer_rtx));
17255 else
17257 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
17258 hard_frame_pointer_rtx,
17259 stack_pointer_rtx));
17261 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
17262 plus_constant (Pmode, stack_pointer_rtx, amount));
17263 RTX_FRAME_RELATED_P (dwarf) = 1;
17264 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
17267 RTX_FRAME_RELATED_P (insn) = 1;
17270 /* Generate the prologue instructions for entry into an ARM or Thumb-2
17271 function. */
17272 void
17273 arm_expand_prologue (void)
17275 rtx amount;
17276 rtx insn;
17277 rtx ip_rtx;
17278 unsigned long live_regs_mask;
17279 unsigned long func_type;
17280 int fp_offset = 0;
17281 int saved_pretend_args = 0;
17282 int saved_regs = 0;
17283 unsigned HOST_WIDE_INT args_to_push;
17284 arm_stack_offsets *offsets;
17286 func_type = arm_current_func_type ();
17288 /* Naked functions don't have prologues. */
17289 if (IS_NAKED (func_type))
17290 return;
17292 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
17293 args_to_push = crtl->args.pretend_args_size;
17295 /* Compute which register we will have to save onto the stack. */
17296 offsets = arm_get_frame_offsets ();
17297 live_regs_mask = offsets->saved_regs_mask;
17299 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
17301 if (IS_STACKALIGN (func_type))
17303 rtx r0, r1;
17305 /* Handle a word-aligned stack pointer. We generate the following:
17306 mov r0, sp
17307 bic r1, r0, #7
17308 mov sp, r1
17310 <save and restore r0 in normal prologue/epilogue>
17311 mov sp, r0
17314 The unwinder doesn't need to know about the stack realignment.
17315 Just tell it we saved SP in r0. */
17316 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
17318 r0 = gen_rtx_REG (SImode, 0);
17319 r1 = gen_rtx_REG (SImode, 1);
17321 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
17322 RTX_FRAME_RELATED_P (insn) = 1;
17323 add_reg_note (insn, REG_CFA_REGISTER, NULL);
17325 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
17327 /* ??? The CFA changes here, which may cause GDB to conclude that it
17328 has entered a different function. That said, the unwind info is
17329 correct, individually, before and after this instruction because
17330 we've described the save of SP, which will override the default
17331 handling of SP as restoring from the CFA. */
17332 emit_insn (gen_movsi (stack_pointer_rtx, r1));
17335 /* For APCS frames, if the IP register is clobbered
17336 when creating the frame, save that register in a special
17337 way. */
17338 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
17340 if (IS_INTERRUPT (func_type))
17342 /* Interrupt functions must not corrupt any registers.
17343 Creating a frame pointer however, corrupts the IP
17344 register, so we must push it first. */
17345 emit_multi_reg_push (1 << IP_REGNUM);
17347 /* Do not set RTX_FRAME_RELATED_P on this insn.
17348 The dwarf stack unwinding code only wants to see one
17349 stack decrement per function, and this is not it. If
17350 this instruction is labeled as being part of the frame
17351 creation sequence then dwarf2out_frame_debug_expr will
17352 die when it encounters the assignment of IP to FP
17353 later on, since the use of SP here establishes SP as
17354 the CFA register and not IP.
17356 Anyway this instruction is not really part of the stack
17357 frame creation although it is part of the prologue. */
17359 else if (IS_NESTED (func_type))
17361 /* The Static chain register is the same as the IP register
17362 used as a scratch register during stack frame creation.
17363 To get around this need to find somewhere to store IP
17364 whilst the frame is being created. We try the following
17367 1. The last argument register.
17368 2. A slot on the stack above the frame. (This only
17369 works if the function is not a varargs function).
17370 3. Register r3, after pushing the argument registers
17371 onto the stack.
17373 Note - we only need to tell the dwarf2 backend about the SP
17374 adjustment in the second variant; the static chain register
17375 doesn't need to be unwound, as it doesn't contain a value
17376 inherited from the caller. */
17378 if (df_regs_ever_live_p (3) == false)
17379 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
17380 else if (args_to_push == 0)
17384 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
17385 saved_regs += 4;
17387 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
17388 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
17389 fp_offset = 4;
17391 /* Just tell the dwarf backend that we adjusted SP. */
17392 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17393 plus_constant (Pmode, stack_pointer_rtx,
17394 -fp_offset));
17395 RTX_FRAME_RELATED_P (insn) = 1;
17396 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
17400 /* Store the args on the stack. */
17401 if (cfun->machine->uses_anonymous_args)
17402 insn = emit_multi_reg_push
17403 ((0xf0 >> (args_to_push / 4)) & 0xf);
17404 else
17405 insn = emit_insn
17406 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17407 GEN_INT (- args_to_push)));
17409 RTX_FRAME_RELATED_P (insn) = 1;
17411 saved_pretend_args = 1;
17412 fp_offset = args_to_push;
17413 args_to_push = 0;
17415 /* Now reuse r3 to preserve IP. */
17416 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
17420 insn = emit_set_insn (ip_rtx,
17421 plus_constant (Pmode, stack_pointer_rtx,
17422 fp_offset));
17423 RTX_FRAME_RELATED_P (insn) = 1;
17428 /* Push the argument registers, or reserve space for them. */
17429 if (cfun->machine->uses_anonymous_args)
17430 insn = emit_multi_reg_push
17431 ((0xf0 >> (args_to_push / 4)) & 0xf);
17432 else
17433 insn = emit_insn
17434 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17435 GEN_INT (- args_to_push)));
17436 RTX_FRAME_RELATED_P (insn) = 1;
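17437 /* Editorial arithmetic: with args_to_push = 8, (0xf0 >> 2) & 0xf = 0xc, i.e.
17438 the mask {r2, r3}, so exactly the stacked argument registers are pushed. */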
17439 /* If this is an interrupt service routine, and the link register
17440 is going to be pushed, and we're not generating the extra
17441 push of IP (needed when a frame is created and the APCS frame layout is used),
17442 subtracting four from LR now will mean that the function return
17443 can be done with a single instruction. */
17444 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
17445 && (live_regs_mask & (1 << LR_REGNUM)) != 0
17446 && !(frame_pointer_needed && TARGET_APCS_FRAME)
17447 && TARGET_ARM)
17449 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
17451 emit_set_insn (lr, plus_constant (SImode, lr, -4));
17454 if (live_regs_mask)
17456 saved_regs += bit_count (live_regs_mask) * 4;
17457 if (optimize_size && !frame_pointer_needed
17458 && saved_regs == offsets->saved_regs - offsets->saved_args)
17460 /* If no coprocessor registers are being pushed and we don't have
17461 to worry about a frame pointer then push extra registers to
17462 create the stack frame. This is done in a way that does not
17463 alter the frame layout, so is independent of the epilogue. */
17467 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
17468 n++;
17469 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
17470 if (frame && n * 4 >= frame)
17472 n = frame / 4;
17473 live_regs_mask |= (1 << n) - 1;
17474 saved_regs += frame;
17479 && current_tune->prefer_ldrd_strd
17480 && !optimize_function_for_size_p (cfun))
17484 thumb2_emit_strd_push (live_regs_mask);
17488 insn = emit_multi_reg_push (live_regs_mask);
17489 RTX_FRAME_RELATED_P (insn) = 1;
17494 insn = emit_multi_reg_push (live_regs_mask);
17495 RTX_FRAME_RELATED_P (insn) = 1;
17499 if (! IS_VOLATILE (func_type))
17500 saved_regs += arm_save_coproc_regs ();
17502 if (frame_pointer_needed && TARGET_ARM)
17504 /* Create the new frame pointer. */
17505 if (TARGET_APCS_FRAME)
17507 insn = GEN_INT (-(4 + args_to_push + fp_offset));
17508 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
17509 RTX_FRAME_RELATED_P (insn) = 1;
17511 if (IS_NESTED (func_type))
17513 /* Recover the static chain register. */
17514 if (!df_regs_ever_live_p (3)
17515 || saved_pretend_args)
17516 insn = gen_rtx_REG (SImode, 3);
17517 else /* if (crtl->args.pretend_args_size == 0) */
17519 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
17520 insn = gen_frame_mem (SImode, insn);
17522 emit_set_insn (ip_rtx, insn);
17523 /* Add a USE to stop propagate_one_insn() from barfing. */
17524 emit_insn (gen_force_register_use (ip_rtx));
17529 insn = GEN_INT (saved_regs - 4);
17530 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
17531 stack_pointer_rtx, insn));
17532 RTX_FRAME_RELATED_P (insn) = 1;
17536 if (flag_stack_usage_info)
17537 current_function_static_stack_size
17538 = offsets->outgoing_args - offsets->saved_args;
17540 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
17542 /* This add can produce multiple insns for a large constant, so we
17543 need to get tricky. */
17544 rtx last = get_last_insn ();
17546 amount = GEN_INT (offsets->saved_args + saved_regs
17547 - offsets->outgoing_args);
17549 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17550 amount));
17551 do
17553 last = last ? NEXT_INSN (last) : get_insns ();
17554 RTX_FRAME_RELATED_P (last) = 1;
17556 while (last != insn);
17558 /* If the frame pointer is needed, emit a special barrier that
17559 will prevent the scheduler from moving stores to the frame
17560 before the stack adjustment. */
17561 if (frame_pointer_needed)
17562 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
17563 hard_frame_pointer_rtx));
17567 if (frame_pointer_needed && TARGET_THUMB2)
17568 thumb_set_frame_pointer (offsets);
17570 if (flag_pic && arm_pic_register != INVALID_REGNUM)
17572 unsigned long mask;
17574 mask = live_regs_mask;
17575 mask &= THUMB2_WORK_REGS;
17576 if (!IS_NESTED (func_type))
17577 mask |= (1 << IP_REGNUM);
17578 arm_load_pic_register (mask);
17581 /* If we are profiling, make sure no instructions are scheduled before
17582 the call to mcount. Similarly if the user has requested no
17583 scheduling in the prolog. Similarly if we want non-call exceptions
17584 using the EABI unwinder, to prevent faulting instructions from being
17585 swapped with a stack adjustment. */
17586 if (crtl->profile || !TARGET_SCHED_PROLOG
17587 || (arm_except_unwind_info (&global_options) == UI_TARGET
17588 && cfun->can_throw_non_call_exceptions))
17589 emit_insn (gen_blockage ());
17591 /* If the link register is being kept alive, with the return address in it,
17592 then make sure that it does not get reused by the ce2 pass. */
17593 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
17594 cfun->machine->lr_save_eliminated = 1;
17597 /* Print condition code to STREAM. Helper function for arm_print_operand. */
17599 arm_print_condition (FILE *stream)
17601 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
17603 /* Branch conversion is not implemented for Thumb-2. */
17606 output_operand_lossage ("predicated Thumb instruction");
17609 if (current_insn_predicate != NULL)
17611 output_operand_lossage
17612 ("predicated instruction in conditional sequence");
17616 fputs (arm_condition_codes[arm_current_cc], stream);
17618 else if (current_insn_predicate)
17620 enum arm_cond_code code;
17624 output_operand_lossage ("predicated Thumb instruction");
17628 code = get_arm_condition_code (current_insn_predicate);
17629 fputs (arm_condition_codes[code], stream);
17634 /* If CODE is 'd', then the X is a condition operand and the instruction
17635 should only be executed if the condition is true.
17636 if CODE is 'D', then the X is a condition operand and the instruction
17637 should only be executed if the condition is false: however, if the mode
17638 of the comparison is CCFPEmode, then always execute the instruction -- we
17639 do this because in these circumstances !GE does not necessarily imply LT;
17640 in these cases the instruction pattern will take care to make sure that
17641 an instruction containing %d will follow, thereby undoing the effects of
17642 doing this instruction unconditionally.
17643 If CODE is 'N' then X is a floating point operand that must be negated
17645 If CODE is 'B' then output a bitwise inverted value of X (a const int).
17646 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
17648 arm_print_operand (FILE *stream, rtx x, int code)
17653 fputs (ASM_COMMENT_START, stream);
17657 fputs (user_label_prefix, stream);
17661 fputs (REGISTER_PREFIX, stream);
17665 arm_print_condition (stream);
17669 /* Nothing in unified syntax, otherwise the current condition code. */
17670 if (!TARGET_UNIFIED_ASM)
17671 arm_print_condition (stream);
17675 /* The current condition code in unified syntax, otherwise nothing. */
17676 if (TARGET_UNIFIED_ASM)
17677 arm_print_condition (stream);
17681 /* The current condition code for a condition code setting instruction.
17682 Preceded by 's' in unified syntax, otherwise followed by 's'. */
17683 if (TARGET_UNIFIED_ASM)
17685 fputc('s', stream);
17686 arm_print_condition (stream);
17690 arm_print_condition (stream);
17691 fputc('s', stream);
17696 /* If the instruction is conditionally executed then print
17697 the current condition code, otherwise print 's'. */
17698 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
17699 if (current_insn_predicate)
17700 arm_print_condition (stream);
17702 fputc('s', stream);
17705 /* %# is a "break" sequence. It doesn't output anything, but is used to
17706 separate e.g. operand numbers from following text, if that text consists
17707 of further digits which we don't want to be part of the operand
17708 number. */
17715 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17716 r = real_value_negate (&r);
17717 fprintf (stream, "%s", fp_const_from_val (&r));
17721 /* An integer or symbol address without a preceding # sign. */
17723 switch (GET_CODE (x))
17726 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17730 output_addr_const (stream, x);
17734 if (GET_CODE (XEXP (x, 0)) == PLUS
17735 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
17737 output_addr_const (stream, x);
17740 /* Fall through. */
17743 output_operand_lossage ("Unsupported operand for code '%c'", code);
17747 /* An integer that we want to print in HEX. */
17749 switch (GET_CODE (x))
17752 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17756 output_operand_lossage ("Unsupported operand for code '%c'", code);
17761 if (CONST_INT_P (x))
17764 val = ARM_SIGN_EXTEND (~INTVAL (x));
17765 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
17769 putc ('~', stream);
17770 output_addr_const (stream, x);
17775 /* The low 16 bits of an immediate constant. */
17776 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
17780 fprintf (stream, "%s", arithmetic_instr (x, 1));
17784 fprintf (stream, "%s", arithmetic_instr (x, 0));
17792 shift = shift_op (x, &val);
17796 fprintf (stream, ", %s ", shift);
17798 arm_print_operand (stream, XEXP (x, 1), 0);
17800 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
17805 /* An explanation of the 'Q', 'R' and 'H' register operands:
17807 In a pair of registers containing a DI or DF value the 'Q'
17808 operand returns the register number of the register containing
17809 the least significant part of the value. The 'R' operand returns
17810 the register number of the register containing the most
17811 significant part of the value.
17813 The 'H' operand returns the higher of the two register numbers.
17814 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
17815 same as the 'Q' operand, since the most significant part of the
17816 value is held in the lower number register. The reverse is true
17817 on systems where WORDS_BIG_ENDIAN is false.
17819 The purpose of these operands is to distinguish between cases
17820 where the endian-ness of the values is important (for example
17821 when they are added together), and cases where the endian-ness
17822 is irrelevant, but the order of register operations is important.
17823 For example when loading a value from memory into a register
17824 pair, the endian-ness does not matter. Provided that the value
17825 from the lower memory address is put into the lower numbered
17826 register, and the value from the higher address is put into the
17827 higher numbered register, the load will work regardless of whether
17828 the value being loaded is big-wordian or little-wordian. The
17829 order of the two register loads can matter however, if the address
17830 of the memory location is actually held in one of the registers
17831 being overwritten by the load.
17833 The 'Q' and 'R' constraints are also available for 64-bit constants.  */
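/* For example, with a DImode value held in the register pair r4/r5 and
   WORDS_BIG_ENDIAN false, 'Q' prints r4 (the least significant word),
   'R' prints r5 (the most significant word) and 'H' prints r5; with
   WORDS_BIG_ENDIAN true, 'Q' and 'R' swap while 'H' still prints r5.  */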
17836 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
17838 rtx part = gen_lowpart (SImode, x);
17839 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17843 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17845 output_operand_lossage ("invalid operand for code '%c'", code);
17849 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
17853 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
17855 enum machine_mode mode = GET_MODE (x);
17858 if (mode == VOIDmode)
17860 part = gen_highpart_mode (SImode, mode, x);
17861 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17865 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17867 output_operand_lossage ("invalid operand for code '%c'", code);
17871 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
17875 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17877 output_operand_lossage ("invalid operand for code '%c'", code);
17881 asm_fprintf (stream, "%r", REGNO (x) + 1);
17885 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17887 output_operand_lossage ("invalid operand for code '%c'", code);
17891 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
17895 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17897 output_operand_lossage ("invalid operand for code '%c'", code);
17901 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
17905 asm_fprintf (stream, "%r",
17906 REG_P (XEXP (x, 0))
17907 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
17911 asm_fprintf (stream, "{%r-%r}",
17913 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
17916 /* Like 'M', but writing doubleword vector registers, for use by Neon
17920 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
17921 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
17923 asm_fprintf (stream, "{d%d}", regno);
17925 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
17930 /* CONST_TRUE_RTX means always -- that's the default. */
17931 if (x == const_true_rtx)
17934 if (!COMPARISON_P (x))
17936 output_operand_lossage ("invalid operand for code '%c'", code);
17940 fputs (arm_condition_codes[get_arm_condition_code (x)],
17945 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
17946 want to do that. */
17947 if (x == const_true_rtx)
17949 output_operand_lossage ("instruction never executed");
17952 if (!COMPARISON_P (x))
17954 output_operand_lossage ("invalid operand for code '%c'", code);
17958 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
17959 (get_arm_condition_code (x))],
17969 /* Former Maverick support, removed after GCC-4.7. */
17970 output_operand_lossage ("obsolete Maverick format code '%c'", code);
17975 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
17976 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
17977 /* Bad value for wCG register number. */
17979 output_operand_lossage ("invalid operand for code '%c'", code);
17984 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
17987 /* Print an iWMMXt control register name. */
17989 if (!CONST_INT_P (x)
17991 || INTVAL (x) >= 16)
17992 /* Bad value for wC register number. */
17994 output_operand_lossage ("invalid operand for code '%c'", code);
18000 static const char * wc_reg_names [16] =
18002 "wCID", "wCon", "wCSSF", "wCASF",
18003 "wC4", "wC5", "wC6", "wC7",
18004 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
18005 "wC12", "wC13", "wC14", "wC15"
18008 fputs (wc_reg_names [INTVAL (x)], stream);
18012 /* Print the high single-precision register of a VFP double-precision
18016 int mode = GET_MODE (x);
18019 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
18021 output_operand_lossage ("invalid operand for code '%c'", code);
18026 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
18028 output_operand_lossage ("invalid operand for code '%c'", code);
18032 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
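/* For example, a DFmode value in d5 occupies s10/s11, so this prints
   "s11", the upper single-precision half.  */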
18036 /* Print a VFP/Neon double precision or quad precision register name. */
18040 int mode = GET_MODE (x);
18041 int is_quad = (code == 'q');
18044 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
18046 output_operand_lossage ("invalid operand for code '%c'", code);
18051 || !IS_VFP_REGNUM (REGNO (x)))
18053 output_operand_lossage ("invalid operand for code '%c'", code);
18058 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
18059 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
18061 output_operand_lossage ("invalid operand for code '%c'", code);
18065 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
18066 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
18070 /* These two codes print the low/high doubleword register of a Neon quad
18071 register, respectively. For pair-structure types, can also print
18072 low/high quadword registers. */
18076 int mode = GET_MODE (x);
18079 if ((GET_MODE_SIZE (mode) != 16
18080 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
18082 output_operand_lossage ("invalid operand for code '%c'", code);
18087 if (!NEON_REGNO_OK_FOR_QUAD (regno))
18089 output_operand_lossage ("invalid operand for code '%c'", code);
18093 if (GET_MODE_SIZE (mode) == 16)
18094 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
18095 + (code == 'f' ? 1 : 0));
18097 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
18098 + (code == 'f' ? 1 : 0));
18102 /* Print a VFPv3 floating-point constant, represented as an integer
18106 int index = vfp3_const_double_index (x);
18107 gcc_assert (index != -1);
18108 fprintf (stream, "%d", index);
18112 /* Print bits representing opcode features for Neon.
18114 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
18115 and polynomials as unsigned.
18117 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
18119 Bit 2 is 1 for rounding functions, 0 otherwise. */
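/* For example, an INTVAL of 1 (signed integer) makes 'T' print 's' while
   'F' prints 'i'; an INTVAL of 3 (float) makes both print 'f'; and if bit 2
   is also set, 'O' prints the "r" rounding suffix.  */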
18121 /* Identify the type as 's', 'u', 'p' or 'f'. */
18124 HOST_WIDE_INT bits = INTVAL (x);
18125 fputc ("uspf"[bits & 3], stream);
18129 /* Likewise, but signed and unsigned integers are both 'i'. */
18132 HOST_WIDE_INT bits = INTVAL (x);
18133 fputc ("iipf"[bits & 3], stream);
18137 /* As for 'T', but emit 'u' instead of 'p'. */
18140 HOST_WIDE_INT bits = INTVAL (x);
18141 fputc ("usuf"[bits & 3], stream);
18145 /* Bit 2: rounding (vs none). */
18148 HOST_WIDE_INT bits = INTVAL (x);
18149 fputs ((bits & 4) != 0 ? "r" : "", stream);
18153 /* Memory operand for vld1/vst1 instruction. */
18157 bool postinc = FALSE;
18158 unsigned align, memsize, align_bits;
18160 gcc_assert (MEM_P (x));
18161 addr = XEXP (x, 0);
18162 if (GET_CODE (addr) == POST_INC)
18165 addr = XEXP (addr, 0);
18167 asm_fprintf (stream, "[%r", REGNO (addr));
18169 /* We know the alignment of this access, so we can emit a hint in the
18170 instruction (for some alignments) as an aid to the memory subsystem of the hardware.  */
18172 align = MEM_ALIGN (x) >> 3;
18173 memsize = MEM_SIZE (x);
18175 /* Only certain alignment specifiers are supported by the hardware. */
18176 if (memsize == 32 && (align % 32) == 0)
18178 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
18180 else if (memsize >= 8 && (align % 8) == 0)
18185 if (align_bits != 0)
18186 asm_fprintf (stream, ":%d", align_bits);
18188 asm_fprintf (stream, "]");
18191 fputs ("!", stream);
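/* For example, a 16-byte vld1/vst1 access known to be 128-bit aligned is
   printed with an alignment hint, e.g. "[r0:128]", while an access with no
   usable alignment guarantee gets a plain "[r0]".  */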
18199 gcc_assert (MEM_P (x));
18200 addr = XEXP (x, 0);
18201 gcc_assert (REG_P (addr));
18202 asm_fprintf (stream, "[%r]", REGNO (addr));
18206 /* Translate an S register number into a D register number and element index. */
18209 int mode = GET_MODE (x);
18212 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
18214 output_operand_lossage ("invalid operand for code '%c'", code);
18219 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
18221 output_operand_lossage ("invalid operand for code '%c'", code);
18225 regno = regno - FIRST_VFP_REGNUM;
18226 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
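/* For example, s13 prints as "d6[1]": the odd-numbered single-precision
   register is element 1 (the high half) of d6.  */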
18231 gcc_assert (CONST_DOUBLE_P (x));
18232 fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
18235 /* Register specifier for vld1.16/vst1.16. Translate the S register
18236 number into a D register number and element index. */
18239 int mode = GET_MODE (x);
18242 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
18244 output_operand_lossage ("invalid operand for code '%c'", code);
18249 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
18251 output_operand_lossage ("invalid operand for code '%c'", code);
18255 regno = regno - FIRST_VFP_REGNUM;
18256 fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
18263 output_operand_lossage ("missing operand");
18267 switch (GET_CODE (x))
18270 asm_fprintf (stream, "%r", REGNO (x));
18274 output_memory_reference_mode = GET_MODE (x);
18275 output_address (XEXP (x, 0));
18282 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
18283 sizeof (fpstr), 0, 1);
18284 fprintf (stream, "#%s", fpstr);
18287 fprintf (stream, "#%s", fp_immediate_constant (x));
18291 gcc_assert (GET_CODE (x) != NEG);
18292 fputc ('#', stream);
18293 if (GET_CODE (x) == HIGH)
18295 fputs (":lower16:", stream);
18299 output_addr_const (stream, x);
18305 /* Target hook for printing a memory address. */
18307 arm_print_operand_address (FILE *stream, rtx x)
18311 int is_minus = GET_CODE (x) == MINUS;
18314 asm_fprintf (stream, "[%r]", REGNO (x));
18315 else if (GET_CODE (x) == PLUS || is_minus)
18317 rtx base = XEXP (x, 0);
18318 rtx index = XEXP (x, 1);
18319 HOST_WIDE_INT offset = 0;
18321 || (REG_P (index) && REGNO (index) == SP_REGNUM))
18323 /* Ensure that BASE is a register. */
18324 /* (one of them must be). */
18325 /* Also ensure the SP is not used as an index register. */
18330 switch (GET_CODE (index))
18333 offset = INTVAL (index);
18336 asm_fprintf (stream, "[%r, #%wd]",
18337 REGNO (base), offset);
18341 asm_fprintf (stream, "[%r, %s%r]",
18342 REGNO (base), is_minus ? "-" : "",
18352 asm_fprintf (stream, "[%r, %s%r",
18353 REGNO (base), is_minus ? "-" : "",
18354 REGNO (XEXP (index, 0)));
18355 arm_print_operand (stream, index, 'S');
18356 fputs ("]", stream);
18361 gcc_unreachable ();
18364 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
18365 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
18367 extern enum machine_mode output_memory_reference_mode;
18369 gcc_assert (REG_P (XEXP (x, 0)));
18371 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
18372 asm_fprintf (stream, "[%r, #%s%d]!",
18373 REGNO (XEXP (x, 0)),
18374 GET_CODE (x) == PRE_DEC ? "-" : "",
18375 GET_MODE_SIZE (output_memory_reference_mode));
18377 asm_fprintf (stream, "[%r], #%s%d",
18378 REGNO (XEXP (x, 0)),
18379 GET_CODE (x) == POST_DEC ? "-" : "",
18380 GET_MODE_SIZE (output_memory_reference_mode));
18382 else if (GET_CODE (x) == PRE_MODIFY)
18384 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
18385 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
18386 asm_fprintf (stream, "#%wd]!",
18387 INTVAL (XEXP (XEXP (x, 1), 1)));
18389 asm_fprintf (stream, "%r]!",
18390 REGNO (XEXP (XEXP (x, 1), 1)));
18392 else if (GET_CODE (x) == POST_MODIFY)
18394 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
18395 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
18396 asm_fprintf (stream, "#%wd",
18397 INTVAL (XEXP (XEXP (x, 1), 1)));
18399 asm_fprintf (stream, "%r",
18400 REGNO (XEXP (XEXP (x, 1), 1)));
18402 else output_addr_const (stream, x);
18407 asm_fprintf (stream, "[%r]", REGNO (x));
18408 else if (GET_CODE (x) == POST_INC)
18409 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
18410 else if (GET_CODE (x) == PLUS)
18412 gcc_assert (REG_P (XEXP (x, 0)));
18413 if (CONST_INT_P (XEXP (x, 1)))
18414 asm_fprintf (stream, "[%r, #%wd]",
18415 REGNO (XEXP (x, 0)),
18416 INTVAL (XEXP (x, 1)));
18418 asm_fprintf (stream, "[%r, %r]",
18419 REGNO (XEXP (x, 0)),
18420 REGNO (XEXP (x, 1)));
18423 output_addr_const (stream, x);
18427 /* Target hook for indicating whether a punctuation character for
18428 TARGET_PRINT_OPERAND is valid. */
18430 arm_print_operand_punct_valid_p (unsigned char code)
18432 return (code == '@' || code == '|' || code == '.'
18433 || code == '(' || code == ')' || code == '#'
18434 || (TARGET_32BIT && (code == '?'))
18435 || (TARGET_THUMB2 && (code == '!'))
18436 || (TARGET_THUMB && (code == '_')));
18439 /* Target hook for assembling integer objects. The ARM version needs to
18440 handle word-sized values specially. */
18442 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
18444 enum machine_mode mode;
18446 if (size == UNITS_PER_WORD && aligned_p)
18448 fputs ("\t.word\t", asm_out_file);
18449 output_addr_const (asm_out_file, x);
18451 /* Mark symbols as position independent. We only do this in the
18452 .text segment, not in the .data segment. */
18453 if (NEED_GOT_RELOC && flag_pic && making_const_table
18454 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
18456 /* See legitimize_pic_address for an explanation of the
18457 TARGET_VXWORKS_RTP check. */
18458 if (TARGET_VXWORKS_RTP
18459 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
18460 fputs ("(GOT)", asm_out_file);
18462 fputs ("(GOTOFF)", asm_out_file);
18464 fputc ('\n', asm_out_file);
18468 mode = GET_MODE (x);
18470 if (arm_vector_mode_supported_p (mode))
18474 gcc_assert (GET_CODE (x) == CONST_VECTOR);
18476 units = CONST_VECTOR_NUNITS (x);
18477 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
18479 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
18480 for (i = 0; i < units; i++)
18482 rtx elt = CONST_VECTOR_ELT (x, i);
18484 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
18487 for (i = 0; i < units; i++)
18489 rtx elt = CONST_VECTOR_ELT (x, i);
18490 REAL_VALUE_TYPE rval;
18492 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
18495 (rval, GET_MODE_INNER (mode),
18496 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
18502 return default_assemble_integer (x, size, aligned_p);
18506 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
18510 if (!TARGET_AAPCS_BASED)
18513 default_named_section_asm_out_constructor
18514 : default_named_section_asm_out_destructor) (symbol, priority);
18518 /* Put these in the .init_array section, using a special relocation. */
18519 if (priority != DEFAULT_INIT_PRIORITY)
18522 sprintf (buf, "%s.%.5u",
18523 is_ctor ? ".init_array" : ".fini_array",
18525 s = get_section (buf, SECTION_WRITE, NULL_TREE);
18532 switch_to_section (s);
18533 assemble_align (POINTER_SIZE);
18534 fputs ("\t.word\t", asm_out_file);
18535 output_addr_const (asm_out_file, symbol);
18536 fputs ("(target1)\n", asm_out_file);
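/* For example, a constructor with priority 123 goes into a section named
   ".init_array.00123" (the "%.5u" zero-pads the priority to five digits),
   and the "(target1)" suffix asks the assembler for an R_ARM_TARGET1
   relocation on the emitted address.  */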
18539 /* Add a function to the list of static constructors. */
18542 arm_elf_asm_constructor (rtx symbol, int priority)
18544 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
18547 /* Add a function to the list of static destructors. */
18550 arm_elf_asm_destructor (rtx symbol, int priority)
18552 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
18555 /* A finite state machine takes care of noticing whether or not instructions
18556 can be conditionally executed, thus decreasing execution time and code
18557 size by deleting branch instructions. The fsm is controlled by
18558 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
18560 /* The state of the fsm controlling condition codes is:
18561 0: normal, do nothing special
18562 1: make ASM_OUTPUT_OPCODE not output this instruction
18563 2: make ASM_OUTPUT_OPCODE not output this instruction
18564 3: make instructions conditional
18565 4: make instructions conditional
18567 State transitions (state->state by whom under condition):
18568 0 -> 1 final_prescan_insn if the `target' is a label
18569 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
18570 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
18571 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
18572 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
18573 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
18574 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
18575 (the target insn is arm_target_insn).
18577 If the jump clobbers the conditions then we use states 2 and 4.
18579 A similar thing can be done with conditional return insns.
18581 XXX In case the `target' is an unconditional branch, this conditionalising
18582 of the instructions always reduces code size, but not always execution
18583 time. But then, I want to reduce the code size to somewhere near what
18584 /bin/cc produces. */
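/* As an illustration, a forward conditional branch over two insns, such as

      cmp     r0, #0
      beq     .L1
      add     r1, r1, #1
      str     r1, [r2]
   .L1:

   can be rewritten by this machinery as

      cmp     r0, #0
      addne   r1, r1, #1
      strne   r1, [r2]

   eliminating the branch entirely.  */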
18586 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
18587 instructions. When a COND_EXEC instruction is seen the subsequent
18588 instructions are scanned so that multiple conditional instructions can be
18589 combined into a single IT block. arm_condexec_count and arm_condexec_mask
18590 specify the length and true/false mask for the IT block. These will be
18591 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
18593 /* Returns the index of the ARM condition code string in
18594 `arm_condition_codes', or ARM_NV if the comparison is invalid.
18595 COMPARISON should be an rtx like `(eq (...) (...))'. */
18598 maybe_get_arm_condition_code (rtx comparison)
18600 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
18601 enum arm_cond_code code;
18602 enum rtx_code comp_code = GET_CODE (comparison);
18604 if (GET_MODE_CLASS (mode) != MODE_CC)
18605 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
18606 XEXP (comparison, 1));
18610 case CC_DNEmode: code = ARM_NE; goto dominance;
18611 case CC_DEQmode: code = ARM_EQ; goto dominance;
18612 case CC_DGEmode: code = ARM_GE; goto dominance;
18613 case CC_DGTmode: code = ARM_GT; goto dominance;
18614 case CC_DLEmode: code = ARM_LE; goto dominance;
18615 case CC_DLTmode: code = ARM_LT; goto dominance;
18616 case CC_DGEUmode: code = ARM_CS; goto dominance;
18617 case CC_DGTUmode: code = ARM_HI; goto dominance;
18618 case CC_DLEUmode: code = ARM_LS; goto dominance;
18619 case CC_DLTUmode: code = ARM_CC;
18622 if (comp_code == EQ)
18623 return ARM_INVERSE_CONDITION_CODE (code);
18624 if (comp_code == NE)
18631 case NE: return ARM_NE;
18632 case EQ: return ARM_EQ;
18633 case GE: return ARM_PL;
18634 case LT: return ARM_MI;
18635 default: return ARM_NV;
18641 case NE: return ARM_NE;
18642 case EQ: return ARM_EQ;
18643 default: return ARM_NV;
18649 case NE: return ARM_MI;
18650 case EQ: return ARM_PL;
18651 default: return ARM_NV;
18656 /* We can handle all cases except UNEQ and LTGT. */
18659 case GE: return ARM_GE;
18660 case GT: return ARM_GT;
18661 case LE: return ARM_LS;
18662 case LT: return ARM_MI;
18663 case NE: return ARM_NE;
18664 case EQ: return ARM_EQ;
18665 case ORDERED: return ARM_VC;
18666 case UNORDERED: return ARM_VS;
18667 case UNLT: return ARM_LT;
18668 case UNLE: return ARM_LE;
18669 case UNGT: return ARM_HI;
18670 case UNGE: return ARM_PL;
18671 /* UNEQ and LTGT do not have a representation. */
18672 case UNEQ: /* Fall through. */
18673 case LTGT: /* Fall through. */
18674 default: return ARM_NV;
18680 case NE: return ARM_NE;
18681 case EQ: return ARM_EQ;
18682 case GE: return ARM_LE;
18683 case GT: return ARM_LT;
18684 case LE: return ARM_GE;
18685 case LT: return ARM_GT;
18686 case GEU: return ARM_LS;
18687 case GTU: return ARM_CC;
18688 case LEU: return ARM_CS;
18689 case LTU: return ARM_HI;
18690 default: return ARM_NV;
18696 case LTU: return ARM_CS;
18697 case GEU: return ARM_CC;
18698 default: return ARM_NV;
18704 case NE: return ARM_NE;
18705 case EQ: return ARM_EQ;
18706 case GEU: return ARM_CS;
18707 case GTU: return ARM_HI;
18708 case LEU: return ARM_LS;
18709 case LTU: return ARM_CC;
18710 default: return ARM_NV;
18716 case GE: return ARM_GE;
18717 case LT: return ARM_LT;
18718 case GEU: return ARM_CS;
18719 case LTU: return ARM_CC;
18720 default: return ARM_NV;
18726 case NE: return ARM_NE;
18727 case EQ: return ARM_EQ;
18728 case GE: return ARM_GE;
18729 case GT: return ARM_GT;
18730 case LE: return ARM_LE;
18731 case LT: return ARM_LT;
18732 case GEU: return ARM_CS;
18733 case GTU: return ARM_HI;
18734 case LEU: return ARM_LS;
18735 case LTU: return ARM_CC;
18736 default: return ARM_NV;
18739 default: gcc_unreachable ();
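/* For example, a (ge x y) comparison in plain CCmode yields ARM_GE, while
   the same comparison against a CC_SWPmode flags register (the operands
   were swapped when the flags were set) yields ARM_LE.  */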
18743 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
18744 static enum arm_cond_code
18745 get_arm_condition_code (rtx comparison)
18747 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
18748 gcc_assert (code != ARM_NV);
18752 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed instructions.  */
18755 thumb2_final_prescan_insn (rtx insn)
18757 rtx first_insn = insn;
18758 rtx body = PATTERN (insn);
18760 enum arm_cond_code code;
18764 /* Remove the previous insn from the count of insns to be output. */
18765 if (arm_condexec_count)
18766 arm_condexec_count--;
18768 /* Nothing to do if we are already inside a conditional block. */
18769 if (arm_condexec_count)
18772 if (GET_CODE (body) != COND_EXEC)
18775 /* Conditional jumps are implemented directly. */
18779 predicate = COND_EXEC_TEST (body);
18780 arm_current_cc = get_arm_condition_code (predicate);
18782 n = get_attr_ce_count (insn);
18783 arm_condexec_count = 1;
18784 arm_condexec_mask = (1 << n) - 1;
18785 arm_condexec_masklen = n;
18786 /* See if subsequent instructions can be combined into the same block. */
18789 insn = next_nonnote_insn (insn);
18791 /* Jumping into the middle of an IT block is illegal, so a label or
18792 barrier terminates the block. */
18793 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
18796 body = PATTERN (insn);
18797 /* USE and CLOBBER aren't really insns, so just skip them. */
18798 if (GET_CODE (body) == USE
18799 || GET_CODE (body) == CLOBBER)
18802 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
18803 if (GET_CODE (body) != COND_EXEC)
18805 /* Allow up to 4 conditionally executed instructions in a block. */
18806 n = get_attr_ce_count (insn);
18807 if (arm_condexec_masklen + n > 4)
18810 predicate = COND_EXEC_TEST (body);
18811 code = get_arm_condition_code (predicate);
18812 mask = (1 << n) - 1;
18813 if (arm_current_cc == code)
18814 arm_condexec_mask |= (mask << arm_condexec_masklen);
18815 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
18818 arm_condexec_count++;
18819 arm_condexec_masklen += n;
18821 /* A jump must be the last instruction in a conditional block. */
18825 /* Restore recog_data (getting the attributes of other insns can
18826 destroy this array, but final.c assumes that it remains intact
18827 across this call). */
18828 extract_constrain_insn_cached (first_insn);
18832 arm_final_prescan_insn (rtx insn)
18834 /* BODY will hold the body of INSN. */
18835 rtx body = PATTERN (insn);
18837 /* This will be 1 if trying to repeat the trick, and things need to be
18838 reversed if it appears to fail. */
18841 /* If we start with a return insn, we only succeed if we find another one. */
18842 int seeking_return = 0;
18843 enum rtx_code return_code = UNKNOWN;
18845 /* START_INSN will hold the insn from where we start looking. This is the
18846 first insn after the following code_label if REVERSE is true. */
18847 rtx start_insn = insn;
18849 /* If in state 4, check if the target branch is reached, in order to
18850 change back to state 0. */
18851 if (arm_ccfsm_state == 4)
18853 if (insn == arm_target_insn)
18855 arm_target_insn = NULL;
18856 arm_ccfsm_state = 0;
18861 /* If in state 3, it is possible to repeat the trick, if this insn is an
18862 unconditional branch to a label, and immediately following this branch
18863 is the previous target label which is only used once, and the label this
18864 branch jumps to is not too far off. */
18865 if (arm_ccfsm_state == 3)
18867 if (simplejump_p (insn))
18869 start_insn = next_nonnote_insn (start_insn);
18870 if (BARRIER_P (start_insn))
18872 /* XXX Isn't this always a barrier? */
18873 start_insn = next_nonnote_insn (start_insn);
18875 if (LABEL_P (start_insn)
18876 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18877 && LABEL_NUSES (start_insn) == 1)
18882 else if (ANY_RETURN_P (body))
18884 start_insn = next_nonnote_insn (start_insn);
18885 if (BARRIER_P (start_insn))
18886 start_insn = next_nonnote_insn (start_insn);
18887 if (LABEL_P (start_insn)
18888 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18889 && LABEL_NUSES (start_insn) == 1)
18892 seeking_return = 1;
18893 return_code = GET_CODE (body);
18902 gcc_assert (!arm_ccfsm_state || reverse);
18903 if (!JUMP_P (insn))
18906 /* This jump might be paralleled with a clobber of the condition codes;
18907 the jump should always come first.  */
18908 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
18909 body = XVECEXP (body, 0, 0);
18912 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
18913 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
18916 int fail = FALSE, succeed = FALSE;
18917 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
18918 int then_not_else = TRUE;
18919 rtx this_insn = start_insn, label = 0;
18921 /* Register the insn jumped to. */
18924 if (!seeking_return)
18925 label = XEXP (SET_SRC (body), 0);
18927 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
18928 label = XEXP (XEXP (SET_SRC (body), 1), 0);
18929 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
18931 label = XEXP (XEXP (SET_SRC (body), 2), 0);
18932 then_not_else = FALSE;
18934 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
18936 seeking_return = 1;
18937 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
18939 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
18941 seeking_return = 1;
18942 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
18943 then_not_else = FALSE;
18946 gcc_unreachable ();
18948 /* See how many insns this branch skips, and what kind of insns. If all
18949 insns are okay, and the label or unconditional branch to the same
18950 label is not too far away, succeed. */
18951 for (insns_skipped = 0;
18952 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
18956 this_insn = next_nonnote_insn (this_insn);
18960 switch (GET_CODE (this_insn))
18963 /* Succeed if it is the target label, otherwise fail since
18964 control falls in from somewhere else. */
18965 if (this_insn == label)
18967 arm_ccfsm_state = 1;
18975 /* Succeed if the following insn is the target label.
18977 If return insns are used then the last insn in a function
18978 will be a barrier. */
18979 this_insn = next_nonnote_insn (this_insn);
18980 if (this_insn && this_insn == label)
18982 arm_ccfsm_state = 1;
18990 /* The AAPCS says that conditional calls should not be
18991 used since they make interworking inefficient (the
18992 linker can't transform BL<cond> into BLX). That's
18993 only a problem if the machine has BLX. */
19000 /* Succeed if the following insn is the target label, or
19001 if the following two insns are a barrier and the target label.  */
19003 this_insn = next_nonnote_insn (this_insn);
19004 if (this_insn && BARRIER_P (this_insn))
19005 this_insn = next_nonnote_insn (this_insn);
19007 if (this_insn && this_insn == label
19008 && insns_skipped < max_insns_skipped)
19010 arm_ccfsm_state = 1;
19018 /* If this is an unconditional branch to the same label, succeed.
19019 If it is to another label, do nothing. If it is conditional, fail.  */
19021 /* XXX Probably, the tests for SET and the PC are unnecessary.  */
19024 scanbody = PATTERN (this_insn);
19025 if (GET_CODE (scanbody) == SET
19026 && GET_CODE (SET_DEST (scanbody)) == PC)
19028 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
19029 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
19031 arm_ccfsm_state = 2;
19034 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
19037 /* Fail if a conditional return is undesirable (e.g. on a
19038 StrongARM), but still allow this if optimizing for size. */
19039 else if (GET_CODE (scanbody) == return_code
19040 && !use_return_insn (TRUE, NULL)
19043 else if (GET_CODE (scanbody) == return_code)
19045 arm_ccfsm_state = 2;
19048 else if (GET_CODE (scanbody) == PARALLEL)
19050 switch (get_attr_conds (this_insn))
19060 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
19065 /* Instructions using or affecting the condition codes make it fail.  */
19067 scanbody = PATTERN (this_insn);
19068 if (!(GET_CODE (scanbody) == SET
19069 || GET_CODE (scanbody) == PARALLEL)
19070 || get_attr_conds (this_insn) != CONDS_NOCOND)
19080 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
19081 arm_target_label = CODE_LABEL_NUMBER (label);
19084 gcc_assert (seeking_return || arm_ccfsm_state == 2);
19086 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
19088 this_insn = next_nonnote_insn (this_insn);
19089 gcc_assert (!this_insn
19090 || (!BARRIER_P (this_insn)
19091 && !LABEL_P (this_insn)));
19095 /* Oh, dear! We ran off the end; give up.  */
19096 extract_constrain_insn_cached (insn);
19097 arm_ccfsm_state = 0;
19098 arm_target_insn = NULL;
19101 arm_target_insn = this_insn;
19104 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from what it was.  */
19107 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
19109 if (reverse || then_not_else)
19110 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
19113 /* Restore recog_data (getting the attributes of other insns can
19114 destroy this array, but final.c assumes that it remains intact
19115 across this call).  */
19116 extract_constrain_insn_cached (insn);
19120 /* Output IT instructions. */
19122 thumb2_asm_output_opcode (FILE * stream)
19127 if (arm_condexec_mask)
19129 for (n = 0; n < arm_condexec_masklen; n++)
19130 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
19132 asm_fprintf (stream, "i%s\t%s\n\t", buff,
19133 arm_condition_codes[arm_current_cc]);
19134 arm_condexec_mask = 0;
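/* A minimal standalone sketch (not part of GCC) of the mask-to-mnemonic
   mapping performed above; "emit_it" and the values passed from "main" are
   hypothetical, used purely for illustration:

     #include <stdio.h>

     static void
     emit_it (unsigned mask, int masklen, const char *cond)
     {
       char buff[5];
       int n;

       -- One 't' (then) or 'e' (else) per conditional insn, bit 0 first.
       for (n = 0; n < masklen; n++)
         buff[n] = (mask & (1u << n)) ? 't' : 'e';
       buff[n] = '\0';
       printf ("i%s\t%s\n", buff, cond);
     }

     int
     main (void)
     {
       emit_it (0x5, 3, "eq");   -- prints "itet  eq"
       return 0;
     }
*/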
19138 /* Returns true if REGNO is a valid register
19139 for holding a quantity of type MODE. */
19141 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
19143 if (GET_MODE_CLASS (mode) == MODE_CC)
19144 return (regno == CC_REGNUM
19145 || (TARGET_HARD_FLOAT && TARGET_VFP
19146 && regno == VFPCC_REGNUM));
19149 /* For the Thumb we only allow values bigger than SImode in
19150 registers 0 - 6, so that there is always a second low
19151 register available to hold the upper part of the value.
19152 We probably ought to ensure that the register is the
19153 start of an even numbered register pair. */
19154 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
19156 if (TARGET_HARD_FLOAT && TARGET_VFP
19157 && IS_VFP_REGNUM (regno))
19159 if (mode == SFmode || mode == SImode)
19160 return VFP_REGNO_OK_FOR_SINGLE (regno);
19162 if (mode == DFmode)
19163 return VFP_REGNO_OK_FOR_DOUBLE (regno);
19165 /* VFP registers can hold HFmode values, but there is no point in
19166 putting them there unless we have hardware conversion insns. */
19167 if (mode == HFmode)
19168 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
19171 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
19172 || (VALID_NEON_QREG_MODE (mode)
19173 && NEON_REGNO_OK_FOR_QUAD (regno))
19174 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
19175 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
19176 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
19177 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
19178 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
19183 if (TARGET_REALLY_IWMMXT)
19185 if (IS_IWMMXT_GR_REGNUM (regno))
19186 return mode == SImode;
19188 if (IS_IWMMXT_REGNUM (regno))
19189 return VALID_IWMMXT_REG_MODE (mode);
19192 /* We allow almost any value to be stored in the general registers.
19193 Restrict doubleword quantities to even register pairs so that we can
19194 use ldrd. Do not allow very large Neon structure opaque modes in
19195 general registers; they would use too many. */
19196 if (regno <= LAST_ARM_REGNUM)
19197 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
19198 && ARM_NUM_REGS (mode) <= 4;
19200 if (regno == FRAME_POINTER_REGNUM
19201 || regno == ARG_POINTER_REGNUM)
19202 /* We only allow integers in the fake hard registers. */
19203 return GET_MODE_CLASS (mode) == MODE_INT;
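/* For example, with TARGET_LDRD a DImode value is rejected in an odd core
   register such as r1 (ldrd/strd need an even-numbered base) but accepted
   in r4, and an OImode Neon structure, needing eight words, is never
   placed in core registers.  */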
19208 /* Implement MODES_TIEABLE_P. */
19211 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
19213 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
19216 /* We specifically want to allow elements of "structure" modes to
19217 be tieable to the structure. This more general condition allows
19218 other rarer situations too. */
19220 && (VALID_NEON_DREG_MODE (mode1)
19221 || VALID_NEON_QREG_MODE (mode1)
19222 || VALID_NEON_STRUCT_MODE (mode1))
19223 && (VALID_NEON_DREG_MODE (mode2)
19224 || VALID_NEON_QREG_MODE (mode2)
19225 || VALID_NEON_STRUCT_MODE (mode2)))
19231 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
19232 not used in ARM mode. */
19235 arm_regno_class (int regno)
19239 if (regno == STACK_POINTER_REGNUM)
19241 if (regno == CC_REGNUM)
19248 if (TARGET_THUMB2 && regno < 8)
19251 if ( regno <= LAST_ARM_REGNUM
19252 || regno == FRAME_POINTER_REGNUM
19253 || regno == ARG_POINTER_REGNUM)
19254 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
19256 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
19257 return TARGET_THUMB2 ? CC_REG : NO_REGS;
19259 if (IS_VFP_REGNUM (regno))
19261 if (regno <= D7_VFP_REGNUM)
19262 return VFP_D0_D7_REGS;
19263 else if (regno <= LAST_LO_VFP_REGNUM)
19264 return VFP_LO_REGS;
19266 return VFP_HI_REGS;
19269 if (IS_IWMMXT_REGNUM (regno))
19270 return IWMMXT_REGS;
19272 if (IS_IWMMXT_GR_REGNUM (regno))
19273 return IWMMXT_GR_REGS;
19278 /* Handle a special case when computing the offset
19279 of an argument from the frame pointer. */
19281 arm_debugger_arg_offset (int value, rtx addr)
19285 /* We are only interested if dbxout_parms() failed to compute the offset. */
19289 /* We can only cope with the case where the address is held in a register. */
19293 /* If we are using the frame pointer to point at the argument, then
19294 an offset of 0 is correct. */
19295 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
19298 /* If we are using the stack pointer to point at the
19299 argument, then an offset of 0 is correct. */
19300 /* ??? Check this is consistent with thumb2 frame layout. */
19301 if ((TARGET_THUMB || !frame_pointer_needed)
19302 && REGNO (addr) == SP_REGNUM)
19305 /* Oh dear. The argument is pointed to by a register rather
19306 than being held in a register, or being stored at a known
19307 offset from the frame pointer. Since GDB only understands
19308 those two kinds of argument we must translate the address
19309 held in the register into an offset from the frame pointer.
19310 We do this by searching through the insns for the function
19311 looking to see where this register gets its value. If the
19312 register is initialized from the frame pointer plus an offset
19313 then we are in luck and we can continue, otherwise we give up.
19315 This code is exercised by producing debugging information
19316 for a function with arguments like this:
19318 double func (double a, double b, int c, double d) {return d;}
19320 Without this code the stab for parameter 'd' will be set to
19321 an offset of 0 from the frame pointer, rather than 8. */
19323 /* The if() statement says:
19325 If the insn is a normal instruction
19326 and if the insn is setting the value in a register
19327 and if the register being set is the register holding the address of the argument
19328 and if the address is computed by an addition
19329 that involves adding to a register
19330 which is the frame pointer
19335 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19337 if ( NONJUMP_INSN_P (insn)
19338 && GET_CODE (PATTERN (insn)) == SET
19339 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
19340 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
19341 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
19342 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
19343 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
19346 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
19355 warning (0, "unable to compute real location of stacked parameter");
19356 value = 8; /* XXX magic hack */
19376 T_MAX /* Size of enum. Keep last. */
19377 } neon_builtin_type_mode;
19379 #define TYPE_MODE_BIT(X) (1 << (X))
19381 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
19382 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
19383 | TYPE_MODE_BIT (T_DI))
19384 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
19385 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
19386 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
19388 #define v8qi_UP T_V8QI
19389 #define v4hi_UP T_V4HI
19390 #define v2si_UP T_V2SI
19391 #define v2sf_UP T_V2SF
19393 #define v16qi_UP T_V16QI
19394 #define v8hi_UP T_V8HI
19395 #define v4si_UP T_V4SI
19396 #define v4sf_UP T_V4SF
19397 #define v2di_UP T_V2DI
19402 #define UP(X) X##_UP
19436 NEON_LOADSTRUCTLANE,
19438 NEON_STORESTRUCTLANE,
19447 const neon_itype itype;
19448 const neon_builtin_type_mode mode;
19449 const enum insn_code code;
19450 unsigned int fcode;
19451 } neon_builtin_datum;
19453 #define CF(N,X) CODE_FOR_neon_##N##X
19455 #define VAR1(T, N, A) \
19456 {#N, NEON_##T, UP (A), CF (N, A), 0}
19457 #define VAR2(T, N, A, B) \
19459 {#N, NEON_##T, UP (B), CF (N, B), 0}
19460 #define VAR3(T, N, A, B, C) \
19461 VAR2 (T, N, A, B), \
19462 {#N, NEON_##T, UP (C), CF (N, C), 0}
19463 #define VAR4(T, N, A, B, C, D) \
19464 VAR3 (T, N, A, B, C), \
19465 {#N, NEON_##T, UP (D), CF (N, D), 0}
19466 #define VAR5(T, N, A, B, C, D, E) \
19467 VAR4 (T, N, A, B, C, D), \
19468 {#N, NEON_##T, UP (E), CF (N, E), 0}
19469 #define VAR6(T, N, A, B, C, D, E, F) \
19470 VAR5 (T, N, A, B, C, D, E), \
19471 {#N, NEON_##T, UP (F), CF (N, F), 0}
19472 #define VAR7(T, N, A, B, C, D, E, F, G) \
19473 VAR6 (T, N, A, B, C, D, E, F), \
19474 {#N, NEON_##T, UP (G), CF (N, G), 0}
19475 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
19476 VAR7 (T, N, A, B, C, D, E, F, G), \
19477 {#N, NEON_##T, UP (H), CF (N, H), 0}
19478 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
19479 VAR8 (T, N, A, B, C, D, E, F, G, H), \
19480 {#N, NEON_##T, UP (I), CF (N, I), 0}
19481 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
19482 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
19483 {#N, NEON_##T, UP (J), CF (N, J), 0}
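/* For instance, VAR2 (BINOP, vadd, v8qi, v4hi) expands (through VAR1, UP
   and CF) to the two initializers

     {"vadd", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddv8qi, 0},
     {"vadd", NEON_BINOP, T_V4HI, CODE_FOR_neon_vaddv4hi, 0}

   where the trailing zero is a placeholder for the fcode assigned when the
   builtins are registered.  */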
19485 /* The mode entries in the following table correspond to the "key" type of the
19486 instruction variant, i.e. equivalent to that which would be specified after
19487 the assembler mnemonic, which usually refers to the last vector operand.
19488 (Signed/unsigned/polynomial types are not differentiated between though, and
19489 are all mapped onto the same mode for a given element size.) The modes
19490 listed per instruction should be the same as those defined for that
19491 instruction's pattern in neon.md. */
19493 static neon_builtin_datum neon_builtin_data[] =
19495 VAR10 (BINOP, vadd,
19496 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19497 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
19498 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
19499 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19500 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19501 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
19502 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19503 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19504 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
19505 VAR2 (TERNOP, vfma, v2sf, v4sf),
19506 VAR2 (TERNOP, vfms, v2sf, v4sf),
19507 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19508 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
19509 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
19510 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
19511 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
19512 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
19513 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
19514 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
19515 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
19516 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
19517 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
19518 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
19519 VAR2 (BINOP, vqdmull, v4hi, v2si),
19520 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19521 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19522 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19523 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
19524 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
19525 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
19526 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19527 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19528 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19529 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
19530 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19531 VAR10 (BINOP, vsub,
19532 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19533 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
19534 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
19535 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19536 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19537 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
19538 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19539 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19540 VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19541 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19542 VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19543 VAR2 (BINOP, vcage, v2sf, v4sf),
19544 VAR2 (BINOP, vcagt, v2sf, v4sf),
19545 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19546 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19547 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
19548 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19549 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
19550 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19551 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19552 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
19553 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19554 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19555 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
19556 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
19557 VAR2 (BINOP, vrecps, v2sf, v4sf),
19558 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
19559 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19560 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19561 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19562 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19563 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19564 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19565 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19566 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19567 VAR2 (UNOP, vcnt, v8qi, v16qi),
19568 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
19569 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
19570 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19571 /* FIXME: vget_lane supports more variants than this! */
19572 VAR10 (GETLANE, vget_lane,
19573 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19574 VAR10 (SETLANE, vset_lane,
19575 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19576 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
19577 VAR10 (DUP, vdup_n,
19578 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19579 VAR10 (DUPLANE, vdup_lane,
19580 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19581 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
19582 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
19583 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
19584 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
19585 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
19586 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
19587 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
19588 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19589 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19590 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
19591 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
19592 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19593 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
19594 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
19595 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19596 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19597 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
19598 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
19599 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19600 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
19601 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
19602 VAR10 (BINOP, vext,
19603 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19604 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19605 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
19606 VAR2 (UNOP, vrev16, v8qi, v16qi),
19607 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
19608 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
19609 VAR10 (SELECT, vbsl,
19610 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19611 VAR2 (RINT, vrintn, v2sf, v4sf),
19612 VAR2 (RINT, vrinta, v2sf, v4sf),
19613 VAR2 (RINT, vrintp, v2sf, v4sf),
19614 VAR2 (RINT, vrintm, v2sf, v4sf),
19615 VAR2 (RINT, vrintz, v2sf, v4sf),
19616 VAR2 (RINT, vrintx, v2sf, v4sf),
19617 VAR1 (VTBL, vtbl1, v8qi),
19618 VAR1 (VTBL, vtbl2, v8qi),
19619 VAR1 (VTBL, vtbl3, v8qi),
19620 VAR1 (VTBL, vtbl4, v8qi),
19621 VAR1 (VTBX, vtbx1, v8qi),
19622 VAR1 (VTBX, vtbx2, v8qi),
19623 VAR1 (VTBX, vtbx3, v8qi),
19624 VAR1 (VTBX, vtbx4, v8qi),
19625 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19626 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19627 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19628 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
19629 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
19630 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
19631 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
19632 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
19633 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
19634 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
19635 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
19636 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
19637 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
19638 VAR10 (LOAD1, vld1,
19639 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19640 VAR10 (LOAD1LANE, vld1_lane,
19641 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19642 VAR10 (LOAD1, vld1_dup,
19643 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19644 VAR10 (STORE1, vst1,
19645 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19646 VAR10 (STORE1LANE, vst1_lane,
19647 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19649 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19650 VAR7 (LOADSTRUCTLANE, vld2_lane,
19651 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19652 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
19653 VAR9 (STORESTRUCT, vst2,
19654 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19655 VAR7 (STORESTRUCTLANE, vst2_lane,
19656 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19658 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19659 VAR7 (LOADSTRUCTLANE, vld3_lane,
19660 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19661 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
19662 VAR9 (STORESTRUCT, vst3,
19663 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19664 VAR7 (STORESTRUCTLANE, vst3_lane,
19665 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19666 VAR9 (LOADSTRUCT, vld4,
19667 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19668 VAR7 (LOADSTRUCTLANE, vld4_lane,
19669 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19670 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
19671 VAR9 (STORESTRUCT, vst4,
19672 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19673 VAR7 (STORESTRUCTLANE, vst4_lane,
19674 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19675 VAR10 (LOGICBINOP, vand,
19676 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19677 VAR10 (LOGICBINOP, vorr,
19678 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19679 VAR10 (BINOP, veor,
19680 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19681 VAR10 (LOGICBINOP, vbic,
19682 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19683 VAR10 (LOGICBINOP, vorn,
19684 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
19699 /* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
19700 symbolic names defined here (which would require too much duplication).
19704 ARM_BUILTIN_GETWCGR0,
19705 ARM_BUILTIN_GETWCGR1,
19706 ARM_BUILTIN_GETWCGR2,
19707 ARM_BUILTIN_GETWCGR3,
19709 ARM_BUILTIN_SETWCGR0,
19710 ARM_BUILTIN_SETWCGR1,
19711 ARM_BUILTIN_SETWCGR2,
19712 ARM_BUILTIN_SETWCGR3,
19716 ARM_BUILTIN_WAVG2BR,
19717 ARM_BUILTIN_WAVG2HR,
19718 ARM_BUILTIN_WAVG2B,
19719 ARM_BUILTIN_WAVG2H,
19726 ARM_BUILTIN_WMACSZ,
19728 ARM_BUILTIN_WMACUZ,
19731 ARM_BUILTIN_WSADBZ,
19733 ARM_BUILTIN_WSADHZ,
19735 ARM_BUILTIN_WALIGNI,
19736 ARM_BUILTIN_WALIGNR0,
19737 ARM_BUILTIN_WALIGNR1,
19738 ARM_BUILTIN_WALIGNR2,
19739 ARM_BUILTIN_WALIGNR3,
19742 ARM_BUILTIN_TMIAPH,
19743 ARM_BUILTIN_TMIABB,
19744 ARM_BUILTIN_TMIABT,
19745 ARM_BUILTIN_TMIATB,
19746 ARM_BUILTIN_TMIATT,
19748 ARM_BUILTIN_TMOVMSKB,
19749 ARM_BUILTIN_TMOVMSKH,
19750 ARM_BUILTIN_TMOVMSKW,
19752 ARM_BUILTIN_TBCSTB,
19753 ARM_BUILTIN_TBCSTH,
19754 ARM_BUILTIN_TBCSTW,
19756 ARM_BUILTIN_WMADDS,
19757 ARM_BUILTIN_WMADDU,
19759 ARM_BUILTIN_WPACKHSS,
19760 ARM_BUILTIN_WPACKWSS,
19761 ARM_BUILTIN_WPACKDSS,
19762 ARM_BUILTIN_WPACKHUS,
19763 ARM_BUILTIN_WPACKWUS,
19764 ARM_BUILTIN_WPACKDUS,
19769 ARM_BUILTIN_WADDSSB,
19770 ARM_BUILTIN_WADDSSH,
19771 ARM_BUILTIN_WADDSSW,
19772 ARM_BUILTIN_WADDUSB,
19773 ARM_BUILTIN_WADDUSH,
19774 ARM_BUILTIN_WADDUSW,
19778 ARM_BUILTIN_WSUBSSB,
19779 ARM_BUILTIN_WSUBSSH,
19780 ARM_BUILTIN_WSUBSSW,
19781 ARM_BUILTIN_WSUBUSB,
19782 ARM_BUILTIN_WSUBUSH,
19783 ARM_BUILTIN_WSUBUSW,
19790 ARM_BUILTIN_WCMPEQB,
19791 ARM_BUILTIN_WCMPEQH,
19792 ARM_BUILTIN_WCMPEQW,
19793 ARM_BUILTIN_WCMPGTUB,
19794 ARM_BUILTIN_WCMPGTUH,
19795 ARM_BUILTIN_WCMPGTUW,
19796 ARM_BUILTIN_WCMPGTSB,
19797 ARM_BUILTIN_WCMPGTSH,
19798 ARM_BUILTIN_WCMPGTSW,
19800 ARM_BUILTIN_TEXTRMSB,
19801 ARM_BUILTIN_TEXTRMSH,
19802 ARM_BUILTIN_TEXTRMSW,
19803 ARM_BUILTIN_TEXTRMUB,
19804 ARM_BUILTIN_TEXTRMUH,
19805 ARM_BUILTIN_TEXTRMUW,
19806 ARM_BUILTIN_TINSRB,
19807 ARM_BUILTIN_TINSRH,
19808 ARM_BUILTIN_TINSRW,
19810 ARM_BUILTIN_WMAXSW,
19811 ARM_BUILTIN_WMAXSH,
19812 ARM_BUILTIN_WMAXSB,
19813 ARM_BUILTIN_WMAXUW,
19814 ARM_BUILTIN_WMAXUH,
19815 ARM_BUILTIN_WMAXUB,
19816 ARM_BUILTIN_WMINSW,
19817 ARM_BUILTIN_WMINSH,
19818 ARM_BUILTIN_WMINSB,
19819 ARM_BUILTIN_WMINUW,
19820 ARM_BUILTIN_WMINUH,
19821 ARM_BUILTIN_WMINUB,
19823 ARM_BUILTIN_WMULUM,
19824 ARM_BUILTIN_WMULSM,
19825 ARM_BUILTIN_WMULUL,
19827 ARM_BUILTIN_PSADBH,
19828 ARM_BUILTIN_WSHUFH,
19842 ARM_BUILTIN_WSLLHI,
19843 ARM_BUILTIN_WSLLWI,
19844 ARM_BUILTIN_WSLLDI,
19845 ARM_BUILTIN_WSRAHI,
19846 ARM_BUILTIN_WSRAWI,
19847 ARM_BUILTIN_WSRADI,
19848 ARM_BUILTIN_WSRLHI,
19849 ARM_BUILTIN_WSRLWI,
19850 ARM_BUILTIN_WSRLDI,
19851 ARM_BUILTIN_WRORHI,
19852 ARM_BUILTIN_WRORWI,
19853 ARM_BUILTIN_WRORDI,
19855 ARM_BUILTIN_WUNPCKIHB,
19856 ARM_BUILTIN_WUNPCKIHH,
19857 ARM_BUILTIN_WUNPCKIHW,
19858 ARM_BUILTIN_WUNPCKILB,
19859 ARM_BUILTIN_WUNPCKILH,
19860 ARM_BUILTIN_WUNPCKILW,
19862 ARM_BUILTIN_WUNPCKEHSB,
19863 ARM_BUILTIN_WUNPCKEHSH,
19864 ARM_BUILTIN_WUNPCKEHSW,
19865 ARM_BUILTIN_WUNPCKEHUB,
19866 ARM_BUILTIN_WUNPCKEHUH,
19867 ARM_BUILTIN_WUNPCKEHUW,
19868 ARM_BUILTIN_WUNPCKELSB,
19869 ARM_BUILTIN_WUNPCKELSH,
19870 ARM_BUILTIN_WUNPCKELSW,
19871 ARM_BUILTIN_WUNPCKELUB,
19872 ARM_BUILTIN_WUNPCKELUH,
19873 ARM_BUILTIN_WUNPCKELUW,
19879 ARM_BUILTIN_WADDSUBHX,
19880 ARM_BUILTIN_WSUBADDHX,
19882 ARM_BUILTIN_WABSDIFFB,
19883 ARM_BUILTIN_WABSDIFFH,
19884 ARM_BUILTIN_WABSDIFFW,
19886 ARM_BUILTIN_WADDCH,
19887 ARM_BUILTIN_WADDCW,
19890 ARM_BUILTIN_WAVG4R,
19892 ARM_BUILTIN_WMADDSX,
19893 ARM_BUILTIN_WMADDUX,
19895 ARM_BUILTIN_WMADDSN,
19896 ARM_BUILTIN_WMADDUN,
19898 ARM_BUILTIN_WMULWSM,
19899 ARM_BUILTIN_WMULWUM,
19901 ARM_BUILTIN_WMULWSMR,
19902 ARM_BUILTIN_WMULWUMR,
19904 ARM_BUILTIN_WMULWL,
19906 ARM_BUILTIN_WMULSMR,
19907 ARM_BUILTIN_WMULUMR,
19909 ARM_BUILTIN_WQMULM,
19910 ARM_BUILTIN_WQMULMR,
19912 ARM_BUILTIN_WQMULWM,
19913 ARM_BUILTIN_WQMULWMR,
19915 ARM_BUILTIN_WADDBHUSM,
19916 ARM_BUILTIN_WADDBHUSL,
19918 ARM_BUILTIN_WQMIABB,
19919 ARM_BUILTIN_WQMIABT,
19920 ARM_BUILTIN_WQMIATB,
19921 ARM_BUILTIN_WQMIATT,
19923 ARM_BUILTIN_WQMIABBN,
19924 ARM_BUILTIN_WQMIABTN,
19925 ARM_BUILTIN_WQMIATBN,
19926 ARM_BUILTIN_WQMIATTN,
19928 ARM_BUILTIN_WMIABB,
19929 ARM_BUILTIN_WMIABT,
19930 ARM_BUILTIN_WMIATB,
19931 ARM_BUILTIN_WMIATT,
19933 ARM_BUILTIN_WMIABBN,
19934 ARM_BUILTIN_WMIABTN,
19935 ARM_BUILTIN_WMIATBN,
19936 ARM_BUILTIN_WMIATTN,
19938 ARM_BUILTIN_WMIAWBB,
19939 ARM_BUILTIN_WMIAWBT,
19940 ARM_BUILTIN_WMIAWTB,
19941 ARM_BUILTIN_WMIAWTT,
19943 ARM_BUILTIN_WMIAWBBN,
19944 ARM_BUILTIN_WMIAWBTN,
19945 ARM_BUILTIN_WMIAWTBN,
19946 ARM_BUILTIN_WMIAWTTN,
19948 ARM_BUILTIN_WMERGE,
19950 ARM_BUILTIN_NEON_BASE,
19952 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
19955 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
19958 arm_init_neon_builtins (void)
19960 unsigned int i, fcode;
19963 tree neon_intQI_type_node;
19964 tree neon_intHI_type_node;
19965 tree neon_polyQI_type_node;
19966 tree neon_polyHI_type_node;
19967 tree neon_intSI_type_node;
19968 tree neon_intDI_type_node;
19969 tree neon_float_type_node;
19971 tree intQI_pointer_node;
19972 tree intHI_pointer_node;
19973 tree intSI_pointer_node;
19974 tree intDI_pointer_node;
19975 tree float_pointer_node;
19977 tree const_intQI_node;
19978 tree const_intHI_node;
19979 tree const_intSI_node;
19980 tree const_intDI_node;
19981 tree const_float_node;
19983 tree const_intQI_pointer_node;
19984 tree const_intHI_pointer_node;
19985 tree const_intSI_pointer_node;
19986 tree const_intDI_pointer_node;
19987 tree const_float_pointer_node;
19989 tree V8QI_type_node;
19990 tree V4HI_type_node;
19991 tree V2SI_type_node;
19992 tree V2SF_type_node;
19993 tree V16QI_type_node;
19994 tree V8HI_type_node;
19995 tree V4SI_type_node;
19996 tree V4SF_type_node;
19997 tree V2DI_type_node;
19999 tree intUQI_type_node;
20000 tree intUHI_type_node;
20001 tree intUSI_type_node;
20002 tree intUDI_type_node;
20004 tree intEI_type_node;
20005 tree intOI_type_node;
20006 tree intCI_type_node;
20007 tree intXI_type_node;
20009 tree V8QI_pointer_node;
20010 tree V4HI_pointer_node;
20011 tree V2SI_pointer_node;
20012 tree V2SF_pointer_node;
20013 tree V16QI_pointer_node;
20014 tree V8HI_pointer_node;
20015 tree V4SI_pointer_node;
20016 tree V4SF_pointer_node;
20017 tree V2DI_pointer_node;
20019 tree void_ftype_pv8qi_v8qi_v8qi;
20020 tree void_ftype_pv4hi_v4hi_v4hi;
20021 tree void_ftype_pv2si_v2si_v2si;
20022 tree void_ftype_pv2sf_v2sf_v2sf;
20023 tree void_ftype_pdi_di_di;
20024 tree void_ftype_pv16qi_v16qi_v16qi;
20025 tree void_ftype_pv8hi_v8hi_v8hi;
20026 tree void_ftype_pv4si_v4si_v4si;
20027 tree void_ftype_pv4sf_v4sf_v4sf;
20028 tree void_ftype_pv2di_v2di_v2di;
20030 tree reinterp_ftype_dreg[5][5];
20031 tree reinterp_ftype_qreg[5][5];
20032 tree dreg_types[5], qreg_types[5];
20034 /* Create distinguished type nodes for NEON vector element types,
20035 and pointers to values of such types, so we can detect them later. */
20036 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
20037 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
20038 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
20039 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
20040 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
20041 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
20042 neon_float_type_node = make_node (REAL_TYPE);
20043 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
20044 layout_type (neon_float_type_node);
20046 /* Define typedefs which exactly correspond to the modes we are basing vector
20047 types on. If you change these names you'll need to change
20048 the table used by arm_mangle_type too. */
20049 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
20050 "__builtin_neon_qi");
20051 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
20052 "__builtin_neon_hi");
20053 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
20054 "__builtin_neon_si");
20055 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
20056 "__builtin_neon_sf");
20057 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
20058 "__builtin_neon_di");
20059 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
20060 "__builtin_neon_poly8");
20061 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
20062 "__builtin_neon_poly16");
20064 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
20065 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
20066 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
20067 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
20068 float_pointer_node = build_pointer_type (neon_float_type_node);
20070 /* Next create constant-qualified versions of the above types. */
  const_intQI_node = build_qualified_type (neon_intQI_type_node,
					   TYPE_QUAL_CONST);
  const_intHI_node = build_qualified_type (neon_intHI_type_node,
					   TYPE_QUAL_CONST);
  const_intSI_node = build_qualified_type (neon_intSI_type_node,
					   TYPE_QUAL_CONST);
  const_intDI_node = build_qualified_type (neon_intDI_type_node,
					   TYPE_QUAL_CONST);
  const_float_node = build_qualified_type (neon_float_type_node,
					   TYPE_QUAL_CONST);
20082 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
20083 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
20084 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
20085 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
20086 const_float_pointer_node = build_pointer_type (const_float_node);
20088 /* Now create vector types based on our NEON element types. */
  /* 64-bit vectors.  */
  V8QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
  V4HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
  V2SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
  V2SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V2SFmode);
  /* 128-bit vectors.  */
  V16QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
  V8HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
  V4SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
  V4SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V4SFmode);
  V2DI_type_node =
    build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
20110 /* Unsigned integer types for various mode sizes. */
20111 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
20112 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
20113 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
20114 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
20116 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
20117 "__builtin_neon_uqi");
20118 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
20119 "__builtin_neon_uhi");
20120 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
20121 "__builtin_neon_usi");
20122 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
20123 "__builtin_neon_udi");
20125 /* Opaque integer types for structures of vectors. */
20126 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
20127 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
20128 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
20129 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
20131 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
20132 "__builtin_neon_ti");
20133 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
20134 "__builtin_neon_ei");
20135 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
20136 "__builtin_neon_oi");
20137 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
20138 "__builtin_neon_ci");
20139 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
20140 "__builtin_neon_xi");
20142 /* Pointers to vector types. */
20143 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
20144 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
20145 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
20146 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
20147 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
20148 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
20149 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
20150 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
20151 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
20153 /* Operations which return results as pairs. */
20154 void_ftype_pv8qi_v8qi_v8qi =
20155 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
20156 V8QI_type_node, NULL);
20157 void_ftype_pv4hi_v4hi_v4hi =
20158 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
20159 V4HI_type_node, NULL);
20160 void_ftype_pv2si_v2si_v2si =
20161 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
20162 V2SI_type_node, NULL);
20163 void_ftype_pv2sf_v2sf_v2sf =
20164 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
20165 V2SF_type_node, NULL);
20166 void_ftype_pdi_di_di =
20167 build_function_type_list (void_type_node, intDI_pointer_node,
20168 neon_intDI_type_node, neon_intDI_type_node, NULL);
20169 void_ftype_pv16qi_v16qi_v16qi =
20170 build_function_type_list (void_type_node, V16QI_pointer_node,
20171 V16QI_type_node, V16QI_type_node, NULL);
20172 void_ftype_pv8hi_v8hi_v8hi =
20173 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
20174 V8HI_type_node, NULL);
20175 void_ftype_pv4si_v4si_v4si =
20176 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
20177 V4SI_type_node, NULL);
20178 void_ftype_pv4sf_v4sf_v4sf =
20179 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
20180 V4SF_type_node, NULL);
20181 void_ftype_pv2di_v2di_v2di =
20182 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
20183 V2DI_type_node, NULL);
20185 dreg_types[0] = V8QI_type_node;
20186 dreg_types[1] = V4HI_type_node;
20187 dreg_types[2] = V2SI_type_node;
20188 dreg_types[3] = V2SF_type_node;
20189 dreg_types[4] = neon_intDI_type_node;
20191 qreg_types[0] = V16QI_type_node;
20192 qreg_types[1] = V8HI_type_node;
20193 qreg_types[2] = V4SI_type_node;
20194 qreg_types[3] = V4SF_type_node;
20195 qreg_types[4] = V2DI_type_node;
  for (i = 0; i < 5; i++)
    {
      int j;
      for (j = 0; j < 5; j++)
	{
	  reinterp_ftype_dreg[i][j]
	    = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
	  reinterp_ftype_qreg[i][j]
	    = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
	}
    }
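/* For example, reinterp_ftype_dreg[0][1] built above is the function type
   "V8QI (V4HI)".  Assuming the usual arm_neon.h naming, it backs an
   intrinsic such as:

     #include <arm_neon.h>
     int8x8_t cast (int16x4_t x) { return vreinterpret_s8_s16 (x); }
*/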
  for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
       i < ARRAY_SIZE (neon_builtin_data);
       i++, fcode++)
    {
      neon_builtin_datum *d = &neon_builtin_data[i];

      const char* const modenames[] = {
	"v8qi", "v4hi", "v2si", "v2sf", "di",
	"v16qi", "v8hi", "v4si", "v4sf", "v2di",
	"ti", "ei", "oi"
      };
      char namebuf[60];
      tree ftype = NULL;
      int is_load = 0, is_store = 0;

      gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
      switch (d->itype)
	{
	case NEON_LOAD1:
	case NEON_LOAD1LANE:
	case NEON_LOADSTRUCT:
	case NEON_LOADSTRUCTLANE:
	  is_load = 1;
	  /* Fall through.  */
	case NEON_STORE1:
	case NEON_STORE1LANE:
	case NEON_STORESTRUCT:
	case NEON_STORESTRUCTLANE:
	  is_store = 1;
	  /* Fall through.  */
	case NEON_UNOP:
	case NEON_BINOP:
	case NEON_LOGICBINOP:
	case NEON_SHIFTINSERT:
	case NEON_TERNOP:
	case NEON_SHIFTIMM:
	case NEON_SHIFTACC:
	case NEON_LANEMULL:
	case NEON_LANEMULH:
	case NEON_SCALARMUL:
	case NEON_SCALARMULL:
	case NEON_SCALARMULH:
	case NEON_SCALARMAC:
	  {
	    int k;
	    tree return_type = void_type_node, args = void_list_node;
20275 /* Build a function type directly from the insn_data for
20276 this builtin. The build_function_type() function takes
20277 care of removing duplicates for us. */
	    for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
	      {
		tree eltype;

		if (is_load && k == 1)
		  {
		    /* Neon load patterns always have the memory
		       operand in the operand 1 position.  */
		    gcc_assert (insn_data[d->code].operand[k].predicate
				== neon_struct_operand);

		    switch (d->mode)
		      {
		      case T_V8QI: case T_V16QI:
			eltype = const_intQI_pointer_node; break;
		      case T_V4HI: case T_V8HI:
			eltype = const_intHI_pointer_node; break;
		      case T_V2SI: case T_V4SI:
			eltype = const_intSI_pointer_node; break;
		      case T_V2SF: case T_V4SF:
			eltype = const_float_pointer_node; break;
		      case T_DI: case T_V2DI:
			eltype = const_intDI_pointer_node; break;
		      default: gcc_unreachable ();
		      }
		  }
		else if (is_store && k == 0)
		  {
		    /* Similarly, Neon store patterns use operand 0 as
		       the memory location to store to.  */
		    gcc_assert (insn_data[d->code].operand[k].predicate
				== neon_struct_operand);

		    switch (d->mode)
		      {
		      case T_V8QI: case T_V16QI:
			eltype = intQI_pointer_node; break;
		      case T_V4HI: case T_V8HI:
			eltype = intHI_pointer_node; break;
		      case T_V2SI: case T_V4SI:
			eltype = intSI_pointer_node; break;
		      case T_V2SF: case T_V4SF:
			eltype = float_pointer_node; break;
		      case T_DI: case T_V2DI:
			eltype = intDI_pointer_node; break;
		      default: gcc_unreachable ();
		      }
		  }
		else
		  {
		    switch (insn_data[d->code].operand[k].mode)
		      {
		      case VOIDmode: eltype = void_type_node; break;
20362 case QImode: eltype = neon_intQI_type_node; break;
20363 case HImode: eltype = neon_intHI_type_node; break;
20364 case SImode: eltype = neon_intSI_type_node; break;
20365 case SFmode: eltype = neon_float_type_node; break;
20366 case DImode: eltype = neon_intDI_type_node; break;
20367 case TImode: eltype = intTI_type_node; break;
20368 case EImode: eltype = intEI_type_node; break;
20369 case OImode: eltype = intOI_type_node; break;
20370 case CImode: eltype = intCI_type_node; break;
20371 case XImode: eltype = intXI_type_node; break;
20372 /* 64-bit vectors. */
20373 case V8QImode: eltype = V8QI_type_node; break;
20374 case V4HImode: eltype = V4HI_type_node; break;
20375 case V2SImode: eltype = V2SI_type_node; break;
20376 case V2SFmode: eltype = V2SF_type_node; break;
20377 /* 128-bit vectors. */
20378 case V16QImode: eltype = V16QI_type_node; break;
20379 case V8HImode: eltype = V8HI_type_node; break;
20380 case V4SImode: eltype = V4SI_type_node; break;
20381 case V4SFmode: eltype = V4SF_type_node; break;
20382 case V2DImode: eltype = V2DI_type_node; break;
		      default: gcc_unreachable ();
		      }
		  }

		if (k == 0 && !is_store)
		  return_type = eltype;
		else
		  args = tree_cons (NULL_TREE, eltype, args);
	      }

	    ftype = build_function_type (return_type, args);
	  }
	  break;
	case NEON_RESULTPAIR:
	  {
	    switch (insn_data[d->code].operand[1].mode)
	      {
	      case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
20402 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
20403 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
20404 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
20405 case DImode: ftype = void_ftype_pdi_di_di; break;
20406 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
20407 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
20408 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
20409 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
20410 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
	      default: gcc_unreachable ();
	      }
	  }
	  break;

	case NEON_REINTERP:
	  {
	    /* We iterate over 5 doubleword types, then 5 quadword
	       types.  */
	    int rhs = d->mode % 5;
	    switch (insn_data[d->code].operand[0].mode)
	      {
20423 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
20424 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
20425 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
20426 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
20427 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
20428 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
20429 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
20430 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
20431 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
20432 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
	      default: gcc_unreachable ();
	      }
	  }
	  break;

	default:
	  gcc_unreachable ();
	}

      gcc_assert (ftype != NULL);

      sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);

      decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
				   NULL_TREE);
      arm_builtin_decls[fcode] = decl;
    }
}
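/* To illustrate the naming scheme used just above: a builtin with d->name
   "vadd" and mode T_V8QI is registered as "__builtin_neon_vaddv8qi", which
   arm_neon.h then wraps for users (a sketch, assuming the usual wrapper
   shape; the exact arguments vary by intrinsic):

     __extension__ static __inline int8x8_t
     vadd_s8 (int8x8_t __a, int8x8_t __b)
     {
       return (int8x8_t) __builtin_neon_vaddv8qi (__a, __b, 1);
     }
*/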
#define def_mbuiltin(MASK, NAME, TYPE, CODE)				\
  do									\
    {									\
      if ((MASK) & insn_flags)						\
	{								\
	  tree bdecl;							\
	  bdecl = add_builtin_function ((NAME), (TYPE), (CODE),		\
					BUILT_IN_MD, NULL, NULL_TREE);	\
	  arm_builtin_decls[CODE] = bdecl;				\
	}								\
    }									\
  while (0)
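/* For instance (illustrative expansion): def_mbuiltin (FL_IWMMXT,
   "__builtin_arm_waddb", v8qi_ftype_v8qi_v8qi, ARM_BUILTIN_WADDB)
   registers the builtin only when FL_IWMMXT is set in insn_flags, and
   records the resulting decl in arm_builtin_decls[ARM_BUILTIN_WADDB].  */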
struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char * const name;
  const enum arm_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};

static const struct builtin_description bdesc_2arg[] =
{
#define IWMMXT_BUILTIN(code, string, builtin) \
20478 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
20479 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
20481 #define IWMMXT2_BUILTIN(code, string, builtin) \
20482 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
20483 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
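/* Illustrative expansion of the macro above:
   IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) produces the initializer
   { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
     ARM_BUILTIN_WADDB, UNKNOWN, 0 },  */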
20485 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
20486 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
20487 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
20488 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
20489 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
20490 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
20491 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
20492 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
20493 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
20494 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
20495 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
20496 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
20497 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
20498 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
20499 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
20500 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
20501 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
20502 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
20503 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
20504 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
20505 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
20506 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
20507 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
20508 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
20509 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
20510 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
20511 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
20512 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
20513 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
20514 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
20515 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
20516 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
20517 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
20518 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
20519 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
20520 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
20521 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
20522 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
20523 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
20524 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
20525 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
20526 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
20527 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
20528 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
20529 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
20530 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
20531 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
20532 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
20533 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
20534 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
20535 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
20536 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
20537 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
20538 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
20539 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
20540 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
20541 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
20542 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
20543 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
20544 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
20545 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
20546 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
20547 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
20548 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
20549 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
20550 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
20551 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
20552 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
20553 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
20554 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
20555 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
20556 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
20557 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
20558 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
20559 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
20560 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
20561 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
20562 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
20564 #define IWMMXT_BUILTIN2(code, builtin) \
20565 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
20567 #define IWMMXT2_BUILTIN2(code, builtin) \
20568 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
20570 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
20571 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
20572 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
20573 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
20574 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
20575 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
20576 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
20577 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
20578 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
  IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
};

static const struct builtin_description bdesc_1arg[] =
{
  IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
20585 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
20586 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
20587 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
20588 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
20589 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
20590 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
20591 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
20592 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
20593 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
20594 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
20595 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
20596 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
20597 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
20598 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
20599 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
20600 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
20601 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
20602 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
20603 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
20604 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
20605 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
20606 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
  IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
};

/* Set up all the iWMMXt builtins.  This is not called if
   TARGET_IWMMXT is zero.  */

static void
arm_init_iwmmxt_builtins (void)
{
  const struct builtin_description * d;
  size_t i;
20619 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
20620 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
20621 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
20623 tree v8qi_ftype_v8qi_v8qi_int
20624 = build_function_type_list (V8QI_type_node,
20625 V8QI_type_node, V8QI_type_node,
20626 integer_type_node, NULL_TREE);
20627 tree v4hi_ftype_v4hi_int
20628 = build_function_type_list (V4HI_type_node,
20629 V4HI_type_node, integer_type_node, NULL_TREE);
20630 tree v2si_ftype_v2si_int
20631 = build_function_type_list (V2SI_type_node,
20632 V2SI_type_node, integer_type_node, NULL_TREE);
20633 tree v2si_ftype_di_di
20634 = build_function_type_list (V2SI_type_node,
20635 long_long_integer_type_node,
				long_long_integer_type_node,
				NULL_TREE);
20638 tree di_ftype_di_int
20639 = build_function_type_list (long_long_integer_type_node,
20640 long_long_integer_type_node,
20641 integer_type_node, NULL_TREE);
20642 tree di_ftype_di_int_int
20643 = build_function_type_list (long_long_integer_type_node,
				long_long_integer_type_node,
				integer_type_node,
				integer_type_node, NULL_TREE);
20647 tree int_ftype_v8qi
20648 = build_function_type_list (integer_type_node,
20649 V8QI_type_node, NULL_TREE);
20650 tree int_ftype_v4hi
20651 = build_function_type_list (integer_type_node,
20652 V4HI_type_node, NULL_TREE);
20653 tree int_ftype_v2si
20654 = build_function_type_list (integer_type_node,
20655 V2SI_type_node, NULL_TREE);
20656 tree int_ftype_v8qi_int
20657 = build_function_type_list (integer_type_node,
20658 V8QI_type_node, integer_type_node, NULL_TREE);
20659 tree int_ftype_v4hi_int
20660 = build_function_type_list (integer_type_node,
20661 V4HI_type_node, integer_type_node, NULL_TREE);
20662 tree int_ftype_v2si_int
20663 = build_function_type_list (integer_type_node,
20664 V2SI_type_node, integer_type_node, NULL_TREE);
20665 tree v8qi_ftype_v8qi_int_int
20666 = build_function_type_list (V8QI_type_node,
20667 V8QI_type_node, integer_type_node,
20668 integer_type_node, NULL_TREE);
20669 tree v4hi_ftype_v4hi_int_int
20670 = build_function_type_list (V4HI_type_node,
20671 V4HI_type_node, integer_type_node,
20672 integer_type_node, NULL_TREE);
20673 tree v2si_ftype_v2si_int_int
20674 = build_function_type_list (V2SI_type_node,
20675 V2SI_type_node, integer_type_node,
20676 integer_type_node, NULL_TREE);
20677 /* Miscellaneous. */
20678 tree v8qi_ftype_v4hi_v4hi
20679 = build_function_type_list (V8QI_type_node,
20680 V4HI_type_node, V4HI_type_node, NULL_TREE);
20681 tree v4hi_ftype_v2si_v2si
20682 = build_function_type_list (V4HI_type_node,
20683 V2SI_type_node, V2SI_type_node, NULL_TREE);
20684 tree v8qi_ftype_v4hi_v8qi
20685 = build_function_type_list (V8QI_type_node,
20686 V4HI_type_node, V8QI_type_node, NULL_TREE);
20687 tree v2si_ftype_v4hi_v4hi
20688 = build_function_type_list (V2SI_type_node,
20689 V4HI_type_node, V4HI_type_node, NULL_TREE);
20690 tree v2si_ftype_v8qi_v8qi
20691 = build_function_type_list (V2SI_type_node,
20692 V8QI_type_node, V8QI_type_node, NULL_TREE);
20693 tree v4hi_ftype_v4hi_di
20694 = build_function_type_list (V4HI_type_node,
				V4HI_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree di_ftype_void
    = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
20703 tree int_ftype_void
20704 = build_function_type_list (integer_type_node, NULL_TREE);
  tree di_ftype_v8qi
    = build_function_type_list (long_long_integer_type_node,
				V8QI_type_node, NULL_TREE);
  tree di_ftype_v4hi
    = build_function_type_list (long_long_integer_type_node,
				V4HI_type_node, NULL_TREE);
  tree di_ftype_v2si
    = build_function_type_list (long_long_integer_type_node,
				V2SI_type_node, NULL_TREE);
20714 tree v2si_ftype_v4hi
20715 = build_function_type_list (V2SI_type_node,
20716 V4HI_type_node, NULL_TREE);
20717 tree v4hi_ftype_v8qi
20718 = build_function_type_list (V4HI_type_node,
20719 V8QI_type_node, NULL_TREE);
20720 tree v8qi_ftype_v8qi
20721 = build_function_type_list (V8QI_type_node,
20722 V8QI_type_node, NULL_TREE);
20723 tree v4hi_ftype_v4hi
20724 = build_function_type_list (V4HI_type_node,
20725 V4HI_type_node, NULL_TREE);
20726 tree v2si_ftype_v2si
20727 = build_function_type_list (V2SI_type_node,
20728 V2SI_type_node, NULL_TREE);
20730 tree di_ftype_di_v4hi_v4hi
20731 = build_function_type_list (long_long_unsigned_type_node,
20732 long_long_unsigned_type_node,
				V4HI_type_node, V4HI_type_node,
				NULL_TREE);
20736 tree di_ftype_v4hi_v4hi
20737 = build_function_type_list (long_long_unsigned_type_node,
				V4HI_type_node, V4HI_type_node,
				NULL_TREE);
20741 tree v2si_ftype_v2si_v4hi_v4hi
20742 = build_function_type_list (V2SI_type_node,
20743 V2SI_type_node, V4HI_type_node,
20744 V4HI_type_node, NULL_TREE);
20746 tree v2si_ftype_v2si_v8qi_v8qi
20747 = build_function_type_list (V2SI_type_node,
20748 V2SI_type_node, V8QI_type_node,
20749 V8QI_type_node, NULL_TREE);
20751 tree di_ftype_di_v2si_v2si
20752 = build_function_type_list (long_long_unsigned_type_node,
20753 long_long_unsigned_type_node,
				V2SI_type_node, V2SI_type_node,
				NULL_TREE);
20757 tree di_ftype_di_di_int
20758 = build_function_type_list (long_long_unsigned_type_node,
20759 long_long_unsigned_type_node,
20760 long_long_unsigned_type_node,
20761 integer_type_node, NULL_TREE);
20763 tree void_ftype_int
20764 = build_function_type_list (void_type_node,
20765 integer_type_node, NULL_TREE);
20767 tree v8qi_ftype_char
20768 = build_function_type_list (V8QI_type_node,
20769 signed_char_type_node, NULL_TREE);
20771 tree v4hi_ftype_short
20772 = build_function_type_list (V4HI_type_node,
20773 short_integer_type_node, NULL_TREE);
20775 tree v2si_ftype_int
20776 = build_function_type_list (V2SI_type_node,
20777 integer_type_node, NULL_TREE);
20779 /* Normal vector binops. */
20780 tree v8qi_ftype_v8qi_v8qi
20781 = build_function_type_list (V8QI_type_node,
20782 V8QI_type_node, V8QI_type_node, NULL_TREE);
20783 tree v4hi_ftype_v4hi_v4hi
20784 = build_function_type_list (V4HI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
20786 tree v2si_ftype_v2si_v2si
20787 = build_function_type_list (V2SI_type_node,
20788 V2SI_type_node, V2SI_type_node, NULL_TREE);
20789 tree di_ftype_di_di
20790 = build_function_type_list (long_long_unsigned_type_node,
20791 long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				NULL_TREE);
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V8QImode: type = v8qi_ftype_v8qi_v8qi; break;
	case V4HImode: type = v4hi_ftype_v4hi_v4hi; break;
	case V2SImode: type = v2si_ftype_v2si_v2si; break;
	case DImode:   type = di_ftype_di_di; break;
	default: gcc_unreachable ();
	}

      def_mbuiltin (d->mask, d->name, type, d->code);
    }
20831 /* Add the remaining MMX insns with somewhat more complicated types. */
20832 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
20833 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
20834 ARM_BUILTIN_ ## CODE)
20836 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
20837 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
20838 ARM_BUILTIN_ ## CODE)
20840 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
20841 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
20842 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
20843 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
20844 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
20845 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
20846 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
20847 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
20848 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
20850 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
20851 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
20852 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
20853 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
20854 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
20855 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
20857 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
20858 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
20859 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
20860 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
20861 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
20862 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
20864 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
20865 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
20866 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
20867 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
20868 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
20869 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
20871 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
20872 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
20873 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
20874 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
20875 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
20876 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
20878 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
20880 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
20881 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
20882 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
20883 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
20884 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
20885 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
20886 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
20887 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
20888 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
20889 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
20891 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
20892 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
20893 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
20894 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
20895 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
20896 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
20897 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
20898 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
20899 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
20901 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
20902 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
20903 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
20905 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
20906 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
20907 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
20909 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
20910 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
20912 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
20913 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
20914 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
20915 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
20916 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
20917 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
20919 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
20920 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
20921 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
20922 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
20923 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
20924 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
20925 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
20926 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
20927 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
20928 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
20929 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
20930 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
20932 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
20933 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
20934 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
20935 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
20937 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
20938 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
20939 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
20940 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
20941 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
20942 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
20943 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
20945 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
20946 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
20947 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
20949 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
20950 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
20951 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
20952 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
20954 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
20955 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
20956 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
20957 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
20959 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
20960 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
20961 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
20962 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
20964 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
20965 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
20966 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
20967 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
20969 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
20970 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
20971 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
20972 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
20974 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
20975 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
20976 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
20977 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
20979 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
20981 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
20982 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
20983 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
#undef iwmmx_mbuiltin
#undef iwmmx2_mbuiltin
}

static void
arm_init_fp16_builtins (void)
{
  tree fp16_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fp16_type) = 16;
  layout_type (fp16_type);
  (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
}

static void
arm_init_builtins (void)
{
  if (TARGET_REALLY_IWMMXT)
    arm_init_iwmmxt_builtins ();

  if (TARGET_NEON)
    arm_init_neon_builtins ();

  if (arm_fp16_format)
    arm_init_fp16_builtins ();
}
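/* Example of the result (illustrative user code; requires an
   -mfp16-format setting so that arm_fp16_format is nonzero):

     __fp16 h = 1.0;
     float f = h + 2.0f;   // promoted to float, see arm_promoted_type
     double d = h;         // goes via float, see arm_convert_to_type
*/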
/* Return the ARM builtin for CODE.  */

static tree
arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= ARM_BUILTIN_MAX)
    return error_mark_node;

  return arm_builtin_decls[code];
}
21022 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}
/* Implement TARGET_INVALID_RETURN_TYPE.  */

static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}
/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}
21052 /* Implement TARGET_CONVERT_TO_TYPE.
21053 Specifically, this hook implements the peculiarity of the ARM
21054 half-precision floating-point C semantics that requires conversions between
21055 __fp16 to or from double to do an intermediate conversion to float. */
static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
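/* So, given the hook above, a cast such as

     __fp16 h;
     double d = (double) h;

   is lowered as (double) (float) h; no direct __fp16 <-> double
   conversion is ever emitted.  (Illustrative; only applies when __fp16
   support is enabled.)  */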
21069 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
21070 This simply adds HFmode as a supported mode; even though we don't
21071 implement arithmetic on this type directly, it's supported by
21072 optabs conversions, much the way the double-word arithmetic is
21073 special-cased in the default hook. */
static bool
arm_scalar_mode_supported_p (enum machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return false;
  else
    return default_scalar_mode_supported_p (mode);
}
21086 /* Errors in the source file can cause expand_expr to return const0_rtx
21087 where we expect a vector. To avoid crashing, use one of the vector
21088 clear instructions. */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
			       : gen_rtx_SUBREG (DImode, x, 0)));
  return x;
}
21102 /* Subroutine of arm_expand_builtin to take care of binop insns. */
static rtx
arm_expand_binop_builtin (enum insn_code icode,
			  tree exp, rtx target)
{
  rtx pat;
21109 tree arg0 = CALL_EXPR_ARG (exp, 0);
21110 tree arg1 = CALL_EXPR_ARG (exp, 1);
21111 rtx op0 = expand_normal (arg0);
21112 rtx op1 = expand_normal (arg1);
21113 enum machine_mode tmode = insn_data[icode].operand[0].mode;
21114 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
21115 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
21117 if (VECTOR_MODE_P (mode0))
21118 op0 = safe_vector_operand (op0, mode0);
21119 if (VECTOR_MODE_P (mode1))
21120 op1 = safe_vector_operand (op1, mode1);
  if (! target
      || GET_MODE (target) != tmode
21124 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21125 target = gen_reg_rtx (tmode);
21127 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
21128 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
21130 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21131 op0 = copy_to_mode_reg (mode0, op0);
21132 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21133 op1 = copy_to_mode_reg (mode1, op1);
  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
21142 /* Subroutine of arm_expand_builtin to take care of unop insns. */
static rtx
arm_expand_unop_builtin (enum insn_code icode,
			 tree exp, rtx target, int do_load)
{
  rtx pat;
21149 tree arg0 = CALL_EXPR_ARG (exp, 0);
21150 rtx op0 = expand_normal (arg0);
21151 enum machine_mode tmode = insn_data[icode].operand[0].mode;
21152 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  if (! target
      || GET_MODE (target) != tmode
21156 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
typedef enum {
  NEON_ARG_COPY_TO_REG,
  NEON_ARG_CONSTANT,
  NEON_ARG_MEMORY,
  NEON_ARG_STOP
} builtin_arg;
21183 #define NEON_MAX_BUILTIN_ARGS 5
21185 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
21186 and return an expression for the accessed memory.
21188 The intrinsic function operates on a block of registers that has
21189 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
21190 function references the memory at EXP of type TYPE and in mode
   MEM_MODE; this mode may be BLKmode if no more suitable mode is
   available.  */

static tree
neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
			  enum machine_mode reg_mode,
			  neon_builtin_type_mode type_mode)
{
21199 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
21200 tree elem_type, upper_bound, array_type;
21202 /* Work out the size of the register block in bytes. */
21203 reg_size = GET_MODE_SIZE (reg_mode);
21205 /* Work out the size of each vector in bytes. */
21206 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
21207 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
21209 /* Work out how many vectors there are. */
21210 gcc_assert (reg_size % vector_size == 0);
21211 nvectors = reg_size / vector_size;
21213 /* Work out the type of each element. */
21214 gcc_assert (POINTER_TYPE_P (type));
21215 elem_type = TREE_TYPE (type);
21217 /* Work out how many elements are being loaded or stored.
21218 MEM_MODE == REG_MODE implies a one-to-one mapping between register
21219 and memory elements; anything else implies a lane load or store. */
21220 if (mem_mode == reg_mode)
    nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
  else
    nelems = 1;
21225 /* Create a type that describes the full access. */
21226 upper_bound = build_int_cst (size_type_node, nelems - 1);
21227 array_type = build_array_type (elem_type, build_index_type (upper_bound));
21229 /* Dereference EXP using that type. */
21230 return fold_build2 (MEM_REF, array_type, exp,
		      build_int_cst (build_pointer_type (array_type), 0));
}
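/* Worked example (illustrative): for a two-register load such as vld2_s8,
   REG_MODE is TImode (16 bytes) and TYPE_MODE is T_V8QI, so vector_size is
   8 and nvectors is 2.  The element type is the pointed-to int8_t, and
   since MEM_MODE == REG_MODE, nelems = 8 * 2 / 1 = 16; the access is
   therefore described as an int8_t[16] array reference.  */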
/* Expand a Neon builtin.  */

static rtx
arm_expand_neon_args (rtx target, int icode, int have_retval,
		      neon_builtin_type_mode type_mode,
		      tree exp, int fcode, ...)
{
  va_list ap;
  rtx pat;
  tree arg[NEON_MAX_BUILTIN_ARGS];
  rtx op[NEON_MAX_BUILTIN_ARGS];
  tree arg_type;
  tree formals;
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
  enum machine_mode other_mode;
  int argc = 0;
  int opno;

  if (have_retval
      && (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
    target = gen_reg_rtx (tmode);
21258 va_start (ap, fcode);
  formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));

  for (;;)
    {
      builtin_arg thisarg = (builtin_arg) va_arg (ap, int);

      if (thisarg == NEON_ARG_STOP)
	break;
      else
	{
	  opno = argc + have_retval;
21271 mode[argc] = insn_data[icode].operand[opno].mode;
21272 arg[argc] = CALL_EXPR_ARG (exp, argc);
21273 arg_type = TREE_VALUE (formals);
	  if (thisarg == NEON_ARG_MEMORY)
	    {
	      other_mode = insn_data[icode].operand[1 - opno].mode;
	      arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
						    mode[argc], other_mode,
						    type_mode);
	    }

	  op[argc] = expand_normal (arg[argc]);
	  switch (thisarg)
	    {
	    case NEON_ARG_COPY_TO_REG:
	      /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
	      if (!(*insn_data[icode].operand[opno].predicate)
		  (op[argc], mode[argc]))
		op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
	      break;

	    case NEON_ARG_CONSTANT:
	      /* FIXME: This error message is somewhat unhelpful.  */
	      if (!(*insn_data[icode].operand[opno].predicate)
		  (op[argc], mode[argc]))
		error ("argument must be a constant");
	      break;

	    case NEON_ARG_MEMORY:
	      gcc_assert (MEM_P (op[argc]));
	      PUT_MODE (op[argc], mode[argc]);
	      /* ??? arm_neon.h uses the same built-in functions for signed
		 and unsigned accesses, casting where necessary.  This isn't
		 alias safe.  */
	      set_mem_alias_set (op[argc], 0);
	      if (!(*insn_data[icode].operand[opno].predicate)
		  (op[argc], mode[argc]))
		op[argc] = (replace_equiv_address
			    (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
	      break;

	    case NEON_ARG_STOP:
	      gcc_unreachable ();
	    }

	  argc++;
	  formals = TREE_CHAIN (formals);
	}
    }

  va_end (ap);

  if (have_retval)
    switch (argc)
      {
      case 1:
	pat = GEN_FCN (icode) (target, op[0]);
	break;
      case 2:
	pat = GEN_FCN (icode) (target, op[0], op[1]);
	break;
      case 3:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
	break;
      case 4:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
	break;
      case 5:
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
	break;
      default:
	gcc_unreachable ();
      }
  else
    switch (argc)
      {
      case 1:
	pat = GEN_FCN (icode) (op[0]);
	break;
      case 2:
	pat = GEN_FCN (icode) (op[0], op[1]);
	break;
      case 3:
	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
	break;
      case 4:
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
	break;
      case 5:
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
	break;
      default:
	gcc_unreachable ();
      }

  if (!pat)
    return 0;
  emit_insn (pat);
  return target;
}
21385 /* Expand a Neon builtin. These are "special" because they don't have symbolic
21386 constants defined per-instruction or per instruction-variant. Instead, the
21387 required info is looked up in the table neon_builtin_data. */
static rtx
arm_expand_neon_builtin (int fcode, tree exp, rtx target)
{
  neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
  neon_itype itype = d->itype;
  enum insn_code icode = d->code;
  neon_builtin_type_mode type_mode = d->mode;

  switch (itype)
    {
    case NEON_UNOP:
    case NEON_CONVERT:
    case NEON_DUPLANE:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);

    case NEON_BINOP:
    case NEON_SETLANE:
    case NEON_SCALARMUL:
21407 case NEON_SCALARMULL:
21408 case NEON_SCALARMULH:
21409 case NEON_SHIFTINSERT:
21410 case NEON_LOGICBINOP:
21411 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_TERNOP:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21417 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
21418 NEON_ARG_CONSTANT, NEON_ARG_STOP);
    case NEON_GETLANE:
    case NEON_FIXCONV:
    case NEON_SHIFTIMM:
21423 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_CREATE:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
    case NEON_DUP:
    case NEON_SPLIT:
    case NEON_REINTERP:
21435 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21436 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
    case NEON_COMBINE:
    case NEON_VTBL:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21441 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
21443 case NEON_RESULTPAIR:
21444 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
21445 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_STOP);

    case NEON_LANEMUL:
    case NEON_LANEMULL:
21450 case NEON_LANEMULH:
21451 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21452 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
21453 NEON_ARG_CONSTANT, NEON_ARG_STOP);
    case NEON_LANEMAC:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21457 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
21458 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
21460 case NEON_SHIFTACC:
21461 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21462 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
21463 NEON_ARG_CONSTANT, NEON_ARG_STOP);
21465 case NEON_SCALARMAC:
21466 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21467 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
21468 NEON_ARG_CONSTANT, NEON_ARG_STOP);
    case NEON_SELECT:
    case NEON_VTBX:
      return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21473 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
	NEON_ARG_STOP);

    case NEON_LOAD1:
    case NEON_LOADSTRUCT:
21478 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21479 NEON_ARG_MEMORY, NEON_ARG_STOP);
21481 case NEON_LOAD1LANE:
21482 case NEON_LOADSTRUCTLANE:
21483 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21484 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    case NEON_STORE1:
    case NEON_STORESTRUCT:
21489 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
21490 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
21492 case NEON_STORE1LANE:
21493 case NEON_STORESTRUCTLANE:
21494 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
21495 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
	NEON_ARG_STOP);

    default:
      gcc_unreachable ();
    }
}
21502 /* Emit code to reinterpret one Neon type as another, without altering bits. */
void
neon_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}
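/* A minimal sketch of its effect (assuming pseudo-registers created
   elsewhere):

     rtx d = gen_reg_rtx (V8QImode);
     rtx s = gen_reg_rtx (V4HImode);
     neon_reinterpret (d, s);   // moves (subreg:V8QI s 0) into d

   Both modes occupy 8 bytes, so gen_lowpart simply re-labels the bits and
   the move becomes a plain register copy.  */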
/* Emit code to place a Neon pair result in memory locations (with equal
   registers).  */

void
neon_emit_pair_result_insn (enum machine_mode mode,
			    rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
			    rtx op1, rtx op2)
{
  rtx mem = gen_rtx_MEM (mode, destaddr);
21517 rtx tmp1 = gen_reg_rtx (mode);
21518 rtx tmp2 = gen_reg_rtx (mode);
21520 emit_insn (intfn (tmp1, op1, op2, tmp2));
21522 emit_move_insn (mem, tmp1);
21523 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
  emit_move_insn (mem, tmp2);
}
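/* Layout sketch (illustrative): with mode == V8QImode (8 bytes per
   vector), the pair is stored as

     destaddr + 0:  tmp1   (first result register)
     destaddr + 8:  tmp2   (second result register)

   since adjust_address offsets the second store by GET_MODE_SIZE (mode).  */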
21527 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
21528 not to early-clobber SRC registers in the process.
21530 We assume that the operands described by SRC and DEST represent a
21531 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
21532 number of components into which the copy has been decomposed. */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
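/* Worked example (illustrative, using d-register numbers): a decomposed
   copy {d2,d3} <- {d1,d2} overlaps with REGNO (dest) > REGNO (src), so
   the pairs are emitted reversed (d3 <- d2 first, then d2 <- d1); the
   forward order would clobber d2 before it was read.  */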
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
21563 unsigned int src1 = REGNO (operands[1]);
21564 unsigned int src2 = REGNO (operands[2]);
21565 enum machine_mode halfmode = GET_MODE (operands[1]);
21566 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
21567 rtx destlo, desthi;
  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }
21576 /* Preserve register attributes for variable tracking. */
21577 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
21578 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
21579 GET_MODE_SIZE (halfmode));
21581 /* Special case of reversed high/low parts. Use VSWP. */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
      rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }
  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
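/* For instance (illustrative): a vcombine writing q-register {d0,d1} from
   sources d1 and d0 takes the reversed-parts path above and becomes a
   single VSWP of d0 and d1, instead of two moves that would clobber an
   input before it was read.  */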
21608 /* Expand an expression EXP that calls a built-in function,
21609 with result going to TARGET if that's convenient
21610 (and in mode MODE if that's convenient).
21611 SUBTARGET may be used as the target for computing one of EXP's operands.
21612 IGNORE is nonzero if the value is to be ignored. */
static rtx
arm_expand_builtin (tree exp,
		    rtx target,
		    rtx subtarget ATTRIBUTE_UNUSED,
		    enum machine_mode mode ATTRIBUTE_UNUSED,
		    int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description * d;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0;
  tree arg1;
  tree arg2;
  rtx op0;
  rtx op1;
  rtx op2;
  rtx pat;
  int fcode = DECL_FUNCTION_CODE (fndecl);
  size_t i;
21633 enum machine_mode tmode;
21634 enum machine_mode mode0;
21635 enum machine_mode mode1;
  enum machine_mode mode2;
  int opint;
  int selector;
  int mask;
21642 if (fcode >= ARM_BUILTIN_NEON_BASE)
    return arm_expand_neon_builtin (fcode, exp, target);

  switch (fcode)
    {
21647 case ARM_BUILTIN_TEXTRMSB:
21648 case ARM_BUILTIN_TEXTRMUB:
21649 case ARM_BUILTIN_TEXTRMSH:
21650 case ARM_BUILTIN_TEXTRMUH:
21651 case ARM_BUILTIN_TEXTRMSW:
21652 case ARM_BUILTIN_TEXTRMUW:
21653 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
21654 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
21655 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
21656 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
21657 : CODE_FOR_iwmmxt_textrmw);
21659 arg0 = CALL_EXPR_ARG (exp, 0);
21660 arg1 = CALL_EXPR_ARG (exp, 1);
21661 op0 = expand_normal (arg0);
21662 op1 = expand_normal (arg1);
21663 tmode = insn_data[icode].operand[0].mode;
21664 mode0 = insn_data[icode].operand[1].mode;
21665 mode1 = insn_data[icode].operand[2].mode;
21667 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21668 op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return gen_reg_rtx (tmode);
	}

      opint = INTVAL (op1);
      if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
	{
	  if (opint > 7 || opint < 0)
	    error ("the range of selector should be in 0 to 7");
	}
      else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
	{
	  if (opint > 3 || opint < 0)
	    error ("the range of selector should be in 0 to 3");
	}
      else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW.  */
	{
	  if (opint > 1 || opint < 0)
	    error ("the range of selector should be in 0 to 1");
	}

      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
21703 case ARM_BUILTIN_WALIGNI:
      /* If op2 is immediate, call waligni, else call walignr.  */
21705 arg0 = CALL_EXPR_ARG (exp, 0);
21706 arg1 = CALL_EXPR_ARG (exp, 1);
21707 arg2 = CALL_EXPR_ARG (exp, 2);
21708 op0 = expand_normal (arg0);
21709 op1 = expand_normal (arg1);
21710 op2 = expand_normal (arg2);
      if (CONST_INT_P (op2))
	{
	  icode = CODE_FOR_iwmmxt_waligni;
21714 tmode = insn_data[icode].operand[0].mode;
21715 mode0 = insn_data[icode].operand[1].mode;
21716 mode1 = insn_data[icode].operand[2].mode;
21717 mode2 = insn_data[icode].operand[3].mode;
21718 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
21719 op0 = copy_to_mode_reg (mode0, op0);
21720 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
21721 op1 = copy_to_mode_reg (mode1, op1);
21722 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
21723 selector = INTVAL (op2);
21724 if (selector > 7 || selector < 0)
	    error ("the range of selector should be in 0 to 7");
	}
      else
	{
	  icode = CODE_FOR_iwmmxt_walignr;
21730 tmode = insn_data[icode].operand[0].mode;
21731 mode0 = insn_data[icode].operand[1].mode;
21732 mode1 = insn_data[icode].operand[2].mode;
21733 mode2 = insn_data[icode].operand[3].mode;
21734 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
21735 op0 = copy_to_mode_reg (mode0, op0);
21736 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
21737 op1 = copy_to_mode_reg (mode1, op1);
21738 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
	    op2 = copy_to_mode_reg (mode2, op2);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
21743 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
21744 target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;
21751 case ARM_BUILTIN_TINSRB:
21752 case ARM_BUILTIN_TINSRH:
21753 case ARM_BUILTIN_TINSRW:
21754 case ARM_BUILTIN_WMERGE:
21755 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
21756 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
21757 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
21758 : CODE_FOR_iwmmxt_tinsrw);
21759 arg0 = CALL_EXPR_ARG (exp, 0);
21760 arg1 = CALL_EXPR_ARG (exp, 1);
21761 arg2 = CALL_EXPR_ARG (exp, 2);
21762 op0 = expand_normal (arg0);
21763 op1 = expand_normal (arg1);
21764 op2 = expand_normal (arg2);
21765 tmode = insn_data[icode].operand[0].mode;
21766 mode0 = insn_data[icode].operand[1].mode;
21767 mode1 = insn_data[icode].operand[2].mode;
21768 mode2 = insn_data[icode].operand[3].mode;
21770 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21771 op0 = copy_to_mode_reg (mode0, op0);
21772 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21773 op1 = copy_to_mode_reg (mode1, op1);
21774 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21776 error ("selector must be an immediate");
21779 if (icode == CODE_FOR_iwmmxt_wmerge)
21781 selector = INTVAL (op2);
21782 if (selector > 7 || selector < 0)
21783 error ("the range of selector should be in 0 to 7");
21785 if ((icode == CODE_FOR_iwmmxt_tinsrb)
21786 || (icode == CODE_FOR_iwmmxt_tinsrh)
21787 || (icode == CODE_FOR_iwmmxt_tinsrw))
21790 selector = INTVAL (op2);
21791 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
21792 error ("the range of selector should be in 0 to 7");
21793 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
21794 error ("the range of selector should be in 0 to 3");
21795 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
21796 error ("the range of selector should be in 0 to 1");
21797 mask = 1 << selector;
21798 op2 = GEN_INT (mask);
21800 if (target == 0
21801 || GET_MODE (target) != tmode
21802 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21803 target = gen_reg_rtx (tmode);
21804 pat = GEN_FCN (icode) (target, op0, op1, op2);
21805 if (! pat)
21806 return 0;
21807 emit_insn (pat);
21808 return target;
21810 case ARM_BUILTIN_SETWCGR0:
21811 case ARM_BUILTIN_SETWCGR1:
21812 case ARM_BUILTIN_SETWCGR2:
21813 case ARM_BUILTIN_SETWCGR3:
21814 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
21815 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
21816 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
21817 : CODE_FOR_iwmmxt_setwcgr3);
21818 arg0 = CALL_EXPR_ARG (exp, 0);
21819 op0 = expand_normal (arg0);
21820 mode0 = insn_data[icode].operand[0].mode;
21821 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
21822 op0 = copy_to_mode_reg (mode0, op0);
21823 pat = GEN_FCN (icode) (op0);
21824 if (! pat)
21825 return 0;
21826 emit_insn (pat);
21827 return 0;
21829 case ARM_BUILTIN_GETWCGR0:
21830 case ARM_BUILTIN_GETWCGR1:
21831 case ARM_BUILTIN_GETWCGR2:
21832 case ARM_BUILTIN_GETWCGR3:
21833 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
21834 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
21835 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
21836 : CODE_FOR_iwmmxt_getwcgr3);
21837 tmode = insn_data[icode].operand[0].mode;
21838 if (target == 0
21839 || GET_MODE (target) != tmode
21840 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
21841 target = gen_reg_rtx (tmode);
21842 pat = GEN_FCN (icode) (target);
21843 if (! pat)
21844 return 0;
21845 emit_insn (pat);
21846 return target;
21848 case ARM_BUILTIN_WSHUFH:
21849 icode = CODE_FOR_iwmmxt_wshufh;
21850 arg0 = CALL_EXPR_ARG (exp, 0);
21851 arg1 = CALL_EXPR_ARG (exp, 1);
21852 op0 = expand_normal (arg0);
21853 op1 = expand_normal (arg1);
21854 tmode = insn_data[icode].operand[0].mode;
21855 mode1 = insn_data[icode].operand[1].mode;
21856 mode2 = insn_data[icode].operand[2].mode;
21858 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21859 op0 = copy_to_mode_reg (mode1, op0);
21860 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21862 error ("mask must be an immediate");
21865 selector = INTVAL (op1);
21866 if (selector < 0 || selector > 255)
21867 error ("the range of mask should be in 0 to 255");
21868 if (target == 0
21869 || GET_MODE (target) != tmode
21870 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21871 target = gen_reg_rtx (tmode);
21872 pat = GEN_FCN (icode) (target, op0, op1);
21873 if (! pat)
21874 return 0;
21875 emit_insn (pat);
21876 return target;
21878 case ARM_BUILTIN_WMADDS:
21879 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
21880 case ARM_BUILTIN_WMADDSX:
21881 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
21882 case ARM_BUILTIN_WMADDSN:
21883 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
21884 case ARM_BUILTIN_WMADDU:
21885 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
21886 case ARM_BUILTIN_WMADDUX:
21887 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
21888 case ARM_BUILTIN_WMADDUN:
21889 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
21890 case ARM_BUILTIN_WSADBZ:
21891 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
21892 case ARM_BUILTIN_WSADHZ:
21893 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
21895 /* Several three-argument builtins. */
21896 case ARM_BUILTIN_WMACS:
21897 case ARM_BUILTIN_WMACU:
21898 case ARM_BUILTIN_TMIA:
21899 case ARM_BUILTIN_TMIAPH:
21900 case ARM_BUILTIN_TMIATT:
21901 case ARM_BUILTIN_TMIATB:
21902 case ARM_BUILTIN_TMIABT:
21903 case ARM_BUILTIN_TMIABB:
21904 case ARM_BUILTIN_WQMIABB:
21905 case ARM_BUILTIN_WQMIABT:
21906 case ARM_BUILTIN_WQMIATB:
21907 case ARM_BUILTIN_WQMIATT:
21908 case ARM_BUILTIN_WQMIABBN:
21909 case ARM_BUILTIN_WQMIABTN:
21910 case ARM_BUILTIN_WQMIATBN:
21911 case ARM_BUILTIN_WQMIATTN:
21912 case ARM_BUILTIN_WMIABB:
21913 case ARM_BUILTIN_WMIABT:
21914 case ARM_BUILTIN_WMIATB:
21915 case ARM_BUILTIN_WMIATT:
21916 case ARM_BUILTIN_WMIABBN:
21917 case ARM_BUILTIN_WMIABTN:
21918 case ARM_BUILTIN_WMIATBN:
21919 case ARM_BUILTIN_WMIATTN:
21920 case ARM_BUILTIN_WMIAWBB:
21921 case ARM_BUILTIN_WMIAWBT:
21922 case ARM_BUILTIN_WMIAWTB:
21923 case ARM_BUILTIN_WMIAWTT:
21924 case ARM_BUILTIN_WMIAWBBN:
21925 case ARM_BUILTIN_WMIAWBTN:
21926 case ARM_BUILTIN_WMIAWTBN:
21927 case ARM_BUILTIN_WMIAWTTN:
21928 case ARM_BUILTIN_WSADB:
21929 case ARM_BUILTIN_WSADH:
21930 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
21931 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
21932 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
21933 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
21934 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
21935 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
21936 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
21937 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
21938 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
21939 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
21940 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
21941 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
21942 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
21943 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
21944 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
21945 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
21946 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
21947 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
21948 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
21949 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
21950 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
21951 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
21952 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
21953 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
21954 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
21955 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
21956 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
21957 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
21958 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
21959 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
21960 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
21961 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
21962 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
21963 : CODE_FOR_iwmmxt_wsadh);
21964 arg0 = CALL_EXPR_ARG (exp, 0);
21965 arg1 = CALL_EXPR_ARG (exp, 1);
21966 arg2 = CALL_EXPR_ARG (exp, 2);
21967 op0 = expand_normal (arg0);
21968 op1 = expand_normal (arg1);
21969 op2 = expand_normal (arg2);
21970 tmode = insn_data[icode].operand[0].mode;
21971 mode0 = insn_data[icode].operand[1].mode;
21972 mode1 = insn_data[icode].operand[2].mode;
21973 mode2 = insn_data[icode].operand[3].mode;
21975 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21976 op0 = copy_to_mode_reg (mode0, op0);
21977 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21978 op1 = copy_to_mode_reg (mode1, op1);
21979 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21980 op2 = copy_to_mode_reg (mode2, op2);
21981 if (target == 0
21982 || GET_MODE (target) != tmode
21983 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21984 target = gen_reg_rtx (tmode);
21985 pat = GEN_FCN (icode) (target, op0, op1, op2);
21986 if (! pat)
21987 return 0;
21988 emit_insn (pat);
21989 return target;
21991 case ARM_BUILTIN_WZERO:
21992 target = gen_reg_rtx (DImode);
21993 emit_insn (gen_iwmmxt_clrdi (target));
21994 return target;
21996 case ARM_BUILTIN_WSRLHI:
21997 case ARM_BUILTIN_WSRLWI:
21998 case ARM_BUILTIN_WSRLDI:
21999 case ARM_BUILTIN_WSLLHI:
22000 case ARM_BUILTIN_WSLLWI:
22001 case ARM_BUILTIN_WSLLDI:
22002 case ARM_BUILTIN_WSRAHI:
22003 case ARM_BUILTIN_WSRAWI:
22004 case ARM_BUILTIN_WSRADI:
22005 case ARM_BUILTIN_WRORHI:
22006 case ARM_BUILTIN_WRORWI:
22007 case ARM_BUILTIN_WRORDI:
22008 case ARM_BUILTIN_WSRLH:
22009 case ARM_BUILTIN_WSRLW:
22010 case ARM_BUILTIN_WSRLD:
22011 case ARM_BUILTIN_WSLLH:
22012 case ARM_BUILTIN_WSLLW:
22013 case ARM_BUILTIN_WSLLD:
22014 case ARM_BUILTIN_WSRAH:
22015 case ARM_BUILTIN_WSRAW:
22016 case ARM_BUILTIN_WSRAD:
22017 case ARM_BUILTIN_WRORH:
22018 case ARM_BUILTIN_WRORW:
22019 case ARM_BUILTIN_WRORD:
22020 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
22021 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
22022 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
22023 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
22024 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
22025 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
22026 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
22027 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
22028 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
22029 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
22030 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
22031 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
22032 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
22033 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
22034 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
22035 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
22036 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
22037 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
22038 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
22039 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
22040 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
22041 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
22042 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
22043 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
22044 : CODE_FOR_nothing);
22045 arg1 = CALL_EXPR_ARG (exp, 1);
22046 op1 = expand_normal (arg1);
22047 if (GET_MODE (op1) == VOIDmode)
22049 imm = INTVAL (op1);
22050 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
22051 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
22052 && (imm < 0 || imm > 32))
22054 if (fcode == ARM_BUILTIN_WRORHI)
22055 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
22056 else if (fcode == ARM_BUILTIN_WRORWI)
22057 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
22058 else if (fcode == ARM_BUILTIN_WRORH)
22059 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
22060 else
22061 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
22063 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
22064 && (imm < 0 || imm > 64))
22066 if (fcode == ARM_BUILTIN_WRORDI)
22067 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
22068 else
22069 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
22070 }
22071 else if (imm < 0)
22072 {
22073 if (fcode == ARM_BUILTIN_WSRLHI)
22074 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
22075 else if (fcode == ARM_BUILTIN_WSRLWI)
22076 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
22077 else if (fcode == ARM_BUILTIN_WSRLDI)
22078 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
22079 else if (fcode == ARM_BUILTIN_WSLLHI)
22080 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
22081 else if (fcode == ARM_BUILTIN_WSLLWI)
22082 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
22083 else if (fcode == ARM_BUILTIN_WSLLDI)
22084 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
22085 else if (fcode == ARM_BUILTIN_WSRAHI)
22086 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
22087 else if (fcode == ARM_BUILTIN_WSRAWI)
22088 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
22089 else if (fcode == ARM_BUILTIN_WSRADI)
22090 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
22091 else if (fcode == ARM_BUILTIN_WSRLH)
22092 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
22093 else if (fcode == ARM_BUILTIN_WSRLW)
22094 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
22095 else if (fcode == ARM_BUILTIN_WSRLD)
22096 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
22097 else if (fcode == ARM_BUILTIN_WSLLH)
22098 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
22099 else if (fcode == ARM_BUILTIN_WSLLW)
22100 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
22101 else if (fcode == ARM_BUILTIN_WSLLD)
22102 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
22103 else if (fcode == ARM_BUILTIN_WSRAH)
22104 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
22105 else if (fcode == ARM_BUILTIN_WSRAW)
22106 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
22107 else
22108 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
22111 return arm_expand_binop_builtin (icode, exp, target);
22113 default:
22114 break;
22115 }
22117 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
22118 if (d->code == (const enum arm_builtins) fcode)
22119 return arm_expand_binop_builtin (d->icode, exp, target);
22121 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
22122 if (d->code == (const enum arm_builtins) fcode)
22123 return arm_expand_unop_builtin (d->icode, exp, target, 0);
22125 /* @@@ Should really do something sensible here. */
22126 return NULL_RTX;
22127 }
22129 /* Return the number (counting from 0) of
22130 the least significant set bit in MASK. */
22132 static int
22133 number_of_first_bit_set (unsigned mask)
22134 {
22135 return ctz_hwi (mask);
22136 }
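/* Editorial illustration (not part of the original source): for example,
   number_of_first_bit_set (0x28) returns 3, since 0x28 is binary 101000
   and bit 3 is the least significant set bit. */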
22138 /* Like emit_multi_reg_push, but allowing for a different set of
22139 registers to be described as saved. MASK is the set of registers
22140 to be saved; REAL_REGS is the set of registers to be described as
22141 saved. If REAL_REGS is 0, only describe the stack adjustment. */
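/* Editorial illustration (not part of the original source; the mask values
   are assumed for exposition): with MASK == 0x90 (r4 and r7) and
   REAL_REGS == 0x90, this emits a single "push {r4, r7}" and describes
   both stack slots for the unwinder; with REAL_REGS == 0, only the
   8-byte stack decrement itself is described. */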
22143 static rtx
22144 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
22145 {
22146 unsigned long regno;
22147 rtx par[10], tmp, reg, insn;
22148 int i, j;
22150 /* Build the parallel of the registers actually being stored. */
22151 for (i = 0; mask; ++i, mask &= mask - 1)
22153 regno = ctz_hwi (mask);
22154 reg = gen_rtx_REG (SImode, regno);
22156 if (i == 0)
22157 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
22158 else
22159 tmp = gen_rtx_USE (VOIDmode, reg);
22161 par[i] = tmp;
22162 }
22164 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
22165 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22166 tmp = gen_frame_mem (BLKmode, tmp);
22167 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
22168 par[0] = tmp;
22170 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
22171 insn = emit_insn (tmp);
22173 /* Always build the stack adjustment note for unwind info. */
22174 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
22175 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
22176 par[0] = tmp;
22178 /* Build the parallel of the registers recorded as saved for unwind. */
22179 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
22181 regno = ctz_hwi (real_regs);
22182 reg = gen_rtx_REG (SImode, regno);
22184 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
22185 tmp = gen_frame_mem (SImode, tmp);
22186 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
22187 RTX_FRAME_RELATED_P (tmp) = 1;
22188 par[j + 1] = tmp;
22189 }
22191 if (j == 0)
22192 tmp = par[0];
22193 else
22194 {
22195 RTX_FRAME_RELATED_P (par[0]) = 1;
22196 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
22197 }
22199 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
22201 return insn;
22202 }
22204 /* Emit code to pop registers from the stack. F is the assembly file
22205 to write to. MASK is the set of registers to pop. */
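/* Editorial illustration (not part of the original source): thumb_pop
   (f, 0x30) emits "pop {r4, r5}"; with the PC bit also set, and no
   interworking, backtrace or EH return in effect, the PC is popped
   directly, e.g. "pop {r4, r5, pc}". */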
22206 static void
22207 thumb_pop (FILE *f, unsigned long mask)
22208 {
22209 int regno;
22210 int lo_mask = mask & 0xFF;
22211 int pushed_words = 0;
22215 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
22217 /* Special case. Do not generate a POP PC statement here, do it in
22218 thumb_exit. */
22219 thumb_exit (f, -1);
22223 fprintf (f, "\tpop\t{");
22225 /* Look at the low registers first. */
22226 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
22227 {
22228 if (lo_mask & 1)
22229 {
22230 asm_fprintf (f, "%r", regno);
22232 if ((lo_mask & ~1) != 0)
22233 fprintf (f, ", ");
22235 pushed_words++;
22236 }
22237 }
22239 if (mask & (1 << PC_REGNUM))
22241 /* Catch popping the PC. */
22242 if (TARGET_INTERWORK || TARGET_BACKTRACE
22243 || crtl->calls_eh_return)
22245 /* The PC is never popped directly; instead
22246 it is popped into r3 and then BX is used. */
22247 fprintf (f, "}\n");
22249 thumb_exit (f, -1);
22251 return;
22252 }
22253 else
22254 {
22255 if (mask & 0xFF)
22256 fprintf (f, ", ");
22258 asm_fprintf (f, "%r", PC_REGNUM);
22262 fprintf (f, "}\n");
22265 /* Generate code to return from a thumb function.
22266 If 'reg_containing_return_addr' is -1, then the return address is
22267 actually on the stack, at the stack pointer. */
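/* Editorial illustration (not part of the original source): in the
   simplest case, where the return address is already in LR and nothing
   remains to be popped, thumb_exit (f, LR_REGNUM) emits just "bx lr". */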
22268 static void
22269 thumb_exit (FILE *f, int reg_containing_return_addr)
22270 {
22271 unsigned regs_available_for_popping;
22272 unsigned regs_to_pop;
22273 int pops_needed;
22274 unsigned available;
22275 unsigned required;
22276 int mode;
22277 int size;
22278 int restore_a4 = FALSE;
22280 /* Compute the registers we need to pop. */
22281 regs_to_pop = 0;
22282 pops_needed = 0;
22284 if (reg_containing_return_addr == -1)
22286 regs_to_pop |= 1 << LR_REGNUM;
22287 ++pops_needed;
22288 }
22290 if (TARGET_BACKTRACE)
22292 /* Restore the (ARM) frame pointer and stack pointer. */
22293 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
22294 pops_needed += 2;
22295 }
22297 /* If there is nothing to pop then just emit the BX instruction and
22299 if (pops_needed == 0)
22301 if (crtl->calls_eh_return)
22302 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
22304 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
22305 return;
22306 }
22307 /* Otherwise if we are not supporting interworking and we have not created
22308 a backtrace structure and the function was not entered in ARM mode then
22309 just pop the return address straight into the PC. */
22310 else if (!TARGET_INTERWORK
22311 && !TARGET_BACKTRACE
22312 && !is_called_in_ARM_mode (current_function_decl)
22313 && !crtl->calls_eh_return)
22315 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
22316 return;
22317 }
22319 /* Find out how many of the (return) argument registers we can corrupt. */
22320 regs_available_for_popping = 0;
22322 /* If returning via __builtin_eh_return, the bottom three registers
22323 all contain information needed for the return. */
22324 if (crtl->calls_eh_return)
22325 size = 12;
22326 else
22327 {
22328 /* We can deduce the registers used from the function's
22329 return value. This is more reliable than examining
22330 df_regs_ever_live_p () because that will be set if the register is
22331 ever used in the function, not just if the register is used
22332 to hold a return value. */
22334 if (crtl->return_rtx != 0)
22335 mode = GET_MODE (crtl->return_rtx);
22337 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22339 size = GET_MODE_SIZE (mode);
22343 /* In a void function we can use any argument register.
22344 In a function that returns a structure on the stack
22345 we can use the second and third argument registers. */
22346 if (mode == VOIDmode)
22347 regs_available_for_popping =
22348 (1 << ARG_REGISTER (1))
22349 | (1 << ARG_REGISTER (2))
22350 | (1 << ARG_REGISTER (3));
22352 regs_available_for_popping =
22353 (1 << ARG_REGISTER (2))
22354 | (1 << ARG_REGISTER (3));
22356 else if (size <= 4)
22357 regs_available_for_popping =
22358 (1 << ARG_REGISTER (2))
22359 | (1 << ARG_REGISTER (3));
22360 else if (size <= 8)
22361 regs_available_for_popping =
22362 (1 << ARG_REGISTER (3));
22365 /* Match registers to be popped with registers into which we pop them. */
22366 for (available = regs_available_for_popping,
22367 required = regs_to_pop;
22368 required != 0 && available != 0;
22369 available &= ~(available & - available),
22370 required &= ~(required & - required))
22371 -- pops_needed;
22373 /* If we have any popping registers left over, remove them. */
22374 if (available > 0)
22375 regs_available_for_popping &= ~available;
22377 /* Otherwise if we need another popping register we can use
22378 the fourth argument register. */
22379 else if (pops_needed)
22381 /* If we have not found any free argument registers and
22382 reg a4 contains the return address, we must move it. */
22383 if (regs_available_for_popping == 0
22384 && reg_containing_return_addr == LAST_ARG_REGNUM)
22386 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
22387 reg_containing_return_addr = LR_REGNUM;
22389 else if (size > 12)
22391 /* Register a4 is being used to hold part of the return value,
22392 but we have dire need of a free, low register. */
22394 restore_a4 = TRUE;
22395 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
22398 if (reg_containing_return_addr != LAST_ARG_REGNUM)
22400 /* The fourth argument register is available. */
22401 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
22407 /* Pop as many registers as we can. */
22408 thumb_pop (f, regs_available_for_popping);
22410 /* Process the registers we popped. */
22411 if (reg_containing_return_addr == -1)
22413 /* The return address was popped into the lowest numbered register. */
22414 regs_to_pop &= ~(1 << LR_REGNUM);
22416 reg_containing_return_addr =
22417 number_of_first_bit_set (regs_available_for_popping);
22419 /* Remove this register from the mask of available registers, so that
22420 the return address will not be corrupted by further pops. */
22421 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
22424 /* If we popped other registers then handle them here. */
22425 if (regs_available_for_popping)
22429 /* Work out which register currently contains the frame pointer. */
22430 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
22432 /* Move it into the correct place. */
22433 asm_fprintf (f, "\tmov\t%r, %r\n",
22434 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
22436 /* (Temporarily) remove it from the mask of popped registers. */
22437 regs_available_for_popping &= ~(1 << frame_pointer);
22438 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
22440 if (regs_available_for_popping)
22444 /* We popped the stack pointer as well,
22445 find the register that contains it. */
22446 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
22448 /* Move it into the stack register. */
22449 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
22451 /* At this point we have popped all necessary registers, so
22452 do not worry about restoring regs_available_for_popping
22453 to its correct value:
22455 assert (pops_needed == 0)
22456 assert (regs_available_for_popping == (1 << frame_pointer))
22457 assert (regs_to_pop == (1 << STACK_POINTER)) */
22461 /* Since we have just moved the popped value into the frame
22462 pointer, the popping register is available for reuse, and
22463 we know that we still have the stack pointer left to pop. */
22464 regs_available_for_popping |= (1 << frame_pointer);
22468 /* If we still have registers left on the stack, but we no longer have
22469 any registers into which we can pop them, then we must move the return
22470 address into the link register and make available the register that
22471 contained it. */
22472 if (regs_available_for_popping == 0 && pops_needed > 0)
22474 regs_available_for_popping |= 1 << reg_containing_return_addr;
22476 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
22477 reg_containing_return_addr);
22479 reg_containing_return_addr = LR_REGNUM;
22482 /* If we have registers left on the stack then pop some more.
22483 We know that at most we will want to pop FP and SP. */
22484 if (pops_needed > 0)
22485 {
22486 int popped_into;
22487 int move_to;
22489 thumb_pop (f, regs_available_for_popping);
22491 /* We have popped either FP or SP.
22492 Move whichever one it is into the correct register. */
22493 popped_into = number_of_first_bit_set (regs_available_for_popping);
22494 move_to = number_of_first_bit_set (regs_to_pop);
22496 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
22498 regs_to_pop &= ~(1 << move_to);
22500 --pops_needed;
22501 }
22503 /* If we still have not popped everything then we must have only
22504 had one register available to us and we are now popping the SP. */
22505 if (pops_needed > 0)
22506 {
22507 int popped_into;
22509 thumb_pop (f, regs_available_for_popping);
22511 popped_into = number_of_first_bit_set (regs_available_for_popping);
22513 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
22514 /*
22515 assert (regs_to_pop == (1 << STACK_POINTER))
22516 assert (pops_needed == 1)
22517 */
22520 /* If necessary restore the a4 register. */
22521 if (restore_a4)
22522 {
22523 if (reg_containing_return_addr != LR_REGNUM)
22525 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
22526 reg_containing_return_addr = LR_REGNUM;
22529 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
22532 if (crtl->calls_eh_return)
22533 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
22535 /* Return to caller. */
22536 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
22539 /* Scan INSN just before assembler is output for it.
22540 For Thumb-1, we track the status of the condition codes; this
22541 information is used in the cbranchsi4_insn pattern. */
22542 void
22543 thumb1_final_prescan_insn (rtx insn)
22545 if (flag_print_asm_name)
22546 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
22547 INSN_ADDRESSES (INSN_UID (insn)));
22548 /* Don't overwrite the previous setter when we get to a cbranch. */
22549 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
22551 enum attr_conds conds;
22553 if (cfun->machine->thumb1_cc_insn)
22555 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
22556 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
22559 conds = get_attr_conds (insn);
22560 if (conds == CONDS_SET)
22562 rtx set = single_set (insn);
22563 cfun->machine->thumb1_cc_insn = insn;
22564 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
22565 cfun->machine->thumb1_cc_op1 = const0_rtx;
22566 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
22567 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
22569 rtx src1 = XEXP (SET_SRC (set), 1);
22570 if (src1 == const0_rtx)
22571 cfun->machine->thumb1_cc_mode = CCmode;
22573 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
22575 /* Record the src register operand instead of dest because
22576 cprop_hardreg pass propagates src. */
22577 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
22580 else if (conds != CONDS_NOCOND)
22581 cfun->machine->thumb1_cc_insn = NULL_RTX;
22586 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
22588 unsigned HOST_WIDE_INT mask = 0xff;
22591 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
22592 if (val == 0) /* XXX */
22593 return 0;
22595 for (i = 0; i < 25; i++)
22596 if ((val & (mask << i)) == val)
22597 return 1;
22599 return 0;
22600 }
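/* Editorial illustration (not part of the original source): 0x00ff0000
   (0xff << 16) and 0x000003fc (0xff << 2) are shiftable constants, while
   0x00000101 is not, because its set bits do not fit within any eight
   contiguous bit positions. */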
22602 /* Returns nonzero if the current function contains,
22603 or might contain a far jump. */
22605 thumb_far_jump_used_p (void)
22609 /* This test is only important for leaf functions. */
22610 /* assert (!leaf_function_p ()); */
22612 /* If we have already decided that far jumps may be used,
22613 do not bother checking again, and always return true even if
22614 it turns out that they are not being used. Once we have made
22615 the decision that far jumps are present (and that hence the link
22616 register will be pushed onto the stack) we cannot go back on it. */
22617 if (cfun->machine->far_jump_used)
22620 /* If this function is not being called from the prologue/epilogue
22621 generation code then it must be being called from the
22622 INITIAL_ELIMINATION_OFFSET macro. */
22623 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
22625 /* In this case we know that we are being asked about the elimination
22626 of the arg pointer register. If that register is not being used,
22627 then there are no arguments on the stack, and we do not have to
22628 worry that a far jump might force the prologue to push the link
22629 register, changing the stack offsets. In this case we can just
22630 return false, since the presence of far jumps in the function will
22631 not affect stack offsets.
22633 If the arg pointer is live (or if it was live, but has now been
22634 eliminated and so set to dead) then we do have to test to see if
22635 the function might contain a far jump. This test can lead to some
22636 false negatives, since before reload is completed the length of
22637 branch instructions is not known, so gcc defaults to returning their
22638 longest length, which in turn sets the far jump attribute to true.
22640 A false negative will not result in bad code being generated, but it
22641 will result in a needless push and pop of the link register. We
22642 hope that this does not occur too often.
22644 If we need doubleword stack alignment this could affect the other
22645 elimination offsets so we can't risk getting it wrong. */
22646 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
22647 cfun->machine->arg_pointer_live = 1;
22648 else if (!cfun->machine->arg_pointer_live)
22652 /* Check to see if the function contains a branch
22653 insn with the far jump attribute set. */
22654 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22657 /* Ignore tablejump patterns. */
22658 && GET_CODE (PATTERN (insn)) != ADDR_VEC
22659 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
22660 && get_attr_far_jump (insn) == FAR_JUMP_YES
22663 /* Record the fact that we have decided that
22664 the function does use far jumps. */
22665 cfun->machine->far_jump_used = 1;
22673 /* Return nonzero if FUNC must be entered in ARM mode. */
22675 is_called_in_ARM_mode (tree func)
22677 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
22679 /* Ignore the problem about functions whose address is taken. */
22680 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
22684 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
22690 /* Given the stack offsets and register mask in OFFSETS, decide how
22691 many additional registers to push instead of subtracting a constant
22692 from SP. For epilogues the principle is the same except we use pop.
22693 FOR_PROLOGUE indicates which we're generating. */
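/* Worked example (editorial, with assumed numbers): if the frame needs
   amount == 516 bytes and two suitable low registers are free
   (n_free == 2), then amount - n_free * 4 == 508 < 512, so the code
   below returns (516 - 508) / 4 == 2; pushing two extra registers leaves
   a stack decrement small enough for a single subtract instruction. */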
22694 static int
22695 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
22696 {
22697 HOST_WIDE_INT amount;
22698 unsigned long live_regs_mask = offsets->saved_regs_mask;
22699 /* Extract a mask of the ones we can give to the Thumb's push/pop
22701 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
22702 /* Then count how many other high registers will need to be pushed. */
22703 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
22704 int n_free, reg_base, size;
22706 if (!for_prologue && frame_pointer_needed)
22707 amount = offsets->locals_base - offsets->saved_regs;
22709 amount = offsets->outgoing_args - offsets->saved_regs;
22711 /* If the stack frame size is 512 exactly, we can save one load
22712 instruction, which should make this a win even when optimizing
22713 for speed. */
22714 if (!optimize_size && amount != 512)
22717 /* Can't do this if there are high registers to push. */
22718 if (high_regs_pushed != 0)
22721 /* Shouldn't do it in the prologue if no registers would normally
22722 be pushed at all. In the epilogue, also allow it if we'll have
22723 a pop insn for the PC. */
22726 || TARGET_BACKTRACE
22727 || (live_regs_mask & 1 << LR_REGNUM) == 0
22728 || TARGET_INTERWORK
22729 || crtl->args.pretend_args_size != 0))
22732 /* Don't do this if thumb_expand_prologue wants to emit instructions
22733 between the push and the stack frame allocation. */
22735 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
22736 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
22743 size = arm_size_return_regs ();
22744 reg_base = ARM_NUM_INTS (size);
22745 live_regs_mask >>= reg_base;
22748 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
22749 && (for_prologue || call_used_regs[reg_base + n_free]))
22751 live_regs_mask >>= 1;
22757 gcc_assert (amount / 4 * 4 == amount);
22759 if (amount >= 512 && (amount - n_free * 4) < 512)
22760 return (amount - 508) / 4;
22761 if (amount <= n_free * 4)
22766 /* The bits which aren't usefully expanded as rtl. */
22768 thumb1_unexpanded_epilogue (void)
22770 arm_stack_offsets *offsets;
22772 unsigned long live_regs_mask = 0;
22773 int high_regs_pushed = 0;
22775 int had_to_push_lr;
22778 if (cfun->machine->return_used_this_function != 0)
22781 if (IS_NAKED (arm_current_func_type ()))
22784 offsets = arm_get_frame_offsets ();
22785 live_regs_mask = offsets->saved_regs_mask;
22786 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
22788 /* We can deduce the registers used from the function's return value.
22789 This is more reliable than examining df_regs_ever_live_p () because that
22790 will be set if the register is ever used in the function, not just if
22791 the register is used to hold a return value. */
22792 size = arm_size_return_regs ();
22794 extra_pop = thumb1_extra_regs_pushed (offsets, false);
22797 unsigned long extra_mask = (1 << extra_pop) - 1;
22798 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
22801 /* The prolog may have pushed some high registers to use as
22802 work registers. e.g. the testsuite file:
22803 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
22804 compiles to produce:
22805 push {r4, r5, r6, r7, lr}
22809 as part of the prolog. We have to undo that pushing here. */
22811 if (high_regs_pushed)
22813 unsigned long mask = live_regs_mask & 0xff;
22816 /* The available low registers depend on the size of the value we are
22817 returning. */
22818 if (size <= 12)
22819 mask |= 1 << 3;
22820 if (size <= 8)
22821 mask |= 1 << 2;
22823 if (mask == 0)
22824 /* Oh dear! We have no low registers into which we can pop
22825 high registers! */
22826 internal_error
22827 ("no low registers available for popping high registers");
22829 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
22830 if (live_regs_mask & (1 << next_hi_reg))
22833 while (high_regs_pushed)
22835 /* Find lo register(s) into which the high register(s) can
22837 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
22839 if (mask & (1 << regno))
22840 high_regs_pushed--;
22841 if (high_regs_pushed == 0)
22845 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
22847 /* Pop the values into the low register(s). */
22848 thumb_pop (asm_out_file, mask);
22850 /* Move the value(s) into the high registers. */
22851 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
22853 if (mask & (1 << regno))
22855 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
22858 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
22859 if (live_regs_mask & (1 << next_hi_reg))
22864 live_regs_mask &= ~0x0f00;
22867 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
22868 live_regs_mask &= 0xff;
22870 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
22872 /* Pop the return address into the PC. */
22873 if (had_to_push_lr)
22874 live_regs_mask |= 1 << PC_REGNUM;
22876 /* Either no argument registers were pushed or a backtrace
22877 structure was created which includes an adjusted stack
22878 pointer, so just pop everything. */
22879 if (live_regs_mask)
22880 thumb_pop (asm_out_file, live_regs_mask);
22882 /* We have either just popped the return address into the
22883 PC or it was kept in LR for the entire function.
22884 Note that thumb_pop has already called thumb_exit if the
22885 PC was in the list. */
22886 if (!had_to_push_lr)
22887 thumb_exit (asm_out_file, LR_REGNUM);
22891 /* Pop everything but the return address. */
22892 if (live_regs_mask)
22893 thumb_pop (asm_out_file, live_regs_mask);
22895 if (had_to_push_lr)
22899 /* We have no free low regs, so save one. */
22900 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
22904 /* Get the return address into a temporary register. */
22905 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
22909 /* Move the return address to lr. */
22910 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
22912 /* Restore the low register. */
22913 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
22918 regno = LAST_ARG_REGNUM;
22923 /* Remove the argument registers that were pushed onto the stack. */
22924 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
22925 SP_REGNUM, SP_REGNUM,
22926 crtl->args.pretend_args_size);
22928 thumb_exit (asm_out_file, regno);
22934 /* Functions to save and restore machine-specific function data. */
22935 static struct machine_function *
22936 arm_init_machine_status (void)
22938 struct machine_function *machine;
22939 machine = ggc_alloc_cleared_machine_function ();
22941 #if ARM_FT_UNKNOWN != 0
22942 machine->func_type = ARM_FT_UNKNOWN;
22943 #endif
22945 return machine;
22946 }
22947 /* Return an RTX indicating where the return address to the
22948 calling function can be found. */
22949 rtx
22950 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
22951 {
22952 if (count != 0)
22953 return NULL_RTX;
22955 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
22958 /* Do anything needed before RTL is emitted for each function. */
22960 arm_init_expanders (void)
22962 /* Arrange to initialize and mark the machine per-function status. */
22963 init_machine_status = arm_init_machine_status;
22965 /* This is to stop the combine pass optimizing away the alignment
22966 adjustment of va_arg. */
22967 /* ??? It is claimed that this should not be necessary. */
22969 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
22973 /* Like arm_compute_initial_elimination_offset. Simpler because there
22974 isn't an ABI specified frame pointer for Thumb. Instead, we set it
22975 to point at the base of the local variables after static stack
22976 space for a function has been allocated. */
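/* Editorial illustration (not part of the original source; the offsets
   are made up): with saved_args == 0, soft_frame == 8 and
   outgoing_args == 32, eliminating ARG_POINTER_REGNUM to
   STACK_POINTER_REGNUM yields 32 - 0 == 32, while FRAME_POINTER_REGNUM
   to STACK_POINTER_REGNUM yields 32 - 8 == 24. */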
22978 HOST_WIDE_INT
22979 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
22980 {
22981 arm_stack_offsets *offsets;
22983 offsets = arm_get_frame_offsets ();
22985 switch (from)
22986 {
22987 case ARG_POINTER_REGNUM:
22988 switch (to)
22989 {
22990 case STACK_POINTER_REGNUM:
22991 return offsets->outgoing_args - offsets->saved_args;
22993 case FRAME_POINTER_REGNUM:
22994 return offsets->soft_frame - offsets->saved_args;
22996 case ARM_HARD_FRAME_POINTER_REGNUM:
22997 return offsets->saved_regs - offsets->saved_args;
22999 case THUMB_HARD_FRAME_POINTER_REGNUM:
23000 return offsets->locals_base - offsets->saved_args;
23002 default:
23003 gcc_unreachable ();
23004 }
23005 break;
23007 case FRAME_POINTER_REGNUM:
23008 switch (to)
23009 {
23010 case STACK_POINTER_REGNUM:
23011 return offsets->outgoing_args - offsets->soft_frame;
23013 case ARM_HARD_FRAME_POINTER_REGNUM:
23014 return offsets->saved_regs - offsets->soft_frame;
23016 case THUMB_HARD_FRAME_POINTER_REGNUM:
23017 return offsets->locals_base - offsets->soft_frame;
23019 default:
23020 gcc_unreachable ();
23021 }
23022 break;
23024 default:
23025 gcc_unreachable ();
23029 /* Generate the function's prologue. */
23031 void
23032 thumb1_expand_prologue (void)
23033 {
23036 HOST_WIDE_INT amount;
23037 arm_stack_offsets *offsets;
23038 unsigned long func_type;
23039 int regno;
23040 unsigned long live_regs_mask;
23041 unsigned long l_mask;
23042 unsigned high_regs_pushed = 0;
23043 rtx insn;
23044 func_type = arm_current_func_type ();
23046 /* Naked functions don't have prologues. */
23047 if (IS_NAKED (func_type))
23048 return;
23050 if (IS_INTERRUPT (func_type))
23051 {
23052 error ("interrupt service routines cannot be coded in Thumb mode");
23053 return;
23054 }
23056 if (is_called_in_ARM_mode (current_function_decl))
23057 emit_insn (gen_prologue_thumb1_interwork ());
23059 offsets = arm_get_frame_offsets ();
23060 live_regs_mask = offsets->saved_regs_mask;
23062 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
23063 l_mask = live_regs_mask & 0x40ff;
23064 /* Then count how many other high registers will need to be pushed. */
23065 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23067 if (crtl->args.pretend_args_size)
23069 rtx x = GEN_INT (-crtl->args.pretend_args_size);
23071 if (cfun->machine->uses_anonymous_args)
23073 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
23074 unsigned long mask;
23076 mask = 1ul << (LAST_ARG_REGNUM + 1);
23077 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
23079 insn = thumb1_emit_multi_reg_push (mask, 0);
23083 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
23084 stack_pointer_rtx, x));
23086 RTX_FRAME_RELATED_P (insn) = 1;
23089 if (TARGET_BACKTRACE)
23091 HOST_WIDE_INT offset = 0;
23092 unsigned work_register;
23093 rtx work_reg, x, arm_hfp_rtx;
23095 /* We have been asked to create a stack backtrace structure.
23096 The code looks like this:
23100 0 sub SP, #16 Reserve space for 4 registers.
23101 2 push {R7} Push low registers.
23102 4 add R7, SP, #20 Get the stack pointer before the push.
23103 6 str R7, [SP, #8] Store the stack pointer
23104 (before reserving the space).
23105 8 mov R7, PC Get hold of the start of this code + 12.
23106 10 str R7, [SP, #16] Store it.
23107 12 mov R7, FP Get hold of the current frame pointer.
23108 14 str R7, [SP, #4] Store it.
23109 16 mov R7, LR Get hold of the current return address.
23110 18 str R7, [SP, #12] Store it.
23111 20 add R7, SP, #16 Point at the start of the
23112 backtrace structure.
23113 22 mov FP, R7 Put this value into the frame pointer. */
23115 work_register = thumb_find_work_register (live_regs_mask);
23116 work_reg = gen_rtx_REG (SImode, work_register);
23117 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
23119 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
23120 stack_pointer_rtx, GEN_INT (-16)));
23121 RTX_FRAME_RELATED_P (insn) = 1;
23125 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
23126 RTX_FRAME_RELATED_P (insn) = 1;
23128 offset = bit_count (l_mask) * UNITS_PER_WORD;
23131 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
23132 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
23134 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
23135 x = gen_frame_mem (SImode, x);
23136 emit_move_insn (x, work_reg);
23138 /* Make sure that the instruction fetching the PC is in the right place
23139 to calculate "start of backtrace creation code + 12". */
23140 /* ??? The stores using the common WORK_REG ought to be enough to
23141 prevent the scheduler from doing anything weird. Failing that
23142 we could always move all of the following into an UNSPEC_VOLATILE. */
23145 x = gen_rtx_REG (SImode, PC_REGNUM);
23146 emit_move_insn (work_reg, x);
23148 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
23149 x = gen_frame_mem (SImode, x);
23150 emit_move_insn (x, work_reg);
23152 emit_move_insn (work_reg, arm_hfp_rtx);
23154 x = plus_constant (Pmode, stack_pointer_rtx, offset);
23155 x = gen_frame_mem (SImode, x);
23156 emit_move_insn (x, work_reg);
23160 emit_move_insn (work_reg, arm_hfp_rtx);
23162 x = plus_constant (Pmode, stack_pointer_rtx, offset);
23163 x = gen_frame_mem (SImode, x);
23164 emit_move_insn (x, work_reg);
23166 x = gen_rtx_REG (SImode, PC_REGNUM);
23167 emit_move_insn (work_reg, x);
23169 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
23170 x = gen_frame_mem (SImode, x);
23171 emit_move_insn (x, work_reg);
23174 x = gen_rtx_REG (SImode, LR_REGNUM);
23175 emit_move_insn (work_reg, x);
23177 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
23178 x = gen_frame_mem (SImode, x);
23179 emit_move_insn (x, work_reg);
23181 x = GEN_INT (offset + 12);
23182 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
23184 emit_move_insn (arm_hfp_rtx, work_reg);
23186 /* Optimization: If we are not pushing any low registers but we are going
23187 to push some high registers then delay our first push. This will just
23188 be a push of LR and we can combine it with the push of the first high
23190 else if ((l_mask & 0xff) != 0
23191 || (high_regs_pushed == 0 && l_mask))
23193 unsigned long mask = l_mask;
23194 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
23195 insn = thumb1_emit_multi_reg_push (mask, mask);
23196 RTX_FRAME_RELATED_P (insn) = 1;
23199 if (high_regs_pushed)
23201 unsigned pushable_regs;
23202 unsigned next_hi_reg;
23203 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
23204 : crtl->args.info.nregs;
23205 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
23207 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
23208 if (live_regs_mask & (1 << next_hi_reg))
23211 /* Here we need to mask out registers used for passing arguments
23212 even if they can be pushed. This is to avoid using them to stash the high
23213 registers; such a stash could clobber arguments that are still live. */
23214 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
23216 if (pushable_regs == 0)
23217 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
23219 while (high_regs_pushed > 0)
23221 unsigned long real_regs_mask = 0;
23223 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
23225 if (pushable_regs & (1 << regno))
23227 emit_move_insn (gen_rtx_REG (SImode, regno),
23228 gen_rtx_REG (SImode, next_hi_reg));
23230 high_regs_pushed --;
23231 real_regs_mask |= (1 << next_hi_reg);
23233 if (high_regs_pushed)
23235 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
23237 if (live_regs_mask & (1 << next_hi_reg))
23242 pushable_regs &= ~((1 << regno) - 1);
23248 /* If we had to find a work register and we have not yet
23249 saved the LR then add it to the list of regs to push. */
23250 if (l_mask == (1 << LR_REGNUM))
23252 pushable_regs |= l_mask;
23253 real_regs_mask |= l_mask;
23257 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
23258 RTX_FRAME_RELATED_P (insn) = 1;
23262 /* Load the pic register before setting the frame pointer,
23263 so we can use r7 as a temporary work register. */
23264 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23265 arm_load_pic_register (live_regs_mask);
23267 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
23268 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
23269 stack_pointer_rtx);
23271 if (flag_stack_usage_info)
23272 current_function_static_stack_size
23273 = offsets->outgoing_args - offsets->saved_args;
23275 amount = offsets->outgoing_args - offsets->saved_regs;
23276 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
23281 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23282 GEN_INT (- amount)));
23283 RTX_FRAME_RELATED_P (insn) = 1;
23289 /* The stack decrement is too big for an immediate value in a single
23290 insn. In theory we could issue multiple subtracts, but after
23291 three of them it becomes more space efficient to place the full
23292 value in the constant pool and load into a register. (Also the
23293 ARM debugger really likes to see only one stack decrement per
23294 function). So instead we look for a scratch register into which
23295 we can load the decrement, and then we subtract this from the
23296 stack pointer. Unfortunately on the thumb the only available
23297 scratch registers are the argument registers, and we cannot use
23298 these as they may hold arguments to the function. Instead we
23299 attempt to locate a call preserved register which is used by this
23300 function. If we can find one, then we know that it will have
23301 been pushed at the start of the prologue and so we can corrupt
23302 it now. */
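/* Editorial illustration (not part of the original source; the register
   and size are assumed): for amount == 1024 with r4 saved by this
   function, the sequence emitted below is conceptually
   ldr r4, =-1024
   add sp, sp, r4
   while the attached note still describes a plain constant decrement. */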
23303 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
23304 if (live_regs_mask & (1 << regno))
23307 gcc_assert (regno <= LAST_LO_REGNUM);
23309 reg = gen_rtx_REG (SImode, regno);
23311 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
23313 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
23314 stack_pointer_rtx, reg));
23316 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
23317 plus_constant (Pmode, stack_pointer_rtx,
23319 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23320 RTX_FRAME_RELATED_P (insn) = 1;
23324 if (frame_pointer_needed)
23325 thumb_set_frame_pointer (offsets);
23327 /* If we are profiling, make sure no instructions are scheduled before
23328 the call to mcount. Similarly if the user has requested no
23329 scheduling in the prolog. Similarly if we want non-call exceptions
23330 using the EABI unwinder, to prevent faulting instructions from being
23331 swapped with a stack adjustment. */
23332 if (crtl->profile || !TARGET_SCHED_PROLOG
23333 || (arm_except_unwind_info (&global_options) == UI_TARGET
23334 && cfun->can_throw_non_call_exceptions))
23335 emit_insn (gen_blockage ());
23337 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
23338 if (live_regs_mask & 0xff)
23339 cfun->machine->lr_save_eliminated = 0;
23342 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
23343 POP instruction can be generated. LR should be replaced by PC. All
23344 the checks required are already done by USE_RETURN_INSN (). Hence,
23345 all we really need to check here is whether a single register or
23346 multiple registers are being returned. */
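/* Editorial illustration (not part of the original source): if only LR
   was saved, the single-register path below emits an "ldr pc, [sp], #4"
   style pop-and-return; if, say, r4, r5 and LR were saved, LR is replaced
   by PC in the mask and "pop {r4, r5, pc}" is emitted instead. */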
23347 static void
23348 thumb2_expand_return (void)
23349 {
23350 int i, num_regs;
23351 unsigned long saved_regs_mask;
23352 arm_stack_offsets *offsets;
23354 offsets = arm_get_frame_offsets ();
23355 saved_regs_mask = offsets->saved_regs_mask;
23357 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
23358 if (saved_regs_mask & (1 << i))
23359 num_regs++;
23361 if (saved_regs_mask)
23362 {
23363 if (num_regs == 1)
23364 {
23365 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
23366 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
23367 rtx addr = gen_rtx_MEM (SImode,
23368 gen_rtx_POST_INC (SImode,
23369 stack_pointer_rtx));
23370 set_mem_alias_set (addr, get_frame_alias_set ());
23371 XVECEXP (par, 0, 0) = ret_rtx;
23372 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
23373 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
23374 emit_jump_insn (par);
23375 }
23376 else
23377 {
23378 saved_regs_mask &= ~ (1 << LR_REGNUM);
23379 saved_regs_mask |= (1 << PC_REGNUM);
23380 arm_emit_multi_reg_pop (saved_regs_mask);
23381 }
23382 }
23383 else
23385 emit_jump_insn (simple_return_rtx);
23386 }
23389 void
23390 thumb1_expand_epilogue (void)
23391 {
23392 HOST_WIDE_INT amount;
23393 arm_stack_offsets *offsets;
23396 /* Naked functions don't have epilogues. */
23397 if (IS_NAKED (arm_current_func_type ()))
23400 offsets = arm_get_frame_offsets ();
23401 amount = offsets->outgoing_args - offsets->saved_regs;
23403 if (frame_pointer_needed)
23405 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
23406 amount = offsets->locals_base - offsets->saved_regs;
23408 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
23410 gcc_assert (amount >= 0);
23411 if (amount)
23412 {
23413 emit_insn (gen_blockage ());
23414 if (amount < 512)
23416 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23417 GEN_INT (amount)));
23418 else
23419 {
23420 /* r3 is always free in the epilogue. */
23421 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
23423 emit_insn (gen_movsi (reg, GEN_INT (amount)));
23424 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
23428 /* Emit a USE (stack_pointer_rtx), so that
23429 the stack adjustment will not be deleted. */
23430 emit_insn (gen_force_register_use (stack_pointer_rtx));
23432 if (crtl->profile || !TARGET_SCHED_PROLOG)
23433 emit_insn (gen_blockage ());
23435 /* Emit a clobber for each register that will be restored in the epilogue,
23436 so that flow2 will get register lifetimes correct. */
23437 for (regno = 0; regno < 13; regno++)
23438 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
23439 emit_clobber (gen_rtx_REG (SImode, regno));
23441 if (! df_regs_ever_live_p (LR_REGNUM))
23442 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
23445 /* Epilogue code for APCS frame. */
23446 static void
23447 arm_expand_epilogue_apcs_frame (bool really_return)
23448 {
23449 unsigned long func_type;
23450 unsigned long saved_regs_mask;
23451 int num_regs = 0;
23452 int i;
23453 int floats_from_frame = 0;
23454 arm_stack_offsets *offsets;
23456 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
23457 func_type = arm_current_func_type ();
23459 /* Get frame offsets for ARM. */
23460 offsets = arm_get_frame_offsets ();
23461 saved_regs_mask = offsets->saved_regs_mask;
23463 /* Find the offset of the floating-point save area in the frame. */
23464 floats_from_frame = offsets->saved_args - offsets->frame;
23466 /* Compute how many core registers are saved and how far away the floats are. */
23467 for (i = 0; i <= LAST_ARM_REGNUM; i++)
23468 if (saved_regs_mask & (1 << i))
23469 {
23470 num_regs++;
23471 floats_from_frame += 4;
23474 if (TARGET_HARD_FLOAT && TARGET_VFP)
23478 /* The offset is from IP_REGNUM. */
23479 int saved_size = arm_get_vfp_saved_size ();
23480 if (saved_size > 0)
23482 floats_from_frame += saved_size;
23483 emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
23484 hard_frame_pointer_rtx,
23485 GEN_INT (-floats_from_frame)));
23488 /* Generate VFP register multi-pop. */
23489 start_reg = FIRST_VFP_REGNUM;
23491 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
23492 /* Look for a case where a reg does not need restoring. */
23493 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
23494 && (!df_regs_ever_live_p (i + 1)
23495 || call_used_regs[i + 1]))
23497 if (start_reg != i)
23498 arm_emit_vfp_multi_reg_pop (start_reg,
23499 (i - start_reg) / 2,
23500 gen_rtx_REG (SImode,
23501 IP_REGNUM));
23502 start_reg = i + 2;
23503 }
23505 /* Restore the remaining regs that we have discovered (or possibly
23506 even all of them, if the conditional in the for loop never
23508 if (start_reg != i)
23509 arm_emit_vfp_multi_reg_pop (start_reg,
23510 (i - start_reg) / 2,
23511 gen_rtx_REG (SImode, IP_REGNUM));
23516 /* The frame pointer is guaranteed to be non-double-word aligned, as
23517 it is set to double-word-aligned old_stack_pointer - 4. */
23519 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
23521 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
23522 if (df_regs_ever_live_p (i) && !call_used_regs[i])
23524 rtx addr = gen_frame_mem (V2SImode,
23525 plus_constant (Pmode, hard_frame_pointer_rtx,
23527 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
23528 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
23529 gen_rtx_REG (V2SImode, i),
23535 /* saved_regs_mask should contain IP, which holds the old stack pointer
23536 from the time the activation record was created. Since SP and IP are
23537 adjacent registers, we can restore the value directly into SP. */
23538 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
23539 saved_regs_mask &= ~(1 << IP_REGNUM);
23540 saved_regs_mask |= (1 << SP_REGNUM);
23542 /* There are two registers left in saved_regs_mask - LR and PC. We
23543 only need to restore LR (the return address), but to
23544 save time we can load it directly into PC, unless we need a
23545 special function exit sequence, or we are not really returning. */
23546 if (really_return
23547 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
23548 && !crtl->calls_eh_return)
23549 /* Delete LR from the register mask, so that LR on
23550 the stack is loaded into the PC in the register mask. */
23551 saved_regs_mask &= ~(1 << LR_REGNUM);
23552 else
23553 saved_regs_mask &= ~(1 << PC_REGNUM);
23555 num_regs = bit_count (saved_regs_mask);
23556 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
23558 /* Unwind the stack to just below the saved registers. */
23559 emit_insn (gen_addsi3 (stack_pointer_rtx,
23560 hard_frame_pointer_rtx,
23561 GEN_INT (- 4 * num_regs)));
23564 arm_emit_multi_reg_pop (saved_regs_mask);
23566 if (IS_INTERRUPT (func_type))
23568 /* Interrupt handlers will have pushed the
23569 IP onto the stack, so restore it now. */
23571 rtx addr = gen_rtx_MEM (SImode,
23572 gen_rtx_POST_INC (SImode,
23573 stack_pointer_rtx));
23574 set_mem_alias_set (addr, get_frame_alias_set ());
23575 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
23576 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
23577 gen_rtx_REG (SImode, IP_REGNUM),
23581 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
23582 return;
23584 if (crtl->calls_eh_return)
23585 emit_insn (gen_addsi3 (stack_pointer_rtx,
23586 stack_pointer_rtx,
23587 GEN_INT (ARM_EH_STACKADJ_REGNUM)));
23589 if (IS_STACKALIGN (func_type))
23590 /* Restore the original stack pointer. Before prologue, the stack was
23591 realigned and the original stack pointer saved in r0. For details,
23592 see comment in arm_expand_prologue. */
23593 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
23595 emit_jump_insn (simple_return_rtx);
23598 /* Generate RTL to represent the ARM epilogue. REALLY_RETURN is true if the
23599 function is not a sibcall. */
23601 arm_expand_epilogue (bool really_return)
23603 unsigned long func_type;
23604 unsigned long saved_regs_mask;
23608 arm_stack_offsets *offsets;
23610 func_type = arm_current_func_type ();
  /* Naked functions don't have an epilogue.  Hence, generate the return
     pattern and let output_return_instruction take care of instruction
     emission, if any.  */
23614 if (IS_NAKED (func_type)
23615 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
23618 emit_jump_insn (simple_return_rtx);
23622 /* If we are throwing an exception, then we really must be doing a
23623 return, so we can't tail-call. */
23624 gcc_assert (!crtl->calls_eh_return || really_return);
23626 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23628 arm_expand_epilogue_apcs_frame (really_return);
23632 /* Get frame offsets for ARM. */
23633 offsets = arm_get_frame_offsets ();
23634 saved_regs_mask = offsets->saved_regs_mask;
23635 num_regs = bit_count (saved_regs_mask);
23637 if (frame_pointer_needed)
23639 /* Restore stack pointer if necessary. */
23642 /* In ARM mode, frame pointer points to first saved register.
23643 Restore stack pointer to last saved register. */
23644 amount = offsets->frame - offsets->saved_regs;
23646 /* Force out any pending memory operations that reference stacked data
23647 before stack de-allocation occurs. */
23648 emit_insn (gen_blockage ());
23649 emit_insn (gen_addsi3 (stack_pointer_rtx,
23650 hard_frame_pointer_rtx,
23651 GEN_INT (amount)));
          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
             not deleted.  */
23655 emit_insn (gen_force_register_use (stack_pointer_rtx));
          /* In Thumb-2 mode, the frame pointer points to the last saved
             register.  */
23661 amount = offsets->locals_base - offsets->saved_regs;
23663 emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23664 hard_frame_pointer_rtx,
23665 GEN_INT (amount)));
23667 /* Force out any pending memory operations that reference stacked data
23668 before stack de-allocation occurs. */
23669 emit_insn (gen_blockage ());
23670 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
             not deleted.  */
23673 emit_insn (gen_force_register_use (stack_pointer_rtx));
23678 /* Pop off outgoing args and local frame to adjust stack pointer to
23679 last saved register. */
23680 amount = offsets->outgoing_args - offsets->saved_regs;
23683 /* Force out any pending memory operations that reference stacked data
23684 before stack de-allocation occurs. */
23685 emit_insn (gen_blockage ());
          emit_insn (gen_addsi3 (stack_pointer_rtx,
                                 stack_pointer_rtx,
                                 GEN_INT (amount)));
          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment
             is not deleted.  */
23691 emit_insn (gen_force_register_use (stack_pointer_rtx));
23695 if (TARGET_HARD_FLOAT && TARGET_VFP)
23697 /* Generate VFP register multi-pop. */
23698 int end_reg = LAST_VFP_REGNUM + 1;
23700 /* Scan the registers in reverse order. We need to match
23701 any groupings made in the prologue and generate matching
23702 vldm operations. The need to match groups is because,
23703 unlike pop, vldm can only do consecutive regs. */
23704 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
23705 /* Look for a case where a reg does not need restoring. */
23706 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
23707 && (!df_regs_ever_live_p (i + 1)
23708 || call_used_regs[i + 1]))
            /* Restore the regs discovered so far (from reg+2 to
               end_reg).  */
23712 if (end_reg > i + 2)
23713 arm_emit_vfp_multi_reg_pop (i + 2,
23714 (end_reg - (i + 2)) / 2,
23715 stack_pointer_rtx);
    /* Restore the remaining regs that we have discovered (or possibly
       even all of them, if the conditional in the for loop never
       fired).  */
23722 if (end_reg > i + 2)
23723 arm_emit_vfp_multi_reg_pop (i + 2,
23724 (end_reg - (i + 2)) / 2,
23725 stack_pointer_rtx);
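      /* Worked example (illustrative only): if the prologue saved d8-d9
         and d11 as two separate groups, the gap at d10 causes one vldm
         per consecutive group to be emitted, mirroring the prologue's
         vstm groupings:
                vldm    sp!, {d11}
                vldm    sp!, {d8-d9}  */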
23729 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
23730 if (df_regs_ever_live_p (i) && !call_used_regs[i])
23733 rtx addr = gen_rtx_MEM (V2SImode,
23734 gen_rtx_POST_INC (SImode,
23735 stack_pointer_rtx));
23736 set_mem_alias_set (addr, get_frame_alias_set ());
23737 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
23738 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                         gen_rtx_REG (V2SImode, i),
                                         NULL_RTX);
23743 if (saved_regs_mask)
23746 bool return_in_pc = false;
23748 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
23749 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
23750 && !IS_STACKALIGN (func_type)
23752 && crtl->args.pretend_args_size == 0
23753 && saved_regs_mask & (1 << LR_REGNUM)
23754 && !crtl->calls_eh_return)
23756 saved_regs_mask &= ~(1 << LR_REGNUM);
23757 saved_regs_mask |= (1 << PC_REGNUM);
23758 return_in_pc = true;
23761 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
23763 for (i = 0; i <= LAST_ARM_REGNUM; i++)
23764 if (saved_regs_mask & (1 << i))
23766 rtx addr = gen_rtx_MEM (SImode,
23767 gen_rtx_POST_INC (SImode,
23768 stack_pointer_rtx));
23769 set_mem_alias_set (addr, get_frame_alias_set ());
23771 if (i == PC_REGNUM)
23773 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
23774 XVECEXP (insn, 0, 0) = ret_rtx;
23775 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
                                                      gen_rtx_REG (SImode, i),
                                                      addr);
23778 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
23779 insn = emit_jump_insn (insn);
                  insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
                                               addr));
23785 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                                     gen_rtx_REG (SImode, i),
                                                     NULL_RTX);
23794 && current_tune->prefer_ldrd_strd
23795 && !optimize_function_for_size_p (cfun))
23798 thumb2_emit_ldrd_pop (saved_regs_mask);
23800 arm_emit_multi_reg_pop (saved_regs_mask);
23803 arm_emit_multi_reg_pop (saved_regs_mask);
      if (return_in_pc)
23810 if (crtl->args.pretend_args_size)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
                           stack_pointer_rtx,
                           GEN_INT (crtl->args.pretend_args_size)));
23815 if (!really_return)
23818 if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
                           stack_pointer_rtx,
                           gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
23823 if (IS_STACKALIGN (func_type))
23824 /* Restore the original stack pointer. Before prologue, the stack was
23825 realigned and the original stack pointer saved in r0. For details,
23826 see comment in arm_expand_prologue. */
23827 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
23829 emit_jump_insn (simple_return_rtx);
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to Thumb mode.  */
23836 thumb1_output_interwork (void)
23839 FILE *f = asm_out_file;
23841 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
              == SYMBOL_REF);
23844 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
23846 /* Generate code sequence to switch us into Thumb mode. */
23847 /* The .code 32 directive has already been emitted by
23848 ASM_DECLARE_FUNCTION_NAME. */
23849 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
23850 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
23852 /* Generate a label, so that the debugger will notice the
23853 change in instruction sets. This label is also used by
23854 the assembler to bypass the ARM code when this function
23855 is called from a Thumb encoded function elsewhere in the
23856 same file. Hence the definition of STUB_NAME here must
23857 agree with the definition in gas/config/tc-arm.c. */
23859 #define STUB_NAME ".real_start_of"
23861 fprintf (f, "\t.code\t16\n");
23863 if (arm_dllexport_name_p (name))
23864 name = arm_strip_name_encoding (name);
23866 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
23867 fprintf (f, "\t.thumb_func\n");
23868 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
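/* Roughly, for a function named "foo" the above emits (modulo the user
   label prefix that %U inserts):
        orr     r12, pc, #1
        bx      r12
        .code   16
        .globl  .real_start_of<foo>
        .thumb_func
   .real_start_of<foo>:
   Illustrative only.  */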
23873 /* Handle the case of a double word load into a low register from
23874 a computed memory address. The computed address may involve a
23875 register which is overwritten by the load. */
23877 thumb_load_double_from_address (rtx *operands)
23885 gcc_assert (REG_P (operands[0]));
23886 gcc_assert (MEM_P (operands[1]));
23888 /* Get the memory address. */
23889 addr = XEXP (operands[1], 0);
23891 /* Work out how the memory address is computed. */
23892 switch (GET_CODE (addr))
23895 operands[2] = adjust_address (operands[1], SImode, 4);
23897 if (REGNO (operands[0]) == REGNO (addr))
23899 output_asm_insn ("ldr\t%H0, %2", operands);
23900 output_asm_insn ("ldr\t%0, %1", operands);
23904 output_asm_insn ("ldr\t%0, %1", operands);
23905 output_asm_insn ("ldr\t%H0, %2", operands);
23910 /* Compute <address> + 4 for the high order load. */
23911 operands[2] = adjust_address (operands[1], SImode, 4);
23913 output_asm_insn ("ldr\t%0, %1", operands);
23914 output_asm_insn ("ldr\t%H0, %2", operands);
23918 arg1 = XEXP (addr, 0);
23919 arg2 = XEXP (addr, 1);
23921 if (CONSTANT_P (arg1))
23922 base = arg2, offset = arg1;
23924 base = arg1, offset = arg2;
23926 gcc_assert (REG_P (base));
23928 /* Catch the case of <address> = <reg> + <reg> */
23929 if (REG_P (offset))
23931 int reg_offset = REGNO (offset);
23932 int reg_base = REGNO (base);
23933 int reg_dest = REGNO (operands[0]);
23935 /* Add the base and offset registers together into the
23936 higher destination register. */
23937 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
23938 reg_dest + 1, reg_base, reg_offset);
23940 /* Load the lower destination register from the address in
23941 the higher destination register. */
23942 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
23943 reg_dest, reg_dest + 1);
          /* Load the higher destination register from its own address
             plus 4.  */
23947 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
23948 reg_dest + 1, reg_dest + 1);
23952 /* Compute <address> + 4 for the high order load. */
23953 operands[2] = adjust_address (operands[1], SImode, 4);
23955 /* If the computed address is held in the low order register
23956 then load the high order register first, otherwise always
23957 load the low order register first. */
23958 if (REGNO (operands[0]) == REGNO (base))
23960 output_asm_insn ("ldr\t%H0, %2", operands);
23961 output_asm_insn ("ldr\t%0, %1", operands);
23965 output_asm_insn ("ldr\t%0, %1", operands);
23966 output_asm_insn ("ldr\t%H0, %2", operands);
      /* With no registers to worry about we can just load the value
         directly.  */
23974 operands[2] = adjust_address (operands[1], SImode, 4);
23976 output_asm_insn ("ldr\t%H0, %2", operands);
23977 output_asm_insn ("ldr\t%0, %1", operands);
23981 gcc_unreachable ();
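/* Worked example (illustrative registers): a DImode load of [r0, #8]
   into r0/r1 hits the REG-plus-offset case above with the base equal
   to the low destination register, so the high word is fetched first:
        ldr     r1, [r0, #12]
        ldr     r0, [r0, #8]
   keeping the address alive until it is no longer needed.  */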
23988 thumb_output_move_mem_multiple (int n, rtx *operands)
      if (REGNO (operands[4]) > REGNO (operands[5]))
        {
          rtx tmp = operands[4];
          operands[4] = operands[5];
          operands[5] = tmp;
        }
24001 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
24002 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      if (REGNO (operands[4]) > REGNO (operands[5]))
        {
          rtx tmp = operands[4];
          operands[4] = operands[5];
          operands[5] = tmp;
        }
      if (REGNO (operands[5]) > REGNO (operands[6]))
        {
          rtx tmp = operands[5];
          operands[5] = operands[6];
          operands[6] = tmp;
        }
      if (REGNO (operands[4]) > REGNO (operands[5]))
        {
          rtx tmp = operands[4];
          operands[4] = operands[5];
          operands[5] = tmp;
        }
24025 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
24026 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
24030 gcc_unreachable ();
24036 /* Output a call-via instruction for thumb state. */
24038 thumb_call_via_reg (rtx reg)
24040 int regno = REGNO (reg);
24043 gcc_assert (regno < LR_REGNUM);
24045 /* If we are in the normal text section we can use a single instance
24046 per compilation unit. If we are doing function sections, then we need
24047 an entry per section, since we can't rely on reachability. */
24048 if (in_section == text_section)
24050 thumb_call_reg_needed = 1;
24052 if (thumb_call_via_label[regno] == NULL)
24053 thumb_call_via_label[regno] = gen_label_rtx ();
24054 labelp = thumb_call_via_label + regno;
24058 if (cfun->machine->call_via[regno] == NULL)
24059 cfun->machine->call_via[regno] = gen_label_rtx ();
24060 labelp = cfun->machine->call_via + regno;
24063 output_asm_insn ("bl\t%a0", labelp);
24067 /* Routines for generating rtl. */
24069 thumb_expand_movmemqi (rtx *operands)
24071 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
24072 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
24073 HOST_WIDE_INT len = INTVAL (operands[2]);
24074 HOST_WIDE_INT offset = 0;
24078 emit_insn (gen_movmem12b (out, in, out, in));
24084 emit_insn (gen_movmem8b (out, in, out, in));
24090 rtx reg = gen_reg_rtx (SImode);
24091 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
24092 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
24099 rtx reg = gen_reg_rtx (HImode);
24100 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
24101 plus_constant (Pmode, in,
24103 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
24112 rtx reg = gen_reg_rtx (QImode);
24113 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
24114 plus_constant (Pmode, in,
24116 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
24123 thumb_reload_out_hi (rtx *operands)
24125 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
24128 /* Handle reading a half-word from memory during reload. */
24130 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
24132 gcc_unreachable ();
24135 /* Return the length of a function name prefix
24136 that starts with the character 'c'. */
24138 arm_get_strip_length (int c)
24142 ARM_NAME_ENCODING_LENGTHS
24147 /* Return a pointer to a function's name with any
24148 and all prefix encodings stripped from it. */
24150 arm_strip_name_encoding (const char *name)
  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
24160 /* If there is a '*' anywhere in the name's prefix, then
24161 emit the stripped name verbatim, otherwise prepend an
24162 underscore if leading underscores are being used. */
24164 arm_asm_output_labelref (FILE *stream, const char *name)
  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
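/* For instance, an encoded name "*foo" is emitted verbatim as "foo",
   while a plain "bar" becomes "_bar" on targets whose user label
   prefix (what %U expands to) is an underscore.  Illustrative.  */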
24181 /* This function is used to emit an EABI tag and its associated value.
24182 We emit the numerical value of the tag in case the assembler does not
   support textual tags (e.g. gas prior to 2.20).  If requested we include
24184 the tag name in a comment so that anyone reading the assembler output
24185 will know which tag is being set.
24187 This function is not static because arm-c.c needs it too. */
24190 arm_emit_eabi_attribute (const char *name, int num, int val)
24192 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
24193 if (flag_verbose_asm || flag_debug_asm)
24194 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
24195 asm_fprintf (asm_out_file, "\n");
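/* For example, the (hypothetical) call
     arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
   emits
        .eabi_attribute 20, 1   @ Tag_ABI_FP_denormal
   under -fverbose-asm, and just the numeric directive otherwise.  */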
24199 arm_file_start (void)
24203 if (TARGET_UNIFIED_ASM)
24204 asm_fprintf (asm_out_file, "\t.syntax unified\n");
24208 const char *fpu_name;
24209 if (arm_selected_arch)
24210 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
24211 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
24212 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
24214 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
24216 if (TARGET_SOFT_FLOAT)
24218 fpu_name = "softvfp";
24222 fpu_name = arm_fpu_desc->name;
24223 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
24225 if (TARGET_HARD_FLOAT)
24226 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
24227 if (TARGET_HARD_FLOAT_ABI)
24228 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
24231 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
24233 /* Some of these attributes only apply when the corresponding features
24234 are used. However we don't have any easy way of figuring this out.
24235 Conservatively record the setting that would have been used. */
24237 if (flag_rounding_math)
24238 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
24240 if (!flag_unsafe_math_optimizations)
24242 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
24243 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
24245 if (flag_signaling_nans)
24246 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
24248 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
24249 flag_finite_math_only ? 1 : 3);
24251 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
24252 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
24253 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
24254 flag_short_enums ? 1 : 2);
24256 /* Tag_ABI_optimization_goals. */
24259 else if (optimize >= 2)
24265 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
  arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
                           unaligned_access);
24270 if (arm_fp16_format)
24271 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
24272 (int) arm_fp16_format);
24274 if (arm_lang_output_object_attributes_hook)
24275 arm_lang_output_object_attributes_hook();
24278 default_file_start ();
24282 arm_file_end (void)
24286 if (NEED_INDICATE_EXEC_STACK)
24287 /* Add .note.GNU-stack. */
24288 file_end_indicate_exec_stack ();
24290 if (! thumb_call_reg_needed)
24293 switch_to_section (text_section);
24294 asm_fprintf (asm_out_file, "\t.code 16\n");
24295 ASM_OUTPUT_ALIGN (asm_out_file, 1);
24297 for (regno = 0; regno < LR_REGNUM; regno++)
24299 rtx label = thumb_call_via_label[regno];
24303 targetm.asm_out.internal_label (asm_out_file, "L",
24304 CODE_LABEL_NUMBER (label));
24305 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
24311 /* Symbols in the text segment can be accessed without indirecting via the
24312 constant pool; it may take an extra binary operation, but this is still
24313 faster than indirecting via memory. Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */
24318 arm_encode_section_info (tree decl, rtx rtl, int first)
24320 if (optimize > 0 && TREE_CONSTANT (decl))
24321 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
24323 default_encode_section_info (decl, rtl, first);
24325 #endif /* !ARM_PE */
24328 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
24330 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
24331 && !strcmp (prefix, "L"))
24333 arm_ccfsm_state = 0;
24334 arm_target_insn = NULL;
24336 default_internal_label (stream, prefix, labelno);
24339 /* Output code to add DELTA to the first argument, and then jump
24340 to FUNCTION. Used for C++ multiple inheritance. */
24342 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
24343 HOST_WIDE_INT delta,
24344 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
24347 static int thunk_label = 0;
24350 int mi_delta = delta;
24351 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
24353 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
24356 mi_delta = - mi_delta;
24358 final_start_function (emit_barrier (), file, 1);
24362 int labelno = thunk_label++;
24363 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in ARM mode when available.  */
24365 if (TARGET_THUMB1_ONLY)
24367 /* push r3 so we can use it as a temporary. */
24368 /* TODO: Omit this save if r3 is not used. */
24369 fputs ("\tpush {r3}\n", file);
24370 fputs ("\tldr\tr3, ", file);
24374 fputs ("\tldr\tr12, ", file);
24376 assemble_name (file, label);
24377 fputc ('\n', file);
24380 /* If we are generating PIC, the ldr instruction below loads
24381 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
24382 the address of the add + 8, so we have:
             r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
                 = target + 1.
24387 Note that we have "+ 1" because some versions of GNU ld
24388 don't set the low bit of the result for R_ARM_REL32
24389 relocations against thumb function symbols.
24390 On ARMv6M this is +4, not +8. */
24391 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
24392 assemble_name (file, labelpc);
24393 fputs (":\n", file);
24394 if (TARGET_THUMB1_ONLY)
24396 /* This is 2 insns after the start of the thunk, so we know it
24397 is 4-byte aligned. */
24398 fputs ("\tadd\tr3, pc, r3\n", file);
24399 fputs ("\tmov r12, r3\n", file);
24402 fputs ("\tadd\tr12, pc, r12\n", file);
24404 else if (TARGET_THUMB1_ONLY)
24405 fputs ("\tmov r12, r3\n", file);
24407 if (TARGET_THUMB1_ONLY)
24409 if (mi_delta > 255)
24411 fputs ("\tldr\tr3, ", file);
24412 assemble_name (file, label);
24413 fputs ("+4\n", file);
24414 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
24415 mi_op, this_regno, this_regno);
24417 else if (mi_delta != 0)
24419 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
24420 mi_op, this_regno, this_regno,
24426 /* TODO: Use movw/movt for large constants when available. */
24427 while (mi_delta != 0)
24429 if ((mi_delta & (3 << shift)) == 0)
24433 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
24434 mi_op, this_regno, this_regno,
24435 mi_delta & (0xff << shift));
24436 mi_delta &= ~(0xff << shift);
24443 if (TARGET_THUMB1_ONLY)
24444 fputs ("\tpop\t{r3}\n", file);
24446 fprintf (file, "\tbx\tr12\n");
24447 ASM_OUTPUT_ALIGN (file, 2);
24448 assemble_name (file, label);
24449 fputs (":\n", file);
24452 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
24453 rtx tem = XEXP (DECL_RTL (function), 0);
24454 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
24455 tem = gen_rtx_MINUS (GET_MODE (tem),
24457 gen_rtx_SYMBOL_REF (Pmode,
24458 ggc_strdup (labelpc)));
24459 assemble_integer (tem, 4, BITS_PER_WORD, 1);
24462 /* Output ".word .LTHUNKn". */
24463 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
24465 if (TARGET_THUMB1_ONLY && mi_delta > 255)
24466 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
24470 fputs ("\tb\t", file);
24471 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
24472 if (NEED_PLT_RELOC)
24473 fputs ("(PLT)", file);
24474 fputc ('\n', file);
24477 final_end_function ();
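/* A minimal illustration (hypothetical thunk): in non-PIC ARM mode
   with DELTA == 4 and "this" in r0, the code above produces
        add     r0, r0, #4
        b       target(PLT)
   where the PLT marker appears only when NEED_PLT_RELOC holds.  */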
24481 arm_emit_vector_const (FILE *file, rtx x)
24484 const char * pattern;
24486 gcc_assert (GET_CODE (x) == CONST_VECTOR);
24488 switch (GET_MODE (x))
24490 case V2SImode: pattern = "%08x"; break;
24491 case V4HImode: pattern = "%04x"; break;
24492 case V8QImode: pattern = "%02x"; break;
24493 default: gcc_unreachable ();
24496 fprintf (file, "0x");
24497 for (i = CONST_VECTOR_NUNITS (x); i--;)
24501 element = CONST_VECTOR_ELT (x, i);
24502 fprintf (file, pattern, INTVAL (element));
24508 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
24509 HFmode constant pool entries are actually loaded with ldr. */
24511 arm_emit_fp16_const (rtx c)
24516 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
24517 bits = real_to_target (NULL, &r, HFmode);
24518 if (WORDS_BIG_ENDIAN)
24519 assemble_zeros (2);
24520 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
24521 if (!WORDS_BIG_ENDIAN)
24522 assemble_zeros (2);
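/* E.g. the HFmode constant 1.0 has the half-precision bit pattern
   0x3c00, so a little-endian target emits that half-word followed by
   two bytes of padding; a big-endian target pads in front instead.  */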
24526 arm_output_load_gr (rtx *operands)
24533 if (!MEM_P (operands [1])
24534 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
24535 || !REG_P (reg = XEXP (sum, 0))
24536 || !CONST_INT_P (offset = XEXP (sum, 1))
24537 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
24538 return "wldrw%?\t%0, %1";
24540 /* Fix up an out-of-range load of a GR register. */
24541 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
24542 wcgr = operands[0];
24544 output_asm_insn ("ldr%?\t%0, %1", operands);
24546 operands[0] = wcgr;
24548 output_asm_insn ("tmcr%?\t%0, %1", operands);
24549 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
24554 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
24556 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
24557 named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */
24562 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
24563 enum machine_mode mode,
24566 int second_time ATTRIBUTE_UNUSED)
24568 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
24571 cfun->machine->uses_anonymous_args = 1;
24572 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
24574 nregs = pcum->aapcs_ncrn;
24575 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
24579 nregs = pcum->nregs;
24581 if (nregs < NUM_ARG_REGS)
24582 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
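/* Example (illustrative): for "int f (int a, ...)" the named argument
   consumes r0 only, so nregs == 1 and *pretend_size becomes
   3 * UNITS_PER_WORD, making the prologue flush r1-r3 onto the stack
   just below the caller's stacked arguments.  */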
24585 /* Return nonzero if the CONSUMER instruction (a store) does not need
24586 PRODUCER's value to calculate the address. */
24589 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
24591 rtx value = PATTERN (producer);
24592 rtx addr = PATTERN (consumer);
24594 if (GET_CODE (value) == COND_EXEC)
24595 value = COND_EXEC_CODE (value);
24596 if (GET_CODE (value) == PARALLEL)
24597 value = XVECEXP (value, 0, 0);
24598 value = XEXP (value, 0);
24599 if (GET_CODE (addr) == COND_EXEC)
24600 addr = COND_EXEC_CODE (addr);
24601 if (GET_CODE (addr) == PARALLEL)
24602 addr = XVECEXP (addr, 0, 0);
24603 addr = XEXP (addr, 0);
24605 return !reg_overlap_mentioned_p (value, addr);
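/* Sketch of the test (illustrative registers): after a producer such
   as "mul r0, r1, r2", a consumer "str r3, [r0]" computes its address
   from r0, so this returns zero; a consumer "str r0, [r3]" only needs
   r0 for the stored value, so it returns nonzero.  */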
24608 /* Return nonzero if the CONSUMER instruction (a store) does need
24609 PRODUCER's value to calculate the address. */
24612 arm_early_store_addr_dep (rtx producer, rtx consumer)
24614 return !arm_no_early_store_addr_dep (producer, consumer);
24617 /* Return nonzero if the CONSUMER instruction (a load) does need
24618 PRODUCER's value to calculate the address. */
24621 arm_early_load_addr_dep (rtx producer, rtx consumer)
24623 rtx value = PATTERN (producer);
24624 rtx addr = PATTERN (consumer);
24626 if (GET_CODE (value) == COND_EXEC)
24627 value = COND_EXEC_CODE (value);
24628 if (GET_CODE (value) == PARALLEL)
24629 value = XVECEXP (value, 0, 0);
24630 value = XEXP (value, 0);
24631 if (GET_CODE (addr) == COND_EXEC)
24632 addr = COND_EXEC_CODE (addr);
24633 if (GET_CODE (addr) == PARALLEL)
24635 if (GET_CODE (XVECEXP (addr, 0, 0)) == RETURN)
24636 addr = XVECEXP (addr, 0, 1);
24638 addr = XVECEXP (addr, 0, 0);
24640 addr = XEXP (addr, 1);
24642 return reg_overlap_mentioned_p (value, addr);
24645 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
24646 have an early register shift value or amount dependency on the
24647 result of PRODUCER. */
24650 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
24652 rtx value = PATTERN (producer);
24653 rtx op = PATTERN (consumer);
24656 if (GET_CODE (value) == COND_EXEC)
24657 value = COND_EXEC_CODE (value);
24658 if (GET_CODE (value) == PARALLEL)
24659 value = XVECEXP (value, 0, 0);
24660 value = XEXP (value, 0);
24661 if (GET_CODE (op) == COND_EXEC)
24662 op = COND_EXEC_CODE (op);
24663 if (GET_CODE (op) == PARALLEL)
24664 op = XVECEXP (op, 0, 0);
24667 early_op = XEXP (op, 0);
24668 /* This is either an actual independent shift, or a shift applied to
     the first operand of another operation.  We want the whole shift
     operation.  */
  if (REG_P (early_op))
    early_op = op;

24674 return !reg_overlap_mentioned_p (value, early_op);
24677 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value dependency on the result of
   PRODUCER.  */
24682 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
24684 rtx value = PATTERN (producer);
24685 rtx op = PATTERN (consumer);
24688 if (GET_CODE (value) == COND_EXEC)
24689 value = COND_EXEC_CODE (value);
24690 if (GET_CODE (value) == PARALLEL)
24691 value = XVECEXP (value, 0, 0);
24692 value = XEXP (value, 0);
24693 if (GET_CODE (op) == COND_EXEC)
24694 op = COND_EXEC_CODE (op);
24695 if (GET_CODE (op) == PARALLEL)
24696 op = XVECEXP (op, 0, 0);
24699 early_op = XEXP (op, 0);
24701 /* This is either an actual independent shift, or a shift applied to
24702 the first operand of another operation. We want the value being
24703 shifted, in either case. */
24704 if (!REG_P (early_op))
24705 early_op = XEXP (early_op, 0);
24707 return !reg_overlap_mentioned_p (value, early_op);
24710 /* Return nonzero if the CONSUMER (a mul or mac op) does not
   have an early register mult dependency on the result of
   PRODUCER.  */
24715 arm_no_early_mul_dep (rtx producer, rtx consumer)
24717 rtx value = PATTERN (producer);
24718 rtx op = PATTERN (consumer);
24720 if (GET_CODE (value) == COND_EXEC)
24721 value = COND_EXEC_CODE (value);
24722 if (GET_CODE (value) == PARALLEL)
24723 value = XVECEXP (value, 0, 0);
24724 value = XEXP (value, 0);
24725 if (GET_CODE (op) == COND_EXEC)
24726 op = COND_EXEC_CODE (op);
24727 if (GET_CODE (op) == PARALLEL)
24728 op = XVECEXP (op, 0, 0);
24731 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
24733 if (GET_CODE (XEXP (op, 0)) == MULT)
24734 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
24736 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
24742 /* We can't rely on the caller doing the proper promotion when
24743 using APCS or ATPCS. */
24746 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
24748 return !TARGET_AAPCS_BASED;
24751 static enum machine_mode
24752 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
24753 enum machine_mode mode,
24754 int *punsignedp ATTRIBUTE_UNUSED,
24755 const_tree fntype ATTRIBUTE_UNUSED,
24756 int for_return ATTRIBUTE_UNUSED)
24758 if (GET_MODE_CLASS (mode) == MODE_INT
24759 && GET_MODE_SIZE (mode) < 4)
24765 /* AAPCS based ABIs use short enums by default. */
24768 arm_default_short_enums (void)
24770 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
24774 /* AAPCS requires that anonymous bitfields affect structure alignment. */
24777 arm_align_anon_bitfield (void)
24779 return TARGET_AAPCS_BASED;
24783 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
24786 arm_cxx_guard_type (void)
24788 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
24791 /* Return non-zero iff the consumer (a multiply-accumulate or a
24792 multiple-subtract instruction) has an accumulator dependency on the
24793 result of the producer and no other dependency on that result. It
   does not check if the producer is a multiply-accumulate instruction.  */
24796 arm_mac_accumulator_is_result (rtx producer, rtx consumer)
24801 producer = PATTERN (producer);
24802 consumer = PATTERN (consumer);
24804 if (GET_CODE (producer) == COND_EXEC)
24805 producer = COND_EXEC_CODE (producer);
24806 if (GET_CODE (consumer) == COND_EXEC)
24807 consumer = COND_EXEC_CODE (consumer);
24809 if (GET_CODE (producer) != SET)
24812 result = XEXP (producer, 0);
24814 if (GET_CODE (consumer) != SET)
24817 /* Check that the consumer is of the form
24818 (set (...) (plus (mult ...) (...)))
24820 (set (...) (minus (...) (mult ...))). */
24821 if (GET_CODE (XEXP (consumer, 1)) == PLUS)
24823 if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT)
24826 op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0);
24827 op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1);
24828 acc = XEXP (XEXP (consumer, 1), 1);
24830 else if (GET_CODE (XEXP (consumer, 1)) == MINUS)
24832 if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT)
24835 op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0);
24836 op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1);
24837 acc = XEXP (XEXP (consumer, 1), 0);
24842 return (reg_overlap_mentioned_p (result, acc)
24843 && !reg_overlap_mentioned_p (result, op0)
24844 && !reg_overlap_mentioned_p (result, op1));
24847 /* Return non-zero if the consumer (a multiply-accumulate instruction)
24848 has an accumulator dependency on the result of the producer (a
24849 multiplication instruction) and no other dependency on that result. */
24851 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
24853 rtx mul = PATTERN (producer);
24854 rtx mac = PATTERN (consumer);
24856 rtx mac_op0, mac_op1, mac_acc;
24858 if (GET_CODE (mul) == COND_EXEC)
24859 mul = COND_EXEC_CODE (mul);
24860 if (GET_CODE (mac) == COND_EXEC)
24861 mac = COND_EXEC_CODE (mac);
24863 /* Check that mul is of the form (set (...) (mult ...))
24864 and mla is of the form (set (...) (plus (mult ...) (...))). */
24865 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
24866 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
24867 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
24870 mul_result = XEXP (mul, 0);
24871 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
24872 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
24873 mac_acc = XEXP (XEXP (mac, 1), 1);
24875 return (reg_overlap_mentioned_p (mul_result, mac_acc)
24876 && !reg_overlap_mentioned_p (mul_result, mac_op0)
24877 && !reg_overlap_mentioned_p (mul_result, mac_op1));
24881 /* The EABI says test the least significant bit of a guard variable. */
24884 arm_cxx_guard_mask_bit (void)
24886 return TARGET_AAPCS_BASED;
24890 /* The EABI specifies that all array cookies are 8 bytes long. */
24893 arm_get_cookie_size (tree type)
24897 if (!TARGET_AAPCS_BASED)
24898 return default_cxx_get_cookie_size (type);
24900 size = build_int_cst (sizetype, 8);
24905 /* The EABI says that array cookies should also contain the element size. */
24908 arm_cookie_has_size (void)
24910 return TARGET_AAPCS_BASED;
24914 /* The EABI says constructors and destructors should return a pointer to
24915 the object constructed/destroyed. */
24918 arm_cxx_cdtor_returns_this (void)
24920 return TARGET_AAPCS_BASED;
/* The EABI says that an inline function may never be the key
   method.  */
24927 arm_cxx_key_method_may_be_inline (void)
24929 return !TARGET_AAPCS_BASED;
24933 arm_cxx_determine_class_data_visibility (tree decl)
24935 if (!TARGET_AAPCS_BASED
24936 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
24939 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     be exported.  However, on systems without dynamic vague linkage,
24941 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
24942 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
24943 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
24945 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
24946 DECL_VISIBILITY_SPECIFIED (decl) = 1;
24950 arm_cxx_class_data_always_comdat (void)
24952 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
24953 vague linkage if the class has no key function. */
24954 return !TARGET_AAPCS_BASED;
/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */
24962 arm_cxx_use_aeabi_atexit (void)
24964 return TARGET_AAPCS_BASED;
24969 arm_set_return_address (rtx source, rtx scratch)
24971 arm_stack_offsets *offsets;
24972 HOST_WIDE_INT delta;
24974 unsigned long saved_regs;
24976 offsets = arm_get_frame_offsets ();
24977 saved_regs = offsets->saved_regs_mask;
24979 if ((saved_regs & (1 << LR_REGNUM)) == 0)
24980 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
24983 if (frame_pointer_needed)
24984 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
24987 /* LR will be the first saved register. */
24988 delta = offsets->outgoing_args - (offsets->frame + 4);
24993 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
24994 GEN_INT (delta & ~4095)));
24999 addr = stack_pointer_rtx;
25001 addr = plus_constant (Pmode, addr, delta);
25003 emit_move_insn (gen_frame_mem (Pmode, addr), source);
25009 thumb_set_return_address (rtx source, rtx scratch)
25011 arm_stack_offsets *offsets;
25012 HOST_WIDE_INT delta;
25013 HOST_WIDE_INT limit;
25016 unsigned long mask;
25020 offsets = arm_get_frame_offsets ();
25021 mask = offsets->saved_regs_mask;
25022 if (mask & (1 << LR_REGNUM))
25025 /* Find the saved regs. */
25026 if (frame_pointer_needed)
25028 delta = offsets->soft_frame - offsets->saved_args;
25029 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
25035 delta = offsets->outgoing_args - offsets->saved_args;
25038 /* Allow for the stack frame. */
25039 if (TARGET_THUMB1 && TARGET_BACKTRACE)
25041 /* The link register is always the first saved register. */
25044 /* Construct the address. */
25045 addr = gen_rtx_REG (SImode, reg);
25048 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
25049 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
25053 addr = plus_constant (Pmode, addr, delta);
25055 emit_move_insn (gen_frame_mem (Pmode, addr), source);
25058 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
25061 /* Implements target hook vector_mode_supported_p. */
25063 arm_vector_mode_supported_p (enum machine_mode mode)
25065 /* Neon also supports V2SImode, etc. listed in the clause below. */
25066 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
25067 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
25070 if ((TARGET_NEON || TARGET_IWMMXT)
25071 && ((mode == V2SImode)
25072 || (mode == V4HImode)
25073 || (mode == V8QImode)))
25076 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
25077 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
25078 || mode == V2HAmode))
25084 /* Implements target hook array_mode_supported_p. */
25087 arm_array_mode_supported_p (enum machine_mode mode,
25088 unsigned HOST_WIDE_INT nelems)
25091 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
25092 && (nelems >= 2 && nelems <= 4))
/* Use the option -mvectorize-with-neon-double to override the use of quadword
25099 registers when autovectorizing for Neon, at least until multiple vector
25100 widths are supported properly by the middle-end. */
25102 static enum machine_mode
25103 arm_preferred_simd_mode (enum machine_mode mode)
25109 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
25111 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
25113 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
25115 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
25117 if (!TARGET_NEON_VECTORIZE_DOUBLE)
25124 if (TARGET_REALLY_IWMMXT)
25140 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
25142 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame, and not have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
25145 potentially problematic instructions accept high registers so this is not
25146 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
25147 that require many low registers. */
25149 arm_class_likely_spilled_p (reg_class_t rclass)
25151 if ((TARGET_THUMB1 && rclass == LO_REGS)
25152 || rclass == CC_REG)
25158 /* Implements target hook small_register_classes_for_mode_p. */
25160 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
25162 return TARGET_THUMB1;
25165 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
25166 ARM insns and therefore guarantee that the shift count is modulo 256.
25167 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
25168 guarantee no particular behavior for out-of-range counts. */
25170 static unsigned HOST_WIDE_INT
25171 arm_shift_truncation_mask (enum machine_mode mode)
25173 return mode == SImode ? 255 : 0;
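/* E.g. for an SImode "x << n" the middle end may omit an explicit
   "n & 255" mask, since the register-controlled ARM shift instructions
   themselves use only the least significant byte of the count.  */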
25177 /* Map internal gcc register numbers to DWARF2 register numbers. */
25180 arm_dbx_register_number (unsigned int regno)
25185 if (IS_VFP_REGNUM (regno))
25187 /* See comment in arm_dwarf_register_span. */
25188 if (VFP_REGNO_OK_FOR_SINGLE (regno))
25189 return 64 + regno - FIRST_VFP_REGNUM;
25191 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
25194 if (IS_IWMMXT_GR_REGNUM (regno))
25195 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
25197 if (IS_IWMMXT_REGNUM (regno))
25198 return 112 + regno - FIRST_IWMMXT_REGNUM;
25200 gcc_unreachable ();
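/* Sample mappings implied above (illustrative): s0, the first VFP
   register, maps to DWARF register 64, while a double-only register
   such as d16 falls in the 256 + N range, here 256 + 16 == 272.  */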
25203 /* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
25205 the DWARF generation code. Other registers can use the default. */
25207 arm_dwarf_register_span (rtx rtl)
25214 regno = REGNO (rtl);
25215 if (!IS_VFP_REGNUM (regno))
25218 /* XXX FIXME: The EABI defines two VFP register ranges:
        64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
        256-287: D0-D31
25221 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
25222 corresponding D register. Until GDB supports this, we shall use the
25223 legacy encodings. We also use these encodings for D0-D15 for
25224 compatibility with older debuggers. */
25225 if (VFP_REGNO_OK_FOR_SINGLE (regno))
25228 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
25229 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
25230 regno = (regno - FIRST_VFP_REGNUM) / 2;
25231 for (i = 0; i < nregs; i++)
25232 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
25237 #if ARM_UNWIND_INFO
25238 /* Emit unwind directives for a store-multiple instruction or stack pointer
25239 push during alignment.
25240 These should only ever be generated by the function prologue code, so
25241 expect them to have a particular form. */
25244 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
25247 HOST_WIDE_INT offset;
25248 HOST_WIDE_INT nregs;
25254 e = XVECEXP (p, 0, 0);
25255 if (GET_CODE (e) != SET)
25258 /* First insn will adjust the stack pointer. */
25259 if (GET_CODE (e) != SET
25260 || !REG_P (XEXP (e, 0))
25261 || REGNO (XEXP (e, 0)) != SP_REGNUM
25262 || GET_CODE (XEXP (e, 1)) != PLUS)
25265 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
25266 nregs = XVECLEN (p, 0) - 1;
25268 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
25271 /* The function prologue may also push pc, but not annotate it as it is
25272 never restored. We turn this into a stack pointer adjustment. */
25273 if (nregs * 4 == offset - 4)
25275 fprintf (asm_out_file, "\t.pad #4\n");
25279 fprintf (asm_out_file, "\t.save {");
25281 else if (IS_VFP_REGNUM (reg))
25284 fprintf (asm_out_file, "\t.vsave {");
25287 /* Unknown register type. */
25290 /* If the stack increment doesn't match the size of the saved registers,
25291 something has gone horribly wrong. */
25292 if (offset != nregs * reg_size)
25297 /* The remaining insns will describe the stores. */
25298 for (i = 1; i <= nregs; i++)
25300 /* Expect (set (mem <addr>) (reg)).
25301 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
25302 e = XVECEXP (p, 0, i);
25303 if (GET_CODE (e) != SET
25304 || !MEM_P (XEXP (e, 0))
25305 || !REG_P (XEXP (e, 1)))
25308 reg = REGNO (XEXP (e, 1));
25313 fprintf (asm_out_file, ", ");
25314 /* We can't use %r for vfp because we need to use the
25315 double precision register names. */
25316 if (IS_VFP_REGNUM (reg))
25317 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
25319 asm_fprintf (asm_out_file, "%r", reg);
25321 #ifdef ENABLE_CHECKING
25322 /* Check that the addresses are consecutive. */
25323 e = XEXP (XEXP (e, 0), 0);
25324 if (GET_CODE (e) == PLUS)
25326 offset += reg_size;
25327 if (!REG_P (XEXP (e, 0))
25328 || REGNO (XEXP (e, 0)) != SP_REGNUM
25329 || !CONST_INT_P (XEXP (e, 1))
25330 || offset != INTVAL (XEXP (e, 1)))
25335 || REGNO (e) != SP_REGNUM)
25339 fprintf (asm_out_file, "}\n");
25342 /* Emit unwind directives for a SET. */
25345 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
25353 switch (GET_CODE (e0))
25356 /* Pushing a single register. */
25357 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
25358 || !REG_P (XEXP (XEXP (e0, 0), 0))
25359 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
25362 asm_fprintf (asm_out_file, "\t.save ");
25363 if (IS_VFP_REGNUM (REGNO (e1)))
25364 asm_fprintf(asm_out_file, "{d%d}\n",
25365 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
25367 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
25371 if (REGNO (e0) == SP_REGNUM)
25373 /* A stack increment. */
25374 if (GET_CODE (e1) != PLUS
25375 || !REG_P (XEXP (e1, 0))
25376 || REGNO (XEXP (e1, 0)) != SP_REGNUM
25377 || !CONST_INT_P (XEXP (e1, 1)))
25380 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
25381 -INTVAL (XEXP (e1, 1)));
25383 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
25385 HOST_WIDE_INT offset;
25387 if (GET_CODE (e1) == PLUS)
25389 if (!REG_P (XEXP (e1, 0))
25390 || !CONST_INT_P (XEXP (e1, 1)))
25392 reg = REGNO (XEXP (e1, 0));
25393 offset = INTVAL (XEXP (e1, 1));
25394 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
25395 HARD_FRAME_POINTER_REGNUM, reg,
25398 else if (REG_P (e1))
25401 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
25402 HARD_FRAME_POINTER_REGNUM, reg);
25407 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
25409 /* Move from sp to reg. */
25410 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
25412 else if (GET_CODE (e1) == PLUS
25413 && REG_P (XEXP (e1, 0))
25414 && REGNO (XEXP (e1, 0)) == SP_REGNUM
25415 && CONST_INT_P (XEXP (e1, 1)))
25417 /* Set reg to offset from sp. */
25418 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
25419 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
25431 /* Emit unwind directives for the given insn. */
25434 arm_unwind_emit (FILE * asm_out_file, rtx insn)
25437 bool handled_one = false;
25439 if (arm_except_unwind_info (&global_options) != UI_TARGET)
25442 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
25443 && (TREE_NOTHROW (current_function_decl)
25444 || crtl->all_throwers_are_sibcalls))
25447 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
25450 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
25452 pat = XEXP (note, 0);
25453 switch (REG_NOTE_KIND (note))
25455 case REG_FRAME_RELATED_EXPR:
25458 case REG_CFA_REGISTER:
25461 pat = PATTERN (insn);
25462 if (GET_CODE (pat) == PARALLEL)
25463 pat = XVECEXP (pat, 0, 0);
25466 /* Only emitted for IS_STACKALIGN re-alignment. */
25471 src = SET_SRC (pat);
25472 dest = SET_DEST (pat);
25474 gcc_assert (src == stack_pointer_rtx);
25475 reg = REGNO (dest);
25476 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
25479 handled_one = true;
25482 case REG_CFA_DEF_CFA:
25483 case REG_CFA_EXPRESSION:
25484 case REG_CFA_ADJUST_CFA:
25485 case REG_CFA_OFFSET:
25486 /* ??? Only handling here what we actually emit. */
25487 gcc_unreachable ();
25495 pat = PATTERN (insn);
25498 switch (GET_CODE (pat))
25501 arm_unwind_emit_set (asm_out_file, pat);
25505 /* Store multiple. */
25506 arm_unwind_emit_sequence (asm_out_file, pat);
25515 /* Output a reference from a function exception table to the type_info
25516 object X. The EABI specifies that the symbol should be relocated by
25517 an R_ARM_TARGET2 relocation. */
25520 arm_output_ttype (rtx x)
25522 fputs ("\t.word\t", asm_out_file);
25523 output_addr_const (asm_out_file, x);
25524 /* Use special relocations for symbol references. */
25525 if (!CONST_INT_P (x))
25526 fputs ("(TARGET2)", asm_out_file);
25527 fputc ('\n', asm_out_file);
25532 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
25535 arm_asm_emit_except_personality (rtx personality)
25537 fputs ("\t.personality\t", asm_out_file);
25538 output_addr_const (asm_out_file, personality);
25539 fputc ('\n', asm_out_file);
25542 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
25545 arm_asm_init_sections (void)
25547 exception_section = get_unnamed_section (0, output_section_asm_op,
25550 #endif /* ARM_UNWIND_INFO */
25552 /* Output unwind directives for the start/end of a function. */
25555 arm_output_fn_unwind (FILE * f, bool prologue)
25557 if (arm_except_unwind_info (&global_options) != UI_TARGET)
25561 fputs ("\t.fnstart\n", f);
25564 /* If this function will never be unwound, then mark it as such.
     The same condition is used in arm_unwind_emit to suppress
25566 the frame annotations. */
25567 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
25568 && (TREE_NOTHROW (current_function_decl)
25569 || crtl->all_throwers_are_sibcalls))
25570 fputs("\t.cantunwind\n", f);
25572 fputs ("\t.fnend\n", f);
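/* Illustrative shape of the output: an unwindable function gets
        .fnstart
        ... .save/.pad/.setfp annotations from arm_unwind_emit ...
        .fnend
   while a nothrow function built without unwind tables gets
   .cantunwind immediately before its .fnend.  */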
25577 arm_emit_tls_decoration (FILE *fp, rtx x)
25579 enum tls_reloc reloc;
25582 val = XVECEXP (x, 0, 0);
25583 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
25585 output_addr_const (fp, val);
25590 fputs ("(tlsgd)", fp);
25593 fputs ("(tlsldm)", fp);
25596 fputs ("(tlsldo)", fp);
25599 fputs ("(gottpoff)", fp);
25602 fputs ("(tpoff)", fp);
25605 fputs ("(tlsdesc)", fp);
25608 gcc_unreachable ();
25617 fputs (" + (. - ", fp);
25618 output_addr_const (fp, XVECEXP (x, 0, 2));
25619 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
25620 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
25621 output_addr_const (fp, XVECEXP (x, 0, 3));
25631 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
25634 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
25636 gcc_assert (size == 4);
25637 fputs ("\t.word\t", file);
25638 output_addr_const (file, x);
25639 fputs ("(tlsldo)", file);
25642 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
25645 arm_output_addr_const_extra (FILE *fp, rtx x)
25647 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
25648 return arm_emit_tls_decoration (fp, x);
25649 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
25652 int labelno = INTVAL (XVECEXP (x, 0, 0));
25654 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
25655 assemble_name_raw (fp, label);
25659 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
25661 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
25665 output_addr_const (fp, XVECEXP (x, 0, 0));
25669 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
25671 output_addr_const (fp, XVECEXP (x, 0, 0));
25675 output_addr_const (fp, XVECEXP (x, 0, 1));
25679 else if (GET_CODE (x) == CONST_VECTOR)
25680 return arm_emit_vector_const (fp, x);
25685 /* Output assembly for a shift instruction.
25686 SET_FLAGS determines how the instruction modifies the condition codes.
25687 0 - Do not set condition codes.
25688 1 - Set condition codes.
25689 2 - Use smallest instruction. */
25691 arm_output_shift(rtx * operands, int set_flags)
25694 static const char flag_chars[3] = {'?', '.', '!'};
25699 c = flag_chars[set_flags];
25700 if (TARGET_UNIFIED_ASM)
25702 shift = shift_op(operands[3], &val);
25706 operands[2] = GEN_INT(val);
25707 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
25710 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
25713 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
25714 output_asm_insn (pattern, operands);
25718 /* Output assembly for a WMMX immediate shift instruction. */
25720 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
25722 int shift = INTVAL (operands[2]);
25724 enum machine_mode opmode = GET_MODE (operands[0]);
25726 gcc_assert (shift >= 0);
  /* Handle a shift count that is out of range for the instruction:
     > 63 (for D qualifier), > 31 (for W qualifier) or > 15 (for H
     qualifier).  */
25730 if (((opmode == V4HImode) && (shift > 15))
25731 || ((opmode == V2SImode) && (shift > 31))
25732 || ((opmode == DImode) && (shift > 63)))
25736 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
25737 output_asm_insn (templ, operands);
25738 if (opmode == DImode)
25740 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
25741 output_asm_insn (templ, operands);
25746 /* The destination register will contain all zeros. */
25747 sprintf (templ, "wzero\t%%0");
25748 output_asm_insn (templ, operands);
25753 if ((opmode == DImode) && (shift > 32))
25755 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
25756 output_asm_insn (templ, operands);
25757 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
25758 output_asm_insn (templ, operands);
25762 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
25763 output_asm_insn (templ, operands);
25768 /* Output assembly for a WMMX tinsr instruction. */
25770 arm_output_iwmmxt_tinsr (rtx *operands)
25772 int mask = INTVAL (operands[3]);
25775 int units = mode_nunits[GET_MODE (operands[0])];
25776 gcc_assert ((mask & (mask - 1)) == 0);
25777 for (i = 0; i < units; ++i)
25779 if ((mask & 0x01) == 1)
25785 gcc_assert (i < units);
25787 switch (GET_MODE (operands[0]))
25790 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
25793 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
25796 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
25799 gcc_unreachable ();
25802 output_asm_insn (templ, operands);
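/* For instance (made-up operands), a V8QImode insert whose lane mask
   is 0x04 selects i == 2 above and emits "tinsrb wr0, r2, #2".  */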
25807 /* Output a Thumb-1 casesi dispatch sequence. */
25809 thumb1_output_casesi (rtx *operands)
25811 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
25813 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
25815 switch (GET_MODE(diff_vec))
25818 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
25819 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
25821 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
25822 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
25824 return "bl\t%___gnu_thumb1_case_si";
25826 gcc_unreachable ();
25830 /* Output a Thumb-2 casesi instruction. */
25832 thumb2_output_casesi (rtx *operands)
25834 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
25836 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
25838 output_asm_insn ("cmp\t%0, %1", operands);
25839 output_asm_insn ("bhi\t%l3", operands);
25840 switch (GET_MODE(diff_vec))
25843 return "tbb\t[%|pc, %0]";
25845 return "tbh\t[%|pc, %0, lsl #1]";
25849 output_asm_insn ("adr\t%4, %l2", operands);
25850 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
25851 output_asm_insn ("add\t%4, %4, %5", operands);
25856 output_asm_insn ("adr\t%4, %l2", operands);
25857 return "ldr\t%|pc, [%4, %0, lsl #2]";
25860 gcc_unreachable ();
25864 /* Most ARM cores are single issue, but some newer ones can dual issue.
25865 The scheduler descriptions rely on this being correct. */
25867 arm_issue_rate (void)
25891 /* A table and a function to perform ARM-specific name mangling for
25892 NEON vector types in order to conform to the AAPCS (see "Procedure
25893 Call Standard for the ARM Architecture", Appendix A). To qualify
25894 for emission with the mangled names defined in that document, a
25895 vector type must not only be of the correct mode but also be
25896 composed of NEON vector element types (e.g. __builtin_neon_qi). */
25899 enum machine_mode mode;
25900 const char *element_type_name;
25901 const char *aapcs_name;
25902 } arm_mangle_map_entry;
25904 static arm_mangle_map_entry arm_mangle_map[] = {
25905 /* 64-bit containerized types. */
25906 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
25907 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
25908 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
25909 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
25910 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
25911 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
25912 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
25913 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
25914 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
25915 /* 128-bit containerized types. */
25916 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
25917 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
25918 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
25919 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
25920 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
25921 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
25922 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
25923 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
25924 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
25925 { VOIDmode, NULL, NULL }
25929 arm_mangle_type (const_tree type)
25931 arm_mangle_map_entry *pos = arm_mangle_map;
25933 /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
25935 if (TARGET_AAPCS_BASED
25936 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
25937 return "St9__va_list";
25939 /* Half-precision float. */
25940 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
25943 if (TREE_CODE (type) != VECTOR_TYPE)
25946 /* Check the mode of the vector type, and the name of the vector
25947 element type, against the table. */
25948 while (pos->mode != VOIDmode)
25950 tree elt_type = TREE_TYPE (type);
25952 if (pos->mode == TYPE_MODE (type)
25953 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
25954 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
25955 pos->element_type_name))
25956 return pos->aapcs_name;
  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
25966 /* Order of allocation of core registers for Thumb: this allocation is
25967 written over the corresponding initial entries of the array
25968 initialized with REG_ALLOC_ORDER. We allocate all low registers
25969 first. Saving and restoring a low register is usually cheaper than
25970 using a call-clobbered high register. */
25972 static const int thumb_core_reg_alloc_order[] =
25974 3, 2, 1, 0, 4, 5, 6, 7,
25975 14, 12, 8, 9, 10, 11
/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));

  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
            sizeof (thumb_core_reg_alloc_order));
}

/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
          || SUBTARGET_FRAME_POINTER_REQUIRED
          || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}

/* Only Thumb-1 cannot support conditional execution, so return true
   if the target is not Thumb-1.  */

static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}

/* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);

  if (TARGET_AAPCS_BASED)
    align = MIN (align, 64);

  return align;
}

static unsigned int
arm_autovectorize_vector_sizes (void)
{
  return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
}
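/* The value above is a bitmask of vector sizes in bytes: unless the user
   asked for double-word-only NEON vectorization, the vectorizer first
   tries 128-bit (16-byte) vectors such as V16QImode and then falls back
   to 64-bit (8-byte) ones such as V8QImode; returning 0 leaves only the
   preferred vector size.  */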
static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}

static bool
arm_builtin_support_vector_misalignment (enum machine_mode mode,
                                         const_tree type, int misalignment,
                                         bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
        return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
         so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
        return true;

      /* Return true if the misalignment is a multiple of the natural alignment
         of the vector's element type.  This is probably always going to be
         true in practice, since we've already established that this isn't a
         packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
                                                      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
         to use the HI regs, because of the overhead of
         stacking them.  */
      for (regno = FIRST_HI_REGNUM;
           regno <= LAST_HI_REGNUM; ++regno)
        fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
    {
      /* VFPv3 registers are disabled when earlier VFP
         versions are selected due to the definition of
         LAST_VFP_REGNUM.  */
      for (regno = FIRST_VFP_REGNUM;
           regno <= LAST_VFP_REGNUM; ++regno)
        {
          fixed_regs[regno] = 0;
          call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
            || regno >= FIRST_VFP_REGNUM + 32;
        }
    }

  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
         and wCG1 as call-preserved registers.  The 2002/11/21
         revision changed this so that all wCG registers are
         scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
           regno <= LAST_IWMMXT_GR_REGNUM; ++regno)
        fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
         the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
           regno <= LAST_IWMMXT_REGNUM; ++regno)
        {
          fixed_regs[regno] = 0;
          call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
        }
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10] = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
        global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }

  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERAL_REGS.  During the register rename pass, we prefer
     LO_REGS, which can reduce code size.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   This function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}
/* Compute the number of instructions emitted by output_move_double.  */
int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];

  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}
int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
  if (exact_real_inverse (DFmode, &r0))
    {
      if (exact_real_truncate (DFmode, &r0))
        {
          HOST_WIDE_INT value = real_to_integer (&r0);
          value = value & 0xffffffff;
          if ((value != 0) && ((value & (value - 1)) == 0))
            return int_log2 (value);
        }
    }
  return 0;
}
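/* Worked example: for the constant 0.00390625 (= 1.0/256.0),
   exact_real_inverse gives 256.0, which truncates exactly to the integer
   256 = 1 << 8, so the function returns 8.  That is the number of
   fraction bits used when matching the operand against a VFPv3
   fixed-point conversion such as "vcvt ..., #8".  */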
/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}

/* Emit the load-exclusive and store-exclusive instructions.  */

static void
arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem)
{
  rtx (*gen) (rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_arm_load_exclusiveqi; break;
    case HImode: gen = gen_arm_load_exclusivehi; break;
    case SImode: gen = gen_arm_load_exclusivesi; break;
    case DImode: gen = gen_arm_load_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem));
}

static void
arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem)
{
  rtx (*gen) (rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_arm_store_exclusiveqi; break;
    case HImode: gen = gen_arm_store_exclusivehi; break;
    case SImode: gen = gen_arm_store_exclusivesi; break;
    case DImode: gen = gen_arm_store_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (bval, rval, mem));
}

/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);

  insn = emit_jump_insn (insn);
  add_reg_note (insn, REG_BR_PROB, very_unlikely);
}
/* Expand a compare and swap pattern.  */

void
arm_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  enum machine_mode mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For narrow modes, we're going to perform the comparison in SImode,
         so do the zero-extension now.  */
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* FALLTHRU */

    case SImode:
      /* Force the value into a register if needed.  We waited until after
         the zero-extension above to do this properly.  */
      if (!arm_add_operand (oldval, SImode))
        oldval = force_reg (SImode, oldval);
      break;

    case DImode:
      if (!cmpdi_operand (oldval, mode))
        oldval = force_reg (mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  */
  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, bval, x));
}
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  enum machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx label1, label2, x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  mod_s = (enum memmodel) INTVAL (operands[5]);
  mod_f = (enum memmodel) INTVAL (operands[6]);
  scratch = operands[7];
  mode = GET_MODE (mem);

  arm_pre_atomic_barrier (mod_s);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem);

  cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                            gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));

  arm_emit_store_exclusive (mode, scratch, mem, newval);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  cond = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, cond, x));

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                                gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
    }

  if (mod_f != MEMMODEL_RELAXED)
    emit_label (label2);

  arm_post_atomic_barrier (mod_s);

  if (mod_f == MEMMODEL_RELAXED)
    emit_label (label2);
}
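/* Illustrative sketch of the split-out sequence for a strong SImode
   compare-and-swap, in C-like pseudo-code; load_exclusive and
   store_exclusive stand for the LDREX/STREX patterns emitted above, and
   actual register use and barrier placement vary with the memory model:

     label1:
       rval = load_exclusive (mem);
       if (rval != oldval)
         goto label2;                  -- unlikely
       scratch = store_exclusive (newval, mem);
       if (scratch != 0)
         goto label1;                  -- unlikely: reservation was lost
     label2:

   On success the final flag-setting comparison leaves Z set, matching
   what arm_expand_compare_and_swap advertises to its callers.  */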
void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
                     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = (enum memmodel) INTVAL (model_rtx);
  enum machine_mode mode = GET_MODE (mem);
  enum machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx label, x;

  arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
        {
          value = GEN_INT (-INTVAL (value));
          code = PLUS;
        }
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
        {
          /* DImode plus/minus need to clobber flags.  */
          /* The adddi3 and subdi3 patterns are incorrectly written so that
             they require matching operands, even when we could easily support
             three operands.  Thankfully, this can be fixed up post-splitting,
             as the individual add+adc patterns do accept three operands and
             post-reload cprop can make these moves go away.  */
          emit_move_insn (new_out, old_out);
          if (code == PLUS)
            x = gen_adddi3 (new_out, new_out, value);
          else
            x = gen_subdi3 (new_out, new_out, value);
          emit_insn (x);
          break;
        }
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out));

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  arm_post_atomic_barrier (model);
}
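/* Note on the NOT case above: the two SETs compute
   new_out = ~(old_out & value), so the loop implements an atomic NAND,
   which is the semantics the __atomic_fetch_nand builtin family
   expects.  */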
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};

/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
        emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
        emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
        {
          pair = gen_reg_rtx (V16QImode);
          emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
          pair = gen_lowpart (TImode, pair);
          emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
        }
      else
        {
          pair = gen_reg_rtx (OImode);
          emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
          emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
        }
    }
}
void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}
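/* For example, with a single V8QImode input the selector is ANDed with a
   vector of 7s, so an out-of-range index such as 9 selects the same byte
   as index 1, giving the modulo behaviour VEC_PERM_EXPR requires (VTBL
   itself would instead write zero for an out-of-range index).  */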
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
    case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
    case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
    case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
    case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
    case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
    case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
    case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
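/* Example of a selector this matches: for V4SImode with two inputs,
   perm = { 0, 2, 4, 6 } picks the even-numbered elements of the
   concatenated inputs (odd = 0), which is the first result of
   "vuzp.32 q0, q1"; perm = { 1, 3, 5, 7 } (odd = 1) gives the second
   result.  */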
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
        return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
    case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
    case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
    case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
    case V4SImode: gen = gen_neon_vzipv4si_internal; break;
    case V2SImode: gen = gen_neon_vzipv2si_internal; break;
    case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
    case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
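/* Example: for V4SImode with two inputs, perm = { 0, 4, 1, 5 }
   interleaves the low halves of the two vectors (high = 0), and
   perm = { 2, 6, 3, 7 } interleaves the high halves (high = nelt / 2),
   corresponding to the two results of "vzip.32 q0, q1".  */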
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen)(rtx, rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_neon_vrev64v16qi; break;
        case V8QImode: gen = gen_neon_vrev64v8qi; break;
        default:
          return false;
        }
      break;
    case 3:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_neon_vrev32v16qi; break;
        case V8QImode: gen = gen_neon_vrev32v8qi; break;
        case V8HImode: gen = gen_neon_vrev64v8hi; break;
        case V4HImode: gen = gen_neon_vrev64v4hi; break;
        default:
          return false;
        }
      break;
    case 1:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_neon_vrev16v16qi; break;
        case V8QImode: gen = gen_neon_vrev16v8qi; break;
        case V8HImode: gen = gen_neon_vrev32v8hi; break;
        case V4HImode: gen = gen_neon_vrev32v4hi; break;
        case V4SImode: gen = gen_neon_vrev64v4si; break;
        case V2SImode: gen = gen_neon_vrev64v2si; break;
        case V4SFmode: gen = gen_neon_vrev64v4sf; break;
        case V2SFmode: gen = gen_neon_vrev64v2sf; break;
        default:
          return false;
        }
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
        /* This is guaranteed to be true as the value of diff
           is 7, 3, 1 and we should have enough elements in the
           queue to generate this.  Getting a vector mask with a
           value of diff other than these values implies that
           something is wrong by the time we get here.  */
        gcc_assert (i + j < nelt);
        if (d->perm[i + j] != i + diff - j)
          return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  /* ??? The third operand is an artifact of the builtin infrastructure
     and is ignored by the actual instruction.  */
  emit_insn (gen (d->target, d->op0, const0_rtx));
  return true;
}
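/* Example: for V8QImode with one input, perm = { 3, 2, 1, 0, 7, 6, 5, 4 }
   has d->perm[0] == 3, so diff = 3 and the loop checks that each block of
   four bytes is reversed; this case is emitted as "vrev32.8".  */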
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
        return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
    case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
    case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
    case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
    case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
    case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
    case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
    case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
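/* Example: for V4SImode with two inputs, perm = { 0, 4, 2, 6 } selects
   the even-indexed lanes of both vectors (odd = 0) and
   perm = { 1, 5, 3, 7 } the odd-indexed lanes, matching the two results
   of "vtrn.32 q0, q1".  */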
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indexes are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
         the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
        break;

      /* If we are operating on only one vector: it could be a
         rotation.  If there are only two elements of size < 64, let
         arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
        {
          if ((nelt == 2) && (d->vmode != V2DImode))
            return false;

          next = 0;
        }

      if (d->perm[i] != next)
        return false;
    }

  location = d->perm[0];

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vextv16qi; break;
    case V8QImode: gen = gen_neon_vextv8qi; break;
    case V4HImode: gen = gen_neon_vextv4hi; break;
    case V8HImode: gen = gen_neon_vextv8hi; break;
    case V2SImode: gen = gen_neon_vextv2si; break;
    case V4SImode: gen = gen_neon_vextv4si; break;
    case V2SFmode: gen = gen_neon_vextv2sf; break;
    case V4SFmode: gen = gen_neon_vextv4sf; break;
    case V2DImode: gen = gen_neon_vextv2di; break;
    default:
      return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}
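/* Example: for V8QImode with two inputs, perm = { 3, 4, 5, 6, 7, 8, 9, 10 }
   is a run of consecutive indexes starting at 3, so location = 3 and the
   permutation is emitted as "vext.8 ..., #3".  A one-vector rotation such
   as perm = { 1, 2, 3, 4, 5, 6, 7, 0 } is also accepted, with the index
   wrapping back to the start of the single input.  */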
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  enum machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}

static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Check if the input mask matches vext before reordering the
     operands.  */
  if (TARGET_NEON)
    if (arm_evpc_neon_vext (d))
      return true;

  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      for (i = 0; i < nelt; ++i)
        d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_NEON)
    {
      if (arm_evpc_neon_vuzp (d))
        return true;
      if (arm_evpc_neon_vzip (d))
        return true;
      if (arm_evpc_neon_vrev (d))
        return true;
      if (arm_evpc_neon_vtrn (d))
        return true;
      return arm_evpc_neon_vtbl (d);
    }

  return false;
}
/* Expand a vec_perm_const pattern.  */

bool
arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
        break;

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      /* FALLTHRU */
    case 2:
      for (i = 0; i < nelt; ++i)
        d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return arm_expand_vec_perm_const_1 (&d);
}

/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */

static bool
arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                 const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Categorize the set of elements in the selector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from the second vector, fold them onto the first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}

bool
arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and we do not have ldrd
     then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
      /* Post increment and Pre Decrement are supported for all
         instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
        {
          if (code != ARM_PRE_DEC)
            return true;
          else
            return false;
        }
      return true;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and mode size greater than
         word size, there is no point in auto-incrementing
         because ldm and stm will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
        return false;

      /* Vector and floating point modes do not support
         these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
        return false;

      return true;

    default:
      return false;
    }

  return false;
}
/* The default expansion of general 64-bit shifts in core-regs is suboptimal
   on ARM, since we know that shifts by negative amounts are no-ops.
   Additionally, the default expansion code is not available or suitable
   for post-reload insn splits (this can occur when the register allocator
   chooses not to do a shift in NEON).

   This function is used in both initial expand and post-reload splits, and
   handles all kinds of 64-bit shifts.

   Input requirements:
    - It is safe for the input and output to be the same register, but
      early-clobber rules apply for the shift amount and scratch registers.
    - Shift by register requires both scratch registers.  In all other cases
      the scratch registers may be NULL.
    - Ashiftrt by a register also clobbers the CC register.  */
void
arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
                               rtx amount, rtx scratch1, rtx scratch2)
{
  rtx out_high = gen_highpart (SImode, out);
  rtx out_low = gen_lowpart (SImode, out);
  rtx in_high = gen_highpart (SImode, in);
  rtx in_low = gen_lowpart (SImode, in);

  /* Terminology:
        in = the register pair containing the input value.
        out = the destination register pair.
        up = the high- or low-part of each pair.
        down = the opposite part to "up".
     In a shift, we can consider bits to shift from "up"-stream to
     "down"-stream, so in a left-shift "up" is the low-part and "down"
     is the high-part of each register pair.  */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up    = code == ASHIFT ? in_low : in_high;
  rtx in_down  = code == ASHIFT ? in_high : in_low;

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
              && (REG_P (out) || GET_CODE (out) == SUBREG)
              && GET_MODE (out) == DImode);
  gcc_assert (in
              && (REG_P (in) || GET_CODE (in) == SUBREG)
              && GET_MODE (in) == DImode);
  gcc_assert (amount
              && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
                   && GET_MODE (amount) == SImode)
                  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
              || (GET_CODE (scratch1) == SCRATCH)
              || (GET_MODE (scratch1) == SImode
                  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
              || (GET_CODE (scratch2) == SCRATCH)
              || (GET_MODE (scratch2) == SImode
                  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
              || !HARD_REGISTER_P (out)
              || (REGNO (out) != REGNO (amount)
                  && REGNO (out) + 1 != REGNO (amount)));

  /* Macros to make following code more readable.  */
  #define SUB_32(DEST,SRC) \
            gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
  #define RSB_32(DEST,SRC) \
            gen_subsi3 ((DEST), GEN_INT (32), (SRC))
  #define SUB_S_32(DEST,SRC) \
            gen_addsi3_compare0 ((DEST), (SRC), \
                                 GEN_INT (-32))
  #define SET(DEST,SRC) \
            gen_rtx_SET (SImode, (DEST), (SRC))
  #define SHIFT(CODE,SRC,AMOUNT) \
            gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
  #define LSHIFT(CODE,SRC,AMOUNT) \
            gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
                            SImode, (SRC), (AMOUNT))
  #define REV_LSHIFT(CODE,SRC,AMOUNT) \
            gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
                            SImode, (SRC), (AMOUNT))
  #define ORR(A,B) \
            gen_rtx_IOR (SImode, (A), (B))
  #define BRANCH(COND,LABEL) \
            gen_arm_cond_branch ((LABEL), \
                                 gen_rtx_ ## COND (CCmode, cc_reg, \
                                                   const0_rtx), \
                                 cc_reg)
  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
         In both cases we try to match the result that an ARM
         shift-by-register instruction would give.  This helps reduce
         execution differences between optimization levels, but it won't
         stop other parts of the compiler doing different things.  This is
         "undefined behaviour", in any case.  */
      if (INTVAL (amount) <= 0)
        emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
        {
          if (code == ASHIFTRT)
            {
              rtx const31_rtx = GEN_INT (31);
              emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
              emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
            }
          else
            emit_insn (gen_movdi (out, const0_rtx));
        }

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
        {
          /* Shifts by a constant less than 32.  */
          rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

          emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
          emit_insn (SET (out_down,
                          ORR (REV_LSHIFT (code, in_up, reverse_amount),
                               out_down)));
          emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
        }
      else
        {
          /* Shifts by a constant greater than 31.  */
          rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

          emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
          if (code == ASHIFTRT)
            emit_insn (gen_ashrsi3 (out_up, in_up,
                                    GEN_INT (31)));
          else
            emit_insn (SET (out_up, const0_rtx));
        }
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
         Swapping them around now allows the later code to be more general.  */
      switch (code)
        {
        case ASHIFT:
          emit_insn (SUB_32 (scratch1, amount));
          emit_insn (RSB_32 (scratch2, amount));
          break;
        case ASHIFTRT:
          emit_insn (RSB_32 (scratch1, amount));
          /* Also set CC = amount > 32.  */
          emit_insn (SUB_S_32 (scratch2, amount));
          break;
        case LSHIFTRT:
          emit_insn (RSB_32 (scratch1, amount));
          emit_insn (SUB_32 (scratch2, amount));
          break;
        default:
          gcc_unreachable ();
        }
      /* Emit code like this:

         ashift:
            out_down = in_down << amount;
            out_down = (in_up << (amount - 32)) | out_down;
            out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
            out_up = in_up << amount;

         ashiftrt:
            out_down = in_down >> amount;
            out_down = (in_up << (32 - amount)) | out_down;
            if (amount >= 32)
              out_down = ((signed)in_up >> (amount - 32)) | out_down;
            out_up = in_up >> amount;

         lshiftrt:
            out_down = in_down >> amount;
            out_down = (in_up << (32 - amount)) | out_down;
            if (amount >= 32)
              out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
            out_up = in_up >> amount;

         The ARM and Thumb2 variants are the same but implemented slightly
         differently.  If this were only called during expand we could just
         use the Thumb2 case and let combine do the right thing, but this
         can also be called from post-reload splitters.  */

      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
        {
          /* Emit code for ARM mode.  */
          emit_insn (SET (out_down,
                          ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
          if (code == ASHIFTRT)
            {
              rtx done_label = gen_label_rtx ();
              emit_jump_insn (BRANCH (LT, done_label));
              emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
                                             out_down)));
              emit_label (done_label);
            }
          else
            emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
                                           out_down)));
        }
      else
        {
          /* Emit code for Thumb2 mode.
             Thumb2 can't do shift and or in one insn.  */
          emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
          emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

          if (code == ASHIFTRT)
            {
              rtx done_label = gen_label_rtx ();
              emit_jump_insn (BRANCH (LT, done_label));
              emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
              emit_insn (SET (out_down, ORR (out_down, scratch2)));
              emit_label (done_label);
            }
          else
            {
              emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
              emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
            }
        }

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

  #undef SUB_32
  #undef RSB_32
  #undef SUB_S_32
  #undef SET
  #undef SHIFT
  #undef LSHIFT
  #undef REV_LSHIFT
  #undef ORR
  #undef BRANCH
}
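/* For reference, the decomposition above computes, for a logical right
   shift by a variable amount, what this plain C sketch computes (assuming
   32-bit unsigned int and 1 <= amount <= 63; the RTL version avoids the
   branch by exploiting ARM's byte-wide register-shift behaviour):

       void
       lshr64 (unsigned int *hi, unsigned int *lo, unsigned int amount)
       {
         if (amount < 32)
           {
             *lo = (*lo >> amount) | (*hi << (32 - amount));
             *hi = *hi >> amount;
           }
         else
           {
             *lo = *hi >> (amount - 32);
             *hi = 0;
           }
       }
*/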
/* Return true if COMPARISON is a valid comparison operation, and force
   the operands into a form that is valid for it.  */
bool
arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  code_int = (int) code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code) code_int);

  switch (mode)
    {
    case SImode:
      if (!arm_add_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    case DImode:
      if (!cmpdi_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!cmpdi_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    case SFmode:
    case DFmode:
      if (!arm_float_compare_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!arm_float_compare_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    default:
      break;
    }

  return false;
}
#include "gt-arm.h"